aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.github/ISSUE_TEMPLATE.md61
-rw-r--r--.github/ISSUE_TEMPLATE/1_broken_site.md63
-rw-r--r--.github/ISSUE_TEMPLATE/2_site_support_request.md54
-rw-r--r--.github/ISSUE_TEMPLATE/3_site_feature_request.md37
-rw-r--r--.github/ISSUE_TEMPLATE/4_bug_report.md65
-rw-r--r--.github/ISSUE_TEMPLATE/5_feature_request.md38
-rw-r--r--.github/ISSUE_TEMPLATE/6_question.md38
-rw-r--r--.github/ISSUE_TEMPLATE_tmpl.md61
-rw-r--r--.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md63
-rw-r--r--.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md54
-rw-r--r--.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md37
-rw-r--r--.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md65
-rw-r--r--.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md38
-rw-r--r--.github/PULL_REQUEST_TEMPLATE.md4
-rw-r--r--.travis.yml27
-rw-r--r--CONTRIBUTING.md166
-rw-r--r--ChangeLog1491
-rw-r--r--Makefile10
-rw-r--r--README.md213
-rw-r--r--devscripts/buildserver.py2
-rw-r--r--devscripts/check-porn.py8
-rw-r--r--devscripts/create-github-release.py22
-rwxr-xr-xdevscripts/gh-pages/update-feed.py4
-rwxr-xr-xdevscripts/release.sh6
-rw-r--r--devscripts/show-downloads-statistics.py2
-rw-r--r--docs/supportedsites.md213
-rw-r--r--setup.cfg2
-rw-r--r--setup.py11
-rw-r--r--test/helper.py44
-rw-r--r--test/test_InfoExtractor.py351
-rw-r--r--test/test_YoutubeDL.py104
-rw-r--r--test/test_YoutubeDLCookieJar.py51
-rw-r--r--test/test_aes.py8
-rw-r--r--test/test_all_urls.py10
-rw-r--r--test/test_compat.py9
-rw-r--r--test/test_downloader_http.py12
-rw-r--r--test/test_http.py10
-rw-r--r--test/test_postprocessors.py2
-rw-r--r--test/test_subtitles.py13
-rw-r--r--test/test_swfinterp.py4
-rw-r--r--test/test_utils.py81
-rw-r--r--test/test_youtube_chapters.py2
-rw-r--r--test/test_youtube_signature.py22
-rw-r--r--test/testdata/cookies/httponly_cookies.txt6
-rw-r--r--test/testdata/cookies/malformed_cookies.txt9
-rw-r--r--test/testdata/cookies/session_cookies.txt6
-rw-r--r--test/testdata/m3u8/ted_18923.m3u828
-rw-r--r--test/testdata/mpd/unfragmented.mpd28
-rw-r--r--youtube-dl.plugin.zsh2
-rwxr-xr-xyoutube_dl/YoutubeDL.py173
-rw-r--r--youtube_dl/__init__.py22
-rw-r--r--youtube_dl/compat.py46
-rw-r--r--youtube_dl/downloader/common.py14
-rw-r--r--youtube_dl/downloader/dash.py2
-rw-r--r--youtube_dl/downloader/external.py23
-rw-r--r--youtube_dl/downloader/f4m.py14
-rw-r--r--youtube_dl/downloader/fragment.py11
-rw-r--r--youtube_dl/downloader/hls.py30
-rw-r--r--youtube_dl/downloader/http.py20
-rw-r--r--youtube_dl/downloader/ism.py2
-rw-r--r--youtube_dl/extractor/abc.py20
-rw-r--r--youtube_dl/extractor/abcnews.py9
-rw-r--r--youtube_dl/extractor/abcotvs.py79
-rw-r--r--youtube_dl/extractor/acast.py86
-rw-r--r--youtube_dl/extractor/addanime.py95
-rw-r--r--youtube_dl/extractor/adn.py80
-rw-r--r--youtube_dl/extractor/adobeconnect.py37
-rw-r--r--youtube_dl/extractor/adobepass.py5
-rw-r--r--youtube_dl/extractor/adobetv.py241
-rw-r--r--youtube_dl/extractor/adultswim.py192
-rw-r--r--youtube_dl/extractor/aenetworks.py152
-rw-r--r--youtube_dl/extractor/americastestkitchen.py37
-rw-r--r--youtube_dl/extractor/anitube.py30
-rw-r--r--youtube_dl/extractor/anysex.py61
-rw-r--r--youtube_dl/extractor/aol.py40
-rw-r--r--youtube_dl/extractor/aparat.py95
-rw-r--r--youtube_dl/extractor/ard.py278
-rw-r--r--youtube_dl/extractor/arkena.py2
-rw-r--r--youtube_dl/extractor/arte.py324
-rw-r--r--youtube_dl/extractor/asiancrush.py80
-rw-r--r--youtube_dl/extractor/atresplayer.py214
-rw-r--r--youtube_dl/extractor/atvat.py6
-rw-r--r--youtube_dl/extractor/audioboom.py34
-rw-r--r--youtube_dl/extractor/audiomack.py2
-rw-r--r--youtube_dl/extractor/azmedien.py211
-rw-r--r--youtube_dl/extractor/bambuser.py142
-rw-r--r--youtube_dl/extractor/bbc.py53
-rw-r--r--youtube_dl/extractor/beampro.py12
-rw-r--r--youtube_dl/extractor/beeg.py111
-rw-r--r--youtube_dl/extractor/bellmedia.py11
-rw-r--r--youtube_dl/extractor/bfi.py37
-rw-r--r--youtube_dl/extractor/bilibili.py150
-rw-r--r--youtube_dl/extractor/biobiochiletv.py19
-rw-r--r--youtube_dl/extractor/biqle.py83
-rw-r--r--youtube_dl/extractor/bitchute.py36
-rw-r--r--youtube_dl/extractor/bleacherreport.py8
-rw-r--r--youtube_dl/extractor/blinkx.py4
-rw-r--r--youtube_dl/extractor/bokecc.py8
-rw-r--r--youtube_dl/extractor/bravotv.py40
-rw-r--r--youtube_dl/extractor/brightcove.py342
-rw-r--r--youtube_dl/extractor/businessinsider.py28
-rw-r--r--youtube_dl/extractor/byutv.py83
-rw-r--r--youtube_dl/extractor/cammodels.py2
-rw-r--r--youtube_dl/extractor/camtube.py2
-rw-r--r--youtube_dl/extractor/camwithher.py2
-rw-r--r--youtube_dl/extractor/canvas.py93
-rw-r--r--youtube_dl/extractor/carambatv.py6
-rw-r--r--youtube_dl/extractor/cartoonnetwork.py56
-rw-r--r--youtube_dl/extractor/cbc.py53
-rw-r--r--youtube_dl/extractor/cbs.py20
-rw-r--r--youtube_dl/extractor/cbsnews.py78
-rw-r--r--youtube_dl/extractor/ccc.py34
-rw-r--r--youtube_dl/extractor/ceskatelevize.py4
-rw-r--r--youtube_dl/extractor/channel9.py20
-rw-r--r--youtube_dl/extractor/chaturbate.py50
-rw-r--r--youtube_dl/extractor/cinemax.py29
-rw-r--r--youtube_dl/extractor/ciscolive.py151
-rw-r--r--youtube_dl/extractor/cliphunter.py21
-rw-r--r--youtube_dl/extractor/cloudflarestream.py32
-rw-r--r--youtube_dl/extractor/cnbc.py30
-rw-r--r--youtube_dl/extractor/cnn.py12
-rw-r--r--youtube_dl/extractor/comcarcoff.py74
-rw-r--r--youtube_dl/extractor/common.py313
-rw-r--r--youtube_dl/extractor/commonmistakes.py32
-rw-r--r--youtube_dl/extractor/contv.py118
-rw-r--r--youtube_dl/extractor/corus.py171
-rw-r--r--youtube_dl/extractor/crackle.py47
-rw-r--r--youtube_dl/extractor/criterion.py39
-rw-r--r--youtube_dl/extractor/crunchyroll.py139
-rw-r--r--youtube_dl/extractor/ctsnews.py10
-rw-r--r--youtube_dl/extractor/curiositystream.py56
-rw-r--r--youtube_dl/extractor/cwtv.py10
-rw-r--r--youtube_dl/extractor/dailymail.py7
-rw-r--r--youtube_dl/extractor/dailymotion.py570
-rw-r--r--youtube_dl/extractor/daisuki.py154
-rw-r--r--youtube_dl/extractor/daum.py106
-rw-r--r--youtube_dl/extractor/dbtv.py51
-rw-r--r--youtube_dl/extractor/dctp.py54
-rw-r--r--youtube_dl/extractor/discovery.py102
-rw-r--r--youtube_dl/extractor/discoverynetworks.py63
-rw-r--r--youtube_dl/extractor/dispeak.py11
-rw-r--r--youtube_dl/extractor/dlive.py97
-rw-r--r--youtube_dl/extractor/dplay.py403
-rw-r--r--youtube_dl/extractor/dramafever.py266
-rw-r--r--youtube_dl/extractor/dreisat.py2
-rw-r--r--youtube_dl/extractor/drtuber.py6
-rw-r--r--youtube_dl/extractor/drtv.py199
-rw-r--r--youtube_dl/extractor/dtube.py18
-rw-r--r--youtube_dl/extractor/dumpert.py83
-rw-r--r--youtube_dl/extractor/dvtv.py126
-rw-r--r--youtube_dl/extractor/einthusan.py15
-rw-r--r--youtube_dl/extractor/eporner.py3
-rw-r--r--youtube_dl/extractor/escapist.py35
-rw-r--r--youtube_dl/extractor/espn.py22
-rw-r--r--youtube_dl/extractor/expressen.py4
-rw-r--r--youtube_dl/extractor/extractors.py276
-rw-r--r--youtube_dl/extractor/facebook.py39
-rw-r--r--youtube_dl/extractor/fivetv.py6
-rw-r--r--youtube_dl/extractor/flipagram.py115
-rw-r--r--youtube_dl/extractor/fourtube.py5
-rw-r--r--youtube_dl/extractor/fox.py133
-rw-r--r--youtube_dl/extractor/fox9.py43
-rw-r--r--youtube_dl/extractor/foxsports.py32
-rw-r--r--youtube_dl/extractor/franceculture.py8
-rw-r--r--youtube_dl/extractor/francetv.py15
-rw-r--r--youtube_dl/extractor/freespeech.py7
-rw-r--r--youtube_dl/extractor/frontendmasters.py4
-rw-r--r--youtube_dl/extractor/funimation.py11
-rw-r--r--youtube_dl/extractor/funk.py171
-rw-r--r--youtube_dl/extractor/funnyordie.py162
-rw-r--r--youtube_dl/extractor/fusion.py69
-rw-r--r--youtube_dl/extractor/gaia.py130
-rw-r--r--youtube_dl/extractor/gameinformer.py34
-rw-r--r--youtube_dl/extractor/gameone.py134
-rw-r--r--youtube_dl/extractor/gamespot.py5
-rw-r--r--youtube_dl/extractor/gdcvault.py100
-rw-r--r--youtube_dl/extractor/generic.py232
-rw-r--r--youtube_dl/extractor/gfycat.py16
-rw-r--r--youtube_dl/extractor/giantbomb.py11
-rw-r--r--youtube_dl/extractor/globo.py48
-rw-r--r--youtube_dl/extractor/go.py74
-rw-r--r--youtube_dl/extractor/go90.py149
-rw-r--r--youtube_dl/extractor/googledrive.py2
-rw-r--r--youtube_dl/extractor/hark.py33
-rw-r--r--youtube_dl/extractor/hbo.py86
-rw-r--r--youtube_dl/extractor/heise.py17
-rw-r--r--youtube_dl/extractor/hellporno.py73
-rw-r--r--youtube_dl/extractor/hitbox.py4
-rw-r--r--youtube_dl/extractor/hitrecord.py4
-rw-r--r--youtube_dl/extractor/hketv.py191
-rw-r--r--youtube_dl/extractor/hotstar.py210
-rw-r--r--youtube_dl/extractor/hrti.py4
-rw-r--r--youtube_dl/extractor/hungama.py117
-rw-r--r--youtube_dl/extractor/hypem.py50
-rw-r--r--youtube_dl/extractor/iconosquare.py85
-rw-r--r--youtube_dl/extractor/imdb.py58
-rw-r--r--youtube_dl/extractor/imggaming.py133
-rw-r--r--youtube_dl/extractor/imgur.py96
-rw-r--r--youtube_dl/extractor/ina.py75
-rw-r--r--youtube_dl/extractor/indavideo.py2
-rw-r--r--youtube_dl/extractor/infoq.py6
-rw-r--r--youtube_dl/extractor/instagram.py141
-rw-r--r--youtube_dl/extractor/internetvideoarchive.py92
-rw-r--r--youtube_dl/extractor/iprima.py52
-rw-r--r--youtube_dl/extractor/iqiyi.py6
-rw-r--r--youtube_dl/extractor/itv.py8
-rw-r--r--youtube_dl/extractor/ivi.py119
-rw-r--r--youtube_dl/extractor/jamendo.py183
-rw-r--r--youtube_dl/extractor/joj.py2
-rw-r--r--youtube_dl/extractor/jpopsukitv.py68
-rw-r--r--youtube_dl/extractor/jwplatform.py18
-rw-r--r--youtube_dl/extractor/kakao.py60
-rw-r--r--youtube_dl/extractor/kaltura.py48
-rw-r--r--youtube_dl/extractor/karrierevideos.py4
-rw-r--r--youtube_dl/extractor/keek.py39
-rw-r--r--youtube_dl/extractor/kinja.py221
-rw-r--r--youtube_dl/extractor/kontrtube.py73
-rw-r--r--youtube_dl/extractor/kuwo.py2
-rw-r--r--youtube_dl/extractor/la7.py4
-rw-r--r--youtube_dl/extractor/laola1tv.py118
-rw-r--r--youtube_dl/extractor/learnr.py33
-rw-r--r--youtube_dl/extractor/lecturio.py243
-rw-r--r--youtube_dl/extractor/leeco.py2
-rw-r--r--youtube_dl/extractor/lego.py183
-rw-r--r--youtube_dl/extractor/libraryofcongress.py40
-rw-r--r--youtube_dl/extractor/libsyn.py64
-rw-r--r--youtube_dl/extractor/limelight.py125
-rw-r--r--youtube_dl/extractor/linkedin.py182
-rw-r--r--youtube_dl/extractor/linuxacademy.py173
-rw-r--r--youtube_dl/extractor/livejournal.py42
-rw-r--r--youtube_dl/extractor/liveleak.py41
-rw-r--r--youtube_dl/extractor/livestream.py5
-rw-r--r--youtube_dl/extractor/lnkgo.py100
-rw-r--r--youtube_dl/extractor/lynda.py17
-rw-r--r--youtube_dl/extractor/macgamestore.py42
-rw-r--r--youtube_dl/extractor/mailru.py23
-rw-r--r--youtube_dl/extractor/makertv.py32
-rw-r--r--youtube_dl/extractor/malltv.py56
-rw-r--r--youtube_dl/extractor/mangomolo.py17
-rw-r--r--youtube_dl/extractor/manyvids.py62
-rw-r--r--youtube_dl/extractor/mediaset.py62
-rw-r--r--youtube_dl/extractor/mediasite.py157
-rw-r--r--youtube_dl/extractor/metacafe.py5
-rw-r--r--youtube_dl/extractor/mgtv.py50
-rw-r--r--youtube_dl/extractor/minhateca.py70
-rw-r--r--youtube_dl/extractor/mit.py24
-rw-r--r--youtube_dl/extractor/mitele.py99
-rw-r--r--youtube_dl/extractor/mixcloud.py503
-rw-r--r--youtube_dl/extractor/moevideo.py65
-rw-r--r--youtube_dl/extractor/mofosex.py23
-rw-r--r--youtube_dl/extractor/motherless.py20
-rw-r--r--youtube_dl/extractor/msn.py196
-rw-r--r--youtube_dl/extractor/mtv.py88
-rw-r--r--youtube_dl/extractor/musicplayon.py66
-rw-r--r--youtube_dl/extractor/myspass.py77
-rw-r--r--youtube_dl/extractor/nationalgeographic.py149
-rw-r--r--youtube_dl/extractor/naver.py158
-rw-r--r--youtube_dl/extractor/nbc.py221
-rw-r--r--youtube_dl/extractor/ndr.py31
-rw-r--r--youtube_dl/extractor/ndtv.py4
-rw-r--r--youtube_dl/extractor/newstube.py116
-rw-r--r--youtube_dl/extractor/nextmedia.py4
-rw-r--r--youtube_dl/extractor/nexx.py31
-rw-r--r--youtube_dl/extractor/nfb.py112
-rw-r--r--youtube_dl/extractor/nhk.py106
-rw-r--r--youtube_dl/extractor/nhl.py2
-rw-r--r--youtube_dl/extractor/nick.py6
-rw-r--r--youtube_dl/extractor/niconico.py26
-rw-r--r--youtube_dl/extractor/ninenow.py6
-rw-r--r--youtube_dl/extractor/nintendo.py28
-rw-r--r--youtube_dl/extractor/njpwworld.py12
-rw-r--r--youtube_dl/extractor/noco.py2
-rw-r--r--youtube_dl/extractor/nonktube.py13
-rw-r--r--youtube_dl/extractor/noovo.py8
-rw-r--r--youtube_dl/extractor/nova.py137
-rw-r--r--youtube_dl/extractor/novamov.py212
-rw-r--r--youtube_dl/extractor/npo.py129
-rw-r--r--youtube_dl/extractor/npr.py92
-rw-r--r--youtube_dl/extractor/nrk.py126
-rw-r--r--youtube_dl/extractor/nrl.py30
-rw-r--r--youtube_dl/extractor/ntvcojp.py49
-rw-r--r--youtube_dl/extractor/ntvru.py49
-rw-r--r--youtube_dl/extractor/nytimes.py4
-rw-r--r--youtube_dl/extractor/nzz.py13
-rw-r--r--youtube_dl/extractor/odnoklassniki.py18
-rw-r--r--youtube_dl/extractor/once.py2
-rw-r--r--youtube_dl/extractor/onet.py54
-rw-r--r--youtube_dl/extractor/onionstudios.py62
-rw-r--r--youtube_dl/extractor/ooyala.py103
-rw-r--r--youtube_dl/extractor/openload.py13
-rw-r--r--youtube_dl/extractor/orf.py233
-rw-r--r--youtube_dl/extractor/outsidetv.py28
-rw-r--r--youtube_dl/extractor/packtpub.py130
-rw-r--r--youtube_dl/extractor/pandatv.py99
-rw-r--r--youtube_dl/extractor/patreon.py185
-rw-r--r--youtube_dl/extractor/pbs.py6
-rw-r--r--youtube_dl/extractor/peertube.py500
-rw-r--r--youtube_dl/extractor/periscope.py88
-rw-r--r--youtube_dl/extractor/philharmoniedeparis.py124
-rw-r--r--youtube_dl/extractor/picarto.py42
-rw-r--r--youtube_dl/extractor/piksel.py37
-rw-r--r--youtube_dl/extractor/platzi.py224
-rw-r--r--youtube_dl/extractor/playplustv.py109
-rw-r--r--youtube_dl/extractor/pluralsight.py46
-rw-r--r--youtube_dl/extractor/podomatic.py4
-rw-r--r--youtube_dl/extractor/pokemon.py12
-rw-r--r--youtube_dl/extractor/popcorntimes.py99
-rw-r--r--youtube_dl/extractor/porn91.py7
-rw-r--r--youtube_dl/extractor/pornflip.py101
-rw-r--r--youtube_dl/extractor/pornhd.py48
-rw-r--r--youtube_dl/extractor/pornhub.py337
-rw-r--r--youtube_dl/extractor/primesharetv.py62
-rw-r--r--youtube_dl/extractor/promptfile.py70
-rw-r--r--youtube_dl/extractor/prosiebensat1.py245
-rw-r--r--youtube_dl/extractor/puhutv.py70
-rw-r--r--youtube_dl/extractor/radiocanada.py170
-rw-r--r--youtube_dl/extractor/rai.py9
-rw-r--r--youtube_dl/extractor/redbulltv.py30
-rw-r--r--youtube_dl/extractor/reddit.py3
-rw-r--r--youtube_dl/extractor/redtube.py48
-rw-r--r--youtube_dl/extractor/revision3.py170
-rw-r--r--youtube_dl/extractor/rmcdecouverte.py36
-rw-r--r--youtube_dl/extractor/roosterteeth.py159
-rw-r--r--youtube_dl/extractor/rte.py133
-rw-r--r--youtube_dl/extractor/rtl2.py49
-rw-r--r--youtube_dl/extractor/rtlnl.py4
-rw-r--r--youtube_dl/extractor/rtp.py83
-rw-r--r--youtube_dl/extractor/rudo.py53
-rw-r--r--youtube_dl/extractor/ruleporn.py44
-rw-r--r--youtube_dl/extractor/rutube.py114
-rw-r--r--youtube_dl/extractor/ruutu.py27
-rw-r--r--youtube_dl/extractor/safari.py100
-rw-r--r--youtube_dl/extractor/savefrom.py7
-rw-r--r--youtube_dl/extractor/sbs.py4
-rw-r--r--youtube_dl/extractor/screencast.py9
-rw-r--r--youtube_dl/extractor/scrippsnetworks.py77
-rw-r--r--youtube_dl/extractor/scte.py144
-rw-r--r--youtube_dl/extractor/seeker.py45
-rw-r--r--youtube_dl/extractor/servingsys.py72
-rw-r--r--youtube_dl/extractor/servus.py42
-rw-r--r--youtube_dl/extractor/shared.py68
-rw-r--r--youtube_dl/extractor/sixplay.py36
-rw-r--r--youtube_dl/extractor/sky.py (renamed from youtube_dl/extractor/skysports.py)57
-rw-r--r--youtube_dl/extractor/skylinewebcams.py2
-rw-r--r--youtube_dl/extractor/slideslive.py42
-rw-r--r--youtube_dl/extractor/soundcloud.py699
-rw-r--r--youtube_dl/extractor/spankbang.py138
-rw-r--r--youtube_dl/extractor/spankwire.py211
-rw-r--r--youtube_dl/extractor/spike.py27
-rw-r--r--youtube_dl/extractor/sportbox.py48
-rw-r--r--youtube_dl/extractor/sportdeutschland.py34
-rw-r--r--youtube_dl/extractor/srgssr.py19
-rw-r--r--youtube_dl/extractor/srmediathek.py4
-rw-r--r--youtube_dl/extractor/streamango.py122
-rw-r--r--youtube_dl/extractor/streamcloud.py2
-rw-r--r--youtube_dl/extractor/stretchinternet.py38
-rw-r--r--youtube_dl/extractor/stv.py67
-rw-r--r--youtube_dl/extractor/sverigesradio.py115
-rw-r--r--youtube_dl/extractor/svt.py151
-rw-r--r--youtube_dl/extractor/tbs.py6
-rw-r--r--youtube_dl/extractor/teachable.py298
-rw-r--r--youtube_dl/extractor/teachingchannel.py26
-rw-r--r--youtube_dl/extractor/teamcoco.py76
-rw-r--r--youtube_dl/extractor/teamtreehouse.py140
-rw-r--r--youtube_dl/extractor/ted.py129
-rw-r--r--youtube_dl/extractor/tele5.py82
-rw-r--r--youtube_dl/extractor/telecinco.py44
-rw-r--r--youtube_dl/extractor/telegraaf.py75
-rw-r--r--youtube_dl/extractor/telequebec.py60
-rw-r--r--youtube_dl/extractor/tenplay.py58
-rw-r--r--youtube_dl/extractor/testurl.py6
-rw-r--r--youtube_dl/extractor/tf1.py44
-rw-r--r--youtube_dl/extractor/tfo.py6
-rw-r--r--youtube_dl/extractor/theplatform.py23
-rw-r--r--youtube_dl/extractor/thesun.py14
-rw-r--r--youtube_dl/extractor/thisoldhouse.py37
-rw-r--r--youtube_dl/extractor/tiktok.py138
-rw-r--r--youtube_dl/extractor/tnaflix.py10
-rw-r--r--youtube_dl/extractor/toggle.py21
-rw-r--r--youtube_dl/extractor/toutv.py111
-rw-r--r--youtube_dl/extractor/trunews.py34
-rw-r--r--youtube_dl/extractor/trutv.py84
-rw-r--r--youtube_dl/extractor/tumblr.py3
-rw-r--r--youtube_dl/extractor/tutv.py36
-rw-r--r--youtube_dl/extractor/tv2.py105
-rw-r--r--youtube_dl/extractor/tv2dk.py154
-rw-r--r--youtube_dl/extractor/tv3.py34
-rw-r--r--youtube_dl/extractor/tv4.py17
-rw-r--r--youtube_dl/extractor/tv5mondeplus.py84
-rw-r--r--youtube_dl/extractor/tva.py9
-rw-r--r--youtube_dl/extractor/tvigle.py53
-rw-r--r--youtube_dl/extractor/tvland.py27
-rw-r--r--youtube_dl/extractor/tvn24.py44
-rw-r--r--youtube_dl/extractor/tvnow.py390
-rw-r--r--youtube_dl/extractor/tvp.py113
-rw-r--r--youtube_dl/extractor/tvplay.py143
-rw-r--r--youtube_dl/extractor/twentyfourvideo.py25
-rw-r--r--youtube_dl/extractor/twitcasting.py81
-rw-r--r--youtube_dl/extractor/twitch.py208
-rw-r--r--youtube_dl/extractor/twitter.py600
-rw-r--r--youtube_dl/extractor/udemy.py38
-rw-r--r--youtube_dl/extractor/ufctv.py73
-rw-r--r--youtube_dl/extractor/uol.py127
-rw-r--r--youtube_dl/extractor/upskill.py176
-rw-r--r--youtube_dl/extractor/urplay.py18
-rw-r--r--youtube_dl/extractor/usanetwork.py28
-rw-r--r--youtube_dl/extractor/usatoday.py29
-rw-r--r--youtube_dl/extractor/ustream.py2
-rw-r--r--youtube_dl/extractor/veehd.py2
-rw-r--r--youtube_dl/extractor/veoh.py99
-rw-r--r--youtube_dl/extractor/vessel.py157
-rw-r--r--youtube_dl/extractor/vevo.py10
-rw-r--r--youtube_dl/extractor/vice.py216
-rw-r--r--youtube_dl/extractor/viddler.py22
-rw-r--r--youtube_dl/extractor/videodetective.py11
-rw-r--r--youtube_dl/extractor/videomega.py60
-rw-r--r--youtube_dl/extractor/videomore.py96
-rw-r--r--youtube_dl/extractor/videopremium.py46
-rw-r--r--youtube_dl/extractor/viewlift.py312
-rw-r--r--youtube_dl/extractor/viewster.py217
-rw-r--r--youtube_dl/extractor/viki.py4
-rw-r--r--youtube_dl/extractor/vimeo.py584
-rw-r--r--youtube_dl/extractor/vk.py422
-rw-r--r--youtube_dl/extractor/vlive.py200
-rw-r--r--youtube_dl/extractor/vodplatform.py9
-rw-r--r--youtube_dl/extractor/voicerepublic.py76
-rw-r--r--youtube_dl/extractor/voxmedia.py101
-rw-r--r--youtube_dl/extractor/vporn.py123
-rw-r--r--youtube_dl/extractor/vrt.py197
-rw-r--r--youtube_dl/extractor/vrv.py173
-rw-r--r--youtube_dl/extractor/vshare.py2
-rw-r--r--youtube_dl/extractor/vvvvid.py2
-rw-r--r--youtube_dl/extractor/vzaar.py37
-rw-r--r--youtube_dl/extractor/wakanim.py66
-rw-r--r--youtube_dl/extractor/weibo.py2
-rw-r--r--youtube_dl/extractor/wimp.py58
-rw-r--r--youtube_dl/extractor/wistia.py108
-rw-r--r--youtube_dl/extractor/wrzuta.py158
-rw-r--r--youtube_dl/extractor/wwe.py140
-rw-r--r--youtube_dl/extractor/xfileshare.py192
-rw-r--r--youtube_dl/extractor/xhamster.py91
-rw-r--r--youtube_dl/extractor/xiami.py2
-rw-r--r--youtube_dl/extractor/xtube.py48
-rw-r--r--youtube_dl/extractor/xvideos.py58
-rw-r--r--youtube_dl/extractor/yahoo.py884
-rw-r--r--youtube_dl/extractor/yandexmusic.py117
-rw-r--r--youtube_dl/extractor/yandexvideo.py104
-rw-r--r--youtube_dl/extractor/youjizz.py2
-rw-r--r--youtube_dl/extractor/youku.py4
-rw-r--r--youtube_dl/extractor/youporn.py37
-rw-r--r--youtube_dl/extractor/yourporn.py40
-rw-r--r--youtube_dl/extractor/youtube.py1155
-rw-r--r--youtube_dl/extractor/zapiks.py1
-rw-r--r--youtube_dl/extractor/zattoo.py36
-rw-r--r--youtube_dl/extractor/zdf.py5
-rw-r--r--youtube_dl/extractor/zype.py134
-rw-r--r--youtube_dl/options.py6
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py114
-rw-r--r--youtube_dl/postprocessor/xattrpp.py4
-rw-r--r--youtube_dl/update.py7
-rw-r--r--youtube_dl/utils.py1923
-rw-r--r--youtube_dl/version.py2
462 files changed, 25079 insertions, 15494 deletions
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
deleted file mode 100644
index a4602287a..000000000
--- a/.github/ISSUE_TEMPLATE.md
+++ /dev/null
@@ -1,61 +0,0 @@
-## Please follow the guide below
-
-- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
-- Use the *Preview* tab to see what your issue will actually look like
-
----
-
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.09.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.09.18**
-
-### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
-- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
-- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
-
-### What is the purpose of your *issue*?
-- [ ] Bug report (encountered problems with youtube-dl)
-- [ ] Site support request (request for adding support for a new site)
-- [ ] Feature request (request for a new functionality)
-- [ ] Question
-- [ ] Other
-
----
-
-### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue*
-
----
-
-### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
-
-Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
-
-```
-[debug] System config: []
-[debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
-[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.09.18
-[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
-[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
-[debug] Proxy map: {}
-...
-<end of log>
-```
-
----
-
-### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**):
-- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
-- Single video: https://youtu.be/BaW_jenozKc
-- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
-
-Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
-
----
-
-### Description of your *issue*, suggested solution and other information
-
-Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
-If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md
new file mode 100644
index 000000000..f2260db46
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md
@@ -0,0 +1,63 @@
+---
+name: Broken site support
+about: Report broken or misfunctioning site
+title: ''
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
+- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
+- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a broken site support
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
+- [ ] I've checked that all provided URLs are alive and playable in a browser
+- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
+- [ ] I've searched the bugtracker for similar issues including closed ones
+
+
+## Verbose log
+
+<!--
+Provide the complete verbose output of youtube-dl that clearly demonstrates the problem.
+Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
+ [debug] System config: []
+ [debug] User config: []
+ [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+ [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+ [debug] youtube-dl version 2020.07.28
+ [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
+ [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+ [debug] Proxy map: {}
+ <more lines>
+-->
+
+```
+PASTE VERBOSE LOG HERE
+```
+
+
+## Description
+
+<!--
+Provide an explanation of your issue in an arbitrary form. Provide any additional information, suggested solution and as much context and examples as possible.
+If work on your issue requires account credentials please provide them or explain how one can obtain them.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md
new file mode 100644
index 000000000..8bc05c4ba
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md
@@ -0,0 +1,54 @@
+---
+name: Site support request
+about: Request support for a new site
+title: ''
+labels: 'site-support-request'
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
+- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
+- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a new site support request
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
+- [ ] I've checked that all provided URLs are alive and playable in a browser
+- [ ] I've checked that none of provided URLs violate any copyrights
+- [ ] I've searched the bugtracker for similar site support requests including closed ones
+
+
+## Example URLs
+
+<!--
+Provide all kinds of example URLs support for which should be included. Replace following example URLs by yours.
+-->
+
+- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+- Single video: https://youtu.be/BaW_jenozKc
+- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+
+
+## Description
+
+<!--
+Provide any additional information.
+If work on your issue requires account credentials please provide them or explain how one can obtain them.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
new file mode 100644
index 000000000..98348e0cd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -0,0 +1,37 @@
+---
+name: Site feature request
+about: Request a new functionality for a site
+title: ''
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a site feature request
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
+- [ ] I've searched the bugtracker for similar site feature requests including closed ones
+
+
+## Description
+
+<!--
+Provide an explanation of your site feature request in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md
new file mode 100644
index 000000000..86706f528
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.md
@@ -0,0 +1,65 @@
+---
+name: Bug report
+about: Report a bug unrelated to any particular site or extractor
+title: ''
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
+- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
+- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Read bugs section in FAQ: http://yt-dl.org/reporting
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a bug unrelated to a specific site
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
+- [ ] I've checked that all provided URLs are alive and playable in a browser
+- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
+- [ ] I've searched the bugtracker for similar bug reports including closed ones
+- [ ] I've read bugs section in FAQ
+
+
+## Verbose log
+
+<!--
+Provide the complete verbose output of youtube-dl that clearly demonstrates the problem.
+Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
+ [debug] System config: []
+ [debug] User config: []
+ [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+ [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+ [debug] youtube-dl version 2020.07.28
+ [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
+ [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+ [debug] Proxy map: {}
+ <more lines>
+-->
+
+```
+PASTE VERBOSE LOG HERE
+```
+
+
+## Description
+
+<!--
+Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible.
+If work on your issue requires account credentials please provide them or explain how one can obtain them.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md
new file mode 100644
index 000000000..52c2709f9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.md
@@ -0,0 +1,38 @@
+---
+name: Feature request
+about: Request a new functionality unrelated to any particular site or extractor
+title: ''
+labels: 'request'
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a feature request
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
+- [ ] I've searched the bugtracker for similar feature requests including closed ones
+
+
+## Description
+
+<!--
+Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md
new file mode 100644
index 000000000..1fd7cd5dc
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/6_question.md
@@ -0,0 +1,38 @@
+---
+name: Ask question
+about: Ask youtube-dl related question
+title: ''
+labels: 'question'
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- Look through the README (http://yt-dl.org/readme) and FAQ (http://yt-dl.org/faq) for similar questions
+- Search the bugtracker for similar questions: http://yt-dl.org/search-issues
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm asking a question
+- [ ] I've looked through the README and FAQ for similar questions
+- [ ] I've searched the bugtracker for similar questions including closed ones
+
+
+## Question
+
+<!--
+Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient.
+-->
+
+WRITE QUESTION HERE
diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md
deleted file mode 100644
index 8edbd5a0f..000000000
--- a/.github/ISSUE_TEMPLATE_tmpl.md
+++ /dev/null
@@ -1,61 +0,0 @@
-## Please follow the guide below
-
-- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly
-- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`)
-- Use the *Preview* tab to see what your issue will actually look like
-
----
-
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s**
-
-### Before submitting an *issue* make sure you have:
-- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
-- [ ] [Searched](https://github.com/rg3/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones
-- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser
-
-### What is the purpose of your *issue*?
-- [ ] Bug report (encountered problems with youtube-dl)
-- [ ] Site support request (request for adding support for a new site)
-- [ ] Feature request (request for a new functionality)
-- [ ] Question
-- [ ] Other
-
----
-
-### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue*
-
----
-
-### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows:
-
-Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v <your command line>`), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```):
-
-```
-[debug] System config: []
-[debug] User config: []
-[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
-[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version %(version)s
-[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
-[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
-[debug] Proxy map: {}
-...
-<end of log>
-```
-
----
-
-### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**):
-- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
-- Single video: https://youtu.be/BaW_jenozKc
-- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
-
-Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/rg3/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
-
----
-
-### Description of your *issue*, suggested solution and other information
-
-Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible.
-If work on your *issue* requires account credentials please provide them or explain how one can obtain them.
diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
new file mode 100644
index 000000000..c7600d5b5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md
@@ -0,0 +1,63 @@
+---
+name: Broken site support
+about: Report broken or malfunctioning site
+title: ''
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
+- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
+- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a broken site support
+- [ ] I've verified that I'm running youtube-dl version **%(version)s**
+- [ ] I've checked that all provided URLs are alive and playable in a browser
+- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
+- [ ] I've searched the bugtracker for similar issues including closed ones
+
+
+## Verbose log
+
+<!--
+Provide the complete verbose output of youtube-dl that clearly demonstrates the problem.
+Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
+ [debug] System config: []
+ [debug] User config: []
+ [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+ [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+ [debug] youtube-dl version %(version)s
+ [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
+ [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+ [debug] Proxy map: {}
+ <more lines>
+-->
+
+```
+PASTE VERBOSE LOG HERE
+```
+
+
+## Description
+
+<!--
+Provide an explanation of your issue in an arbitrary form. Provide any additional information, suggested solution and as much context and examples as possible.
+If work on your issue requires account credentials please provide them or explain how one can obtain them.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md
new file mode 100644
index 000000000..d4988e639
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md
@@ -0,0 +1,54 @@
+---
+name: Site support request
+about: Request support for a new site
+title: ''
+labels: 'site-support-request'
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
+- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
+- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a new site support request
+- [ ] I've verified that I'm running youtube-dl version **%(version)s**
+- [ ] I've checked that all provided URLs are alive and playable in a browser
+- [ ] I've checked that none of provided URLs violate any copyrights
+- [ ] I've searched the bugtracker for similar site support requests including closed ones
+
+
+## Example URLs
+
+<!--
+Provide all kinds of example URLs support for which should be included. Replace following example URLs by yours.
+-->
+
+- Single video: https://www.youtube.com/watch?v=BaW_jenozKc
+- Single video: https://youtu.be/BaW_jenozKc
+- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
+
+
+## Description
+
+<!--
+Provide any additional information.
+If work on your issue requires account credentials please provide them or explain how one can obtain them.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md
new file mode 100644
index 000000000..65f0a32f3
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md
@@ -0,0 +1,37 @@
+---
+name: Site feature request
+about: Request a new functionality for a site
+title: ''
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a site feature request
+- [ ] I've verified that I'm running youtube-dl version **%(version)s**
+- [ ] I've searched the bugtracker for similar site feature requests including closed ones
+
+
+## Description
+
+<!--
+Provide an explanation of your site feature request in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
new file mode 100644
index 000000000..41fb14b72
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md
@@ -0,0 +1,65 @@
+---
+name: Bug report
+about: Report a bug unrelated to any particular site or extractor
+title: ''
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
+- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
+- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Read bugs section in FAQ: http://yt-dl.org/reporting
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a bug unrelated to a specific site
+- [ ] I've verified that I'm running youtube-dl version **%(version)s**
+- [ ] I've checked that all provided URLs are alive and playable in a browser
+- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
+- [ ] I've searched the bugtracker for similar bug reports including closed ones
+- [ ] I've read bugs section in FAQ
+
+
+## Verbose log
+
+<!--
+Provide the complete verbose output of youtube-dl that clearly demonstrates the problem.
+Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this:
+ [debug] System config: []
+ [debug] User config: []
+ [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
+ [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+ [debug] youtube-dl version %(version)s
+ [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
+ [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+ [debug] Proxy map: {}
+ <more lines>
+-->
+
+```
+PASTE VERBOSE LOG HERE
+```
+
+
+## Description
+
+<!--
+Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible.
+If work on your issue requires account credentials please provide them or explain how one can obtain them.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md
new file mode 100644
index 000000000..b3431a7f0
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md
@@ -0,0 +1,38 @@
+---
+name: Feature request
+about: Request a new functionality unrelated to any particular site or extractor
+title: ''
+labels: 'request'
+---
+
+<!--
+
+######################################################################
+ WARNING!
+ IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE
+######################################################################
+
+-->
+
+
+## Checklist
+
+<!--
+Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
+- First of all, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is %(version)s. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
+- Finally, put x into all relevant boxes (like this [x])
+-->
+
+- [ ] I'm reporting a feature request
+- [ ] I've verified that I'm running youtube-dl version **%(version)s**
+- [ ] I've searched the bugtracker for similar feature requests including closed ones
+
+
+## Description
+
+<!--
+Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible.
+-->
+
+WRITE DESCRIPTION HERE
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index ba4ca7553..e69b907d8 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -7,8 +7,8 @@
---
### Before submitting a *pull request* make sure you have:
-- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/rg3/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/rg3/youtube-dl#youtube-dl-coding-conventions) sections
-- [ ] [Searched](https://github.com/rg3/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
+- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections
+- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)
### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
diff --git a/.travis.yml b/.travis.yml
index 92f326860..51afd469a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,14 +9,37 @@ python:
- "3.6"
- "pypy"
- "pypy3"
-sudo: false
+dist: trusty
env:
- YTDL_TEST_SET=core
- YTDL_TEST_SET=download
-matrix:
+jobs:
include:
+ - python: 3.7
+ dist: xenial
+ env: YTDL_TEST_SET=core
+ - python: 3.7
+ dist: xenial
+ env: YTDL_TEST_SET=download
+ - python: 3.8
+ dist: xenial
+ env: YTDL_TEST_SET=core
+ - python: 3.8
+ dist: xenial
+ env: YTDL_TEST_SET=download
+ - python: 3.8-dev
+ dist: xenial
+ env: YTDL_TEST_SET=core
+ - python: 3.8-dev
+ dist: xenial
+ env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=download
+ - name: flake8
+ python: 3.8
+ dist: xenial
+ install: pip install flake8
+ script: flake8 .
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 333acee80..58ab3a4b8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -42,11 +42,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're
### Is the issue already documented?
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
@@ -70,7 +70,7 @@ It may sound strange, but some bug reports we receive are completely unrelated t
# DEVELOPER INSTRUCTIONS
-Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -98,7 +98,7 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
-1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
+1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
@@ -150,18 +150,22 @@ After you have ensured this site is distributing its content legally, you can fo
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+ $ flake8 youtube_dl/extractor/yourextractor.py
+
+9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
-10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
@@ -173,7 +177,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
### Mandatory and optional metafields
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
@@ -181,7 +185,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
@@ -257,11 +261,33 @@ title = meta.get('title') or self._og_search_title(webpage)
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
-### Make regular expressions flexible
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
+
+##### Example
+
+Don't capture id attribute name here since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
-When using regular expressions try to write them fuzzy and flexible.
+Incorrect:
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
-#### Example
+##### Example
Say you need to extract `title` from the following HTML code:
@@ -294,7 +320,115 @@ title = self._search_regex(
webpage, 'title', group='title')
```
-### Use safe conversion functions
+### Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+
+For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
+
+### Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+#### Example
+
+Correct:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+```
+
+Incorrect:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list,
+)
+```
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
+
+Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
diff --git a/ChangeLog b/ChangeLog
index 800ece790..bf515f784 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,1492 @@
+version 2020.07.28
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
+* [youtube] Improve description extraction (#25937, #25980)
+* [wistia] Restrict embed regular expression (#25969)
+* [youtube] Prevent excess HTTP 301 (#25786)
++ [youtube:playlists] Extend URL regular expression (#25810)
++ [bellmedia] Add support for cp24.com clip URLs (#25764)
+* [brightcove] Improve embed detection (#25674)
+
+
+version 2020.06.16.1
+
+Extractors
+* [youtube] Force old layout (#25682, #25683, #25680, #25686)
+* [youtube] Fix categories and improve tags extraction
+
+
+version 2020.06.16
+
+Extractors
+* [youtube] Fix uploader id and uploader URL extraction
+* [youtube] Improve view count extraction
+* [youtube] Fix upload date extraction (#25677)
+* [youtube] Fix thumbnails extraction (#25676)
+* [youtube] Fix playlist and feed extraction (#25675)
++ [facebook] Add support for single-video ID links
++ [youtube] Extract chapters from JSON (#24819)
++ [kaltura] Add support for multiple embeds on a webpage (#25523)
+
+
+version 2020.06.06
+
+Extractors
+* [tele5] Bypass geo restriction
++ [jwplatform] Add support for bypass geo restriction
+* [tele5] Prefer jwplatform over nexx (#25533)
+* [twitch:stream] Expect 400 and 410 HTTP errors from API
+* [twitch:stream] Fix extraction (#25528)
+* [twitch] Fix thumbnails extraction (#25531)
++ [twitch] Pass v5 Accept HTTP header (#25531)
+* [brightcove] Fix subtitles extraction (#25540)
++ [malltv] Add support for sk.mall.tv (#25445)
+* [periscope] Fix untitled broadcasts (#25482)
+* [jwplatform] Improve embeds extraction (#25467)
+
+
+version 2020.05.29
+
+Core
+* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
+* [utils] Fix file permissions in write_json_file (#12471, #25122)
+
+Extractors
+* [ard:beta] Extend URL regular expression (#25405)
++ [youtube] Add support for more invidious instances (#25417)
+* [giantbomb] Extend URL regular expression (#25222)
+* [ard] Improve URL regular expression (#25134, #25198)
+* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
+ #25321)
+* [indavideo] Switch to HTTPS for API request (#25191)
+* [redtube] Improve title extraction (#25208)
+* [vimeo] Improve format extraction and sorting (#25285)
+* [soundcloud] Reduce API playlist page limit (#25274)
++ [youtube] Add support for yewtu.be (#25226)
+* [mailru] Fix extraction (#24530, #25239)
+* [bellator] Fix mgid extraction (#25195)
+
+
+version 2020.05.08
+
+Core
+* [downloader/http] Request last data block of exact remaining size
+* [downloader/http] Finish downloading once received data length matches
+ expected
+* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
+ ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
++ [compat] Introduce compat_cookiejar_Cookie
+* [utils] Improve cookie files support
+ + Add support for UTF-8 in cookie files
+ * Skip malformed cookie file entries instead of crashing (invalid entry
+ length, invalid expires at)
+
+Extractors
+* [youtube] Improve signature cipher extraction (#25187, #25188)
+* [iprima] Improve extraction (#25138)
+* [uol] Fix extraction (#22007)
++ [orf] Add support for more radio stations (#24938, #24968)
+* [dailymotion] Fix typo
+- [puhutv] Remove no longer available HTTP formats (#25124)
+
+
+version 2020.05.03
+
+Core
++ [extractor/common] Extract multiple JSON-LD entries
+* [options] Clarify doc on --exec command (#19087, #24883)
+* [extractor/common] Skip malformed ISM manifest XMLs while extracting
+ ISM formats (#24667)
+
+Extractors
+* [crunchyroll] Fix and improve extraction (#25096, #25060)
+* [youtube] Improve player id extraction
+* [youtube] Use redirected video id if any (#25063)
+* [yahoo] Fix GYAO Player extraction and relax URL regular expression
+ (#24178, #24778)
+* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
+* [tenplay] Relax URL regular expression (#25001)
++ [prosiebensat1] Extract series metadata
+* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
+- [prosiebensat1] Remove 7tv.de support (#24948)
+* [youtube] Fix DRM videos detection (#24736)
+* [thisoldhouse] Fix video id extraction (#24548, #24549)
++ [soundcloud] Extract AAC format (#19173, #24708)
+* [youtube] Skip broken multifeed videos (#24711)
+* [nova:embed] Fix extraction (#24700)
+* [motherless] Fix extraction (#24699)
+* [twitch:clips] Extend URL regular expression (#24290, #24642)
+* [tv4] Fix ISM formats extraction (#24667)
+* [tele5] Fix extraction (#24553)
++ [mofosex] Add support for generic embeds (#24633)
++ [youporn] Add support for generic embeds
++ [spankwire] Add support for generic embeds (#24633)
+* [spankwire] Fix extraction (#18924, #20648)
+
+
+version 2020.03.24
+
+Core
+- [utils] Revert support for cookie files with spaces used instead of tabs
+
+Extractors
+* [teachable] Update upskillcourses and gns3 domains
+* [generic] Look for teachable embeds before wistia
++ [teachable] Extract chapter metadata (#24421)
++ [bilibili] Add support for player.bilibili.com (#24402)
++ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
+* [limelight] Remove disabled API requests (#24255)
+* [soundcloud] Fix download URL extraction (#24394)
++ [cbc:watch] Add support for authentication (#19160)
+* [hellporno] Fix extraction (#24399)
+* [xtube] Fix formats extraction (#24348)
+* [ndr] Fix extraction (#24326)
+* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
+- [nhk] Remove obsolete rtmp formats (#24329)
+* [nhk] Relax URL regular expression (#24329)
+- [vimeo] Revert fix showcase password protected video extraction (#24224)
+
+
+version 2020.03.08
+
+Core
++ [utils] Add support for cookie files with spaces used instead of tabs
+
+Extractors
++ [pornhub] Add support for pornhubpremium.com (#24288)
+- [youtube] Remove outdated code and unnecessary requests
+* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
+* [nhk] Update API version (#24270)
+
+
+version 2020.03.06
+
+Extractors
+* [youtube] Fix age-gated videos support without login (#24248)
+* [vimeo] Fix showcase password protected video extraction (#24224)
+* [pornhub] Improve title extraction (#24184)
+* [peertube] Improve extraction (#23657)
++ [servus] Add support for new URL schema (#23475, #23583, #24142)
+* [vimeo] Fix subtitles URLs (#24209)
+
+
+version 2020.03.01
+
+Core
+* [YoutubeDL] Force redirect URL to unicode on python 2
+- [options] Remove duplicate short option -v for --version (#24162)
+
+Extractors
+* [xhamster] Fix extraction (#24205)
+* [franceculture] Fix extraction (#24204)
++ [telecinco] Add support for article opening videos
+* [telecinco] Fix extraction (#24195)
+* [xtube] Fix metadata extraction (#21073, #22455)
+* [youjizz] Fix extraction (#24181)
+- Remove no longer needed compat_str around geturl
+* [pornhd] Fix extraction (#24128)
++ [teachable] Add support for multiple videos per lecture (#24101)
++ [wistia] Add support for multiple generic embeds (#8347, #11385)
+* [imdb] Fix extraction (#23443)
+* [tv2dk:bornholm:play] Fix extraction (#24076)
+
+
+version 2020.02.16
+
+Core
+* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
+ #10622)
+* [update] Fix updating via symlinks (#23991)
++ [compat] Introduce compat_realpath (#23991)
+
+Extractors
++ [npr] Add support for streams (#24042)
++ [24video] Add support for porn.24video.net (#23779, #23784)
+- [jpopsuki] Remove extractor (#23858)
+* [nova] Improve extraction (#23690)
+* [nova:embed] Improve (#23690)
+* [nova:embed] Fix extraction (#23672)
++ [abc:iview] Add support for 720p (#22907, #22921)
+* [nytimes] Improve format sorting (#24010)
++ [toggle] Add support for mewatch.sg (#23895, #23930)
+* [thisoldhouse] Fix extraction (#23951)
++ [popcorntimes] Add support for popcorntimes.tv (#23949)
+* [sportdeutschland] Update to new API
+* [twitch:stream] Lowercase channel id for stream request (#23917)
+* [tv5mondeplus] Fix extraction (#23907, #23911)
+* [tva] Relax URL regular expression (#23903)
+* [vimeo] Fix album extraction (#23864)
+* [viewlift] Improve extraction
+ * Fix extraction (#23851)
+ + Add support for authentication
+ + Add support for more domains
+* [svt] Fix series extraction (#22297)
+* [svt] Fix article extraction (#22897, #22919)
+* [soundcloud] Improve private playlist/set tracks extraction (#3707)
+
+
+version 2020.01.24
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#23819)
+* [stretchinternet] Fix extraction (#4319)
+* [voicerepublic] Fix extraction
+* [azmedien] Fix extraction (#23783)
+* [businessinsider] Fix jwplatform id extraction (#22929, #22954)
++ [24video] Add support for 24video.vip (#23753)
+* [ivi:compilation] Fix entries extraction (#23770)
+* [ard] Improve extraction (#23761)
+ * Simplify extraction
+ + Extract age limit and series
+ * Bypass geo-restriction
++ [nbc] Add support for nbc multi network URLs (#23049)
+* [americastestkitchen] Fix extraction
+* [zype] Improve extraction
+ + Extract subtitles (#21258)
+ + Support URLs with alternative keys/tokens (#21258)
+ + Extract more metadata
+* [orf:tvthek] Improve geo restricted videos detection (#23741)
+* [soundcloud] Restore previews extraction (#23739)
+
+
+version 2020.01.15
+
+Extractors
+* [yourporn] Fix extraction (#21645, #22255, #23459)
++ [canvas] Add support for new API endpoint (#17680, #18629)
+* [ndr:base:embed] Improve thumbnails extraction (#23731)
++ [vodplatform] Add support for embed.kwikmotion.com domain
++ [twitter] Add support for promo_video_website cards (#23711)
+* [orf:radio] Clean description and improve extraction
+* [orf:fm4] Fix extraction (#23599)
+* [safari] Fix kaltura session extraction (#23679, #23670)
+* [lego] Fix extraction and extract subtitle (#23687)
+* [cloudflarestream] Improve extraction
+ + Add support for bytehighway.net domain
+ + Add support for signed URLs
+ + Extract thumbnail
+* [naver] Improve extraction
+ * Improve geo-restriction handling
+ + Extract automatic captions
+ + Extract uploader metadata
+ + Extract VLive HLS formats
+ * Improve metadata extraction
+- [pandatv] Remove extractor (#23630)
+* [dctp] Fix format extraction (#23656)
++ [scrippsnetworks] Add support for www.discovery.com videos
+* [discovery] Fix anonymous token extraction (#23650)
+* [nrktv:seriebase] Fix extraction (#23625, #23537)
+* [wistia] Improve format extraction and extract subtitles (#22590)
+* [vice] Improve extraction (#23631)
+* [redtube] Detect private videos (#23518)
+
+
+version 2020.01.01
+
+Extractors
+* [brightcove] Invalidate policy key cache on failing requests
+* [pornhub] Improve locked videos detection (#22449, #22780)
++ [pornhub] Add support for m3u8 formats
+* [pornhub] Fix extraction (#22749, #23082)
+* [brightcove] Update policy key on failing requests
+* [spankbang] Improve removed video detection (#23423)
+* [spankbang] Fix extraction (#23307, #23423, #23444)
+* [soundcloud] Automatically update client id on failing requests
+* [prosiebensat1] Improve geo restriction handling (#23571)
+* [brightcove] Cache brightcove player policy keys
+* [teachable] Fail with error message if no video URL found
+* [teachable] Improve locked lessons detection (#23528)
++ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
+* [mitele] Fix extraction (#21354, #23456)
+* [soundcloud] Update client id (#23516)
+* [mailru] Relax URL regular expressions (#23509)
+
+
+version 2019.12.25
+
+Core
+* [utils] Improve str_to_int
++ [downloader/hls] Add ability to override AES decryption key URL (#17521)
+
+Extractors
+* [mediaset] Fix parse formats (#23508)
++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
++ [slideslive] Add support for url and vimeo service names (#23414)
+* [slideslive] Fix extraction (#23413)
+* [twitch:clips] Fix extraction (#23375)
++ [soundcloud] Add support for token protected embeds (#18954)
+* [vk] Improve extraction
+ * Fix User Videos extraction (#23356)
+ * Extract all videos for lists with more than 1000 videos (#23356)
+ + Add support for video albums (#14327, #14492)
+- [kontrtube] Remove extractor
+- [videopremium] Remove extractor
+- [musicplayon] Remove extractor (#9225)
++ [ufctv] Add support for ufcfightpass.imgdge.com and
+ ufcfightpass.imggaming.com (#23343)
++ [twitch] Extract m3u8 formats frame rate (#23333)
++ [imggaming] Add support for playlists and extract subtitles
++ [ufcarabia] Add support for UFC Arabia (#23312)
+* [ufctv] Fix extraction
+* [yahoo] Fix gyao brightcove player id (#23303)
+* [vzaar] Override AES decryption key URL (#17521)
++ [vzaar] Add support for AES HLS manifests (#17521, #23299)
+* [nrl] Fix extraction
+* [teachingchannel] Fix extraction
+* [nintendo] Fix extraction and partially add support for Nintendo Direct
+ videos (#4592)
++ [ooyala] Add better fallback values for domain and streams variables
++ [youtube] Add support youtubekids.com (#23272)
+* [tv2] Detect DRM protection
++ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
+* [tv2] Fix tv2.no article extraction
+* [msn] Improve extraction
+ + Add support for YouTube and NBCSports embeds
+ + Add support for articles with multiple videos
+ * Improve AOL embed support
+ * Improve format extraction
+* [abcotvs] Relax URL regular expression and improve metadata extraction
+ (#18014)
+* [channel9] Reduce response size
+* [adobetv] Improve extraction
+ * Use OnDemandPagedList for list extractors
+ * Reduce show extraction requests
+ * Extract original video format and subtitles
+ + Add support for adobe tv embeds
+
+
+version 2019.11.28
+
+Core
++ [utils] Add generic caesar cipher and rot47
+* [utils] Handle rd-suffixed day parts in unified_strdate (#23199)
+
+Extractors
+* [vimeo] Improve extraction
+ * Fix review extraction
+ * Fix ondemand extraction
+ * Make password protected player case as an expected error (#22896)
+ * Simplify channel based extractors code
+- [openload] Remove extractor (#11999)
+- [verystream] Remove extractor
+- [streamango] Remove extractor (#15406)
+* [dailymotion] Improve extraction
+ * Extract http formats included in m3u8 manifest
+ * Fix user extraction (#3553, #21415)
+ + Add support for User Authentication (#11491)
+ * Fix password protected videos extraction (#23176)
+ * Respect age limit option and family filter cookie value (#18437)
+ * Handle video url playlist query param
+ * Report allowed countries for geo-restricted videos
+* [corus] Improve extraction
+ + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com
+ and disneylachaine.ca (#20861)
+ + Add support for self hosted videos (#22075)
+ * Detect DRM protection (#14910, #9164)
+* [vivo] Fix extraction (#22328, #22279)
++ [bitchute] Extract upload date (#22990, #23193)
+* [soundcloud] Update client id (#23214)
+
+
+version 2019.11.22
+
+Core
++ [extractor/common] Clean jwplayer description HTML tags
++ [extractor/common] Add data, headers and query to all major extract formats
+ methods
+
+Extractors
+* [chaturbate] Fix extraction (#23010, #23012)
++ [ntvru] Add support for non relative file URLs (#23140)
+* [vk] Fix wall audio thumbnails extraction (#23135)
+* [ivi] Fix format extraction (#21991)
+- [comcarcoff] Remove extractor
++ [drtv] Add support for new URL schema (#23059)
++ [nexx] Add support for Multi Player JS Setup (#23052)
++ [teamcoco] Add support for new videos (#23054)
+* [soundcloud] Check if the soundtrack has downloads left (#23045)
+* [facebook] Fix posts video data extraction (#22473)
+- [addanime] Remove extractor
+- [minhateca] Remove extractor
+- [daisuki] Remove extractor
+* [seeker] Fix extraction
+- [revision3] Remove extractors
+* [twitch] Fix video comments URL (#18593, #15828)
+* [twitter] Improve extraction
+ + Add support for generic embeds (#22168)
+ * Always extract http formats for native videos (#14934)
+ + Add support for Twitter Broadcasts (#21369)
+ + Extract more metadata
+ * Improve VMap format extraction
+ * Unify extraction code for both twitter statuses and cards
++ [twitch] Add support for Clip embed URLs
+* [lnkgo] Fix extraction (#16834)
+* [mixcloud] Improve extraction
+ * Improve metadata extraction (#11721)
+ * Fix playlist extraction (#22378)
+ * Fix user mixes extraction (#15197, #17865)
++ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384)
+* [onionstudios] Fix extraction
++ [hotstar] Pass Referer header to format requests (#22836)
+* [dplay] Minimize response size
++ [patreon] Extract uploader_id and filesize
+* [patreon] Minimize response size
+* [roosterteeth] Fix login request (#16094, #22689)
+
+
+version 2019.11.05
+
+Extractors
++ [scte] Add support for learning.scte.org (#22975)
++ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
+* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
+* [jamendo] Improve extraction
+ * Fix album extraction (#18564)
+ * Improve metadata extraction (#18565, #21379)
+* [mediaset] Relax URL guid matching (#18352)
++ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
+* [telegraaf] Fix extraction
++ [bellmedia] Add support for marilyn.ca videos (#22193)
+* [stv] Fix extraction (#22928)
+- [iconosquare] Remove extractor
+- [keek] Remove extractor
+- [gameone] Remove extractor (#21778)
+- [flipagram] Remove extractor
+- [bambuser] Remove extractor
+* [wistia] Reduce embed extraction false positives
++ [wistia] Add support for inline embeds (#22931)
+- [go90] Remove extractor
+* [kakao] Remove raw request
++ [kakao] Extract format total bitrate
+* [daum] Fix VOD and Clip extraction (#15015)
+* [kakao] Improve extraction
+ + Add support for embed URLs
+ + Add support for Kakao Legacy vid based embed URLs
+ * Only extract fields used for extraction
+ * Strip description and extract tags
+* [mixcloud] Fix cloudcast data extraction (#22821)
+* [yahoo] Improve extraction
+ + Add support for live streams (#3597, #3779, #22178)
+ * Bypass cookie consent page for european domains (#16948, #22576)
+ + Add generic support for embeds (#20332)
+* [tv2] Fix and improve extraction (#22787)
++ [tv2dk] Add support for TV2 DK sites
+* [onet] Improve extraction
+ + Add support for onet100.vod.pl
+ + Extract m3u8 formats
+ * Correct audio only format info
+* [fox9] Fix extraction
+
+
+version 2019.10.29
+
+Core
+* [utils] Actualize major IPv4 address blocks per country
+
+Extractors
++ [go] Add support for abc.com and freeform.com (#22823, #22864)
++ [mtv] Add support for mtvjapan.com
+* [mtv] Fix extraction for mtv.de (#22113)
+* [videodetective] Fix extraction
+* [internetvideoarchive] Fix extraction
+* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
+- [hark] Remove extractor
+- [tutv] Remove extractor
+- [learnr] Remove extractor
+- [macgamestore] Remove extractor
+* [la7] Update Kaltura service URL (#22358)
+* [thesun] Fix extraction (#16966)
+- [makertv] Remove extractor
++ [tenplay] Add support for 10play.com.au (#21446)
+* [soundcloud] Improve extraction
+ * Improve format extraction (#22123)
+ + Extract uploader_id and uploader_url (#21916)
+ + Extract all known thumbnails (#19071, #20659)
+ * Fix extraction for private playlists (#20976)
+ + Add support for playlist embeds (#20976)
+ * Skip preview formats (#22806)
+* [dplay] Improve extraction
+ + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
+ * Fix it.dplay.com extraction (#22826)
+ + Extract creator, tags and thumbnails
+ * Handle playback API call errors
++ [discoverynetworks] Add support for dplay.co.uk
+* [vk] Improve extraction
+ + Add support for Odnoklassniki embeds
+ + Extract more videos from user lists (#4470)
+ + Fix wall post audio extraction (#18332)
+ * Improve error detection (#22568)
++ [odnoklassniki] Add support for embeds
+* [puhutv] Improve extraction
+ * Fix subtitles extraction
+ * Transform HLS URLs to HTTP URLs
+ * Improve metadata extraction
+* [ceskatelevize] Skip DRM media
++ [facebook] Extract subtitles (#22777)
+* [globo] Handle alternative hash signing method
+
+
+version 2019.10.22
+
+Core
+* [utils] Improve subtitles_filename (#22753)
+
+Extractors
+* [facebook] Bypass download rate limits (#21018)
++ [contv] Add support for contv.com
+- [viewster] Remove extractor
+* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
+ * Update the list of domains
+ + Add support for aa-encoded video data
+ * Improve jwplayer format extraction
+ + Add support for Clappr sources
+* [mangomolo] Fix video format extraction and add support for player URLs
+* [audioboom] Improve metadata extraction
+* [twitch] Update VOD URL matching (#22395, #22727)
+- [mit] Remove support for video.mit.edu (#22403)
+- [servingsys] Remove extractor (#22639)
+* [dumpert] Fix extraction (#22428, #22564)
+* [atresplayer] Fix extraction (#16277, #16716)
+
+
+version 2019.10.16
+
+Core
+* [extractor/common] Make _is_valid_url more relaxed
+
+Extractors
+* [vimeo] Improve album videos id extraction (#22599)
++ [globo] Extract subtitles (#22713)
+* [bokecc] Improve player params extraction (#22638)
+* [nexx] Handle result list (#22666)
+* [vimeo] Fix VHX embed extraction
+* [nbc] Switch to graphql API (#18581, #22693, #22701)
+- [vessel] Remove extractor
+- [promptfile] Remove extractor (#6239)
+* [kaltura] Fix service URL extraction (#22658)
+* [kaltura] Fix embed info strip (#22658)
+* [globo] Fix format extraction (#20319)
+* [redtube] Improve metadata extraction (#22492, #22615)
+* [pornhub:uservideos:upload] Fix extraction (#22619)
++ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
+- [wimp] Remove extractor (#22088, #22091)
++ [gfycat] Extend URL regular expression (#22225)
++ [chaturbate] Extend URL regular expression (#22309)
+* [peertube] Update instances (#22414)
++ [telequebec] Add support for coucou.telequebec.tv (#22482)
++ [xvideos] Extend URL regular expression (#22471)
+- [youtube] Remove support for invidious.enkirton.net (#22543)
++ [openload] Add support for oload.monster (#22592)
+* [nrktv:seriebase] Fix extraction (#22596)
++ [youtube] Add support for yt.lelux.fi (#22597)
+* [orf:tvthek] Make manifest requests non fatal (#22578)
+* [teachable] Skip login when already logged in (#22572)
+* [viewlift] Improve extraction (#22545)
+* [nonktube] Fix extraction (#22544)
+
+
+version 2019.09.28
+
+Core
+* [YoutubeDL] Honour all --get-* options with --flat-playlist (#22493)
+
+Extractors
+* [vk] Fix extraction (#22522)
+* [heise] Fix kaltura embeds extraction (#22514)
+* [ted] Check for resources validity and extract subtitled downloads (#22513)
++ [youtube] Add support for
+ owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya.b32.i2p (#22292)
++ [nhk] Add support for clips
+* [nhk] Fix video extraction (#22249, #22353)
+* [byutv] Fix extraction (#22070)
++ [openload] Add support for oload.online (#22304)
++ [youtube] Add support for invidious.drycat.fr (#22451)
+* [jwplatform] Do not match video URLs (#20596, #22148)
+* [youtube:playlist] Unescape playlist uploader (#22483)
++ [bilibili] Add support audio albums and songs (#21094)
++ [instagram] Add support for tv URLs
++ [mixcloud] Allow uppercase letters in format URLs (#19280)
+* [brightcove] Delegate all supported legacy URLs to new extractor (#11523,
+ #12842, #13912, #15669, #16303)
+* [hotstar] Use native HLS downloader by default
++ [hotstar] Extract more formats (#22323)
+* [9now] Fix extraction (#22361)
+* [zdf] Bypass geo restriction
++ [tv4] Extract series metadata
+* [tv4] Fix extraction (#22443)
+
+
+version 2019.09.12.1
+
+Extractors
+* [youtube] Remove quality and tbr for itag 43 (#22372)
+
+
+version 2019.09.12
+
+Extractors
+* [youtube] Quick extraction tempfix (#22367, #22163)
+
+
+version 2019.09.01
+
+Core
++ [extractor/generic] Add support for squarespace embeds (#21294, #21802,
+ #21859)
++ [downloader/external] Respect mtime option for aria2c (#22242)
+
+Extractors
++ [xhamster:user] Add support for user pages (#16330, #18454)
++ [xhamster] Add support for more domains
++ [verystream] Add support for woof.tube (#22217)
++ [dailymotion] Add support for lequipe.fr (#21328, #22152)
++ [openload] Add support for oload.vip (#22205)
++ [bbccouk] Extend URL regular expression (#19200)
++ [youtube] Add support for invidious.nixnet.xyz and yt.elukerio.org (#22223)
+* [safari] Fix authentication (#22161, #22184)
+* [usanetwork] Fix extraction (#22105)
++ [einthusan] Add support for einthusan.ca (#22171)
+* [youtube] Improve unavailable message extraction (#22117)
++ [piksel] Extract subtitles (#20506)
+
+
+version 2019.08.13
+
+Core
+* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
+* [YoutubeDL] Check annotations availability (#18582)
+
+Extractors
+* [youtube:playlist] Improve flat extraction (#21927)
+* [youtube] Fix annotations extraction (#22045)
++ [discovery] Extract series meta field (#21808)
+* [youtube] Improve error detection (#16445)
+* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
++ [roosterteeth] Add support for watch URLs
+* [discovery] Limit video data by show slug (#21980)
+
+
+version 2019.08.02
+
+Extractors
++ [tvigle] Add support for HLS and DASH formats (#21967)
+* [tvigle] Fix extraction (#21967)
++ [yandexvideo] Add support for DASH formats (#21971)
+* [discovery] Use API call for video data extraction (#21808)
++ [mgtv] Extract format_note (#21881)
+* [tvn24] Fix metadata extraction (#21833, #21834)
+* [dlive] Relax URL regular expression (#21909)
++ [openload] Add support for oload.best (#21913)
+* [youtube] Improve metadata extraction for age gate content (#21943)
+
+
+version 2019.07.30
+
+Extractors
+* [youtube] Fix and improve title and description extraction (#21934)
+
+
+version 2019.07.27
+
+Extractors
++ [yahoo:japannews] Add support for yahoo.co.jp (#21698, #21265)
++ [discovery] Add support for go.discovery.com URLs
+* [youtube:playlist] Relax video regular expression (#21844)
+* [generic] Restrict --default-search schemeless URLs detection pattern
+ (#21842)
+* [vrv] Fix CMS signing query extraction (#21809)
+
+
+version 2019.07.16
+
+Extractors
++ [asiancrush] Add support for yuyutv.com, midnightpulp.com and cocoro.tv
+ (#21281, #21290)
+* [kaltura] Check source format URL (#21290)
+* [ctsnews] Fix YouTube embeds extraction (#21678)
++ [einthusan] Add support for einthusan.com (#21748, #21775)
++ [youtube] Add support for invidious.mastodon.host (#21777)
++ [gfycat] Extend URL regular expression (#21779, #21780)
+* [youtube] Restrict is_live extraction (#21782)
+
+
+version 2019.07.14
+
+Extractors
+* [porn91] Fix extraction (#21312)
++ [yandexmusic] Extract track number and disk number (#21421)
++ [yandexmusic] Add support for multi disk albums (#21420, #21421)
+* [lynda] Handle missing subtitles (#20490, #20513)
++ [youtube] Add more invidious instances to URL regular expression (#21694)
+* [twitter] Improve uploader id extraction (#21705)
+* [spankbang] Fix and improve metadata extraction
+* [spankbang] Fix extraction (#21763, #21764)
++ [dlive] Add support for dlive.tv (#18080)
++ [livejournal] Add support for livejournal.com (#21526)
+* [roosterteeth] Fix free episode extraction (#16094)
+* [dbtv] Fix extraction
+* [bellator] Fix extraction
+- [rudo] Remove extractor (#18430, #18474)
+* [facebook] Fallback to twitter:image meta for thumbnail extraction (#21224)
+* [bleacherreport] Fix Bleacher Report CMS extraction
+* [espn] Fix fivethirtyeight.com extraction
+* [5tv] Relax video URL regular expression and support https URLs
+* [youtube] Fix is_live extraction (#21734)
+* [youtube] Fix authentication (#11270)
+
+
+version 2019.07.12
+
+Core
++ [adobepass] Add support for AT&T U-verse (mso ATT) (#13938, #21016)
+
+Extractors
++ [mgtv] Pass Referer HTTP header for format URLs (#21726)
++ [beeg] Add support for api/v6 v2 URLs without t argument (#21701)
+* [voxmedia:volume] Improve vox embed extraction (#16846)
+* [funnyordie] Move extraction to VoxMedia extractor (#16846)
+* [gameinformer] Fix extraction (#8895, #15363, #17206)
+* [funk] Fix extraction (#17915)
+* [packtpub] Relax lesson URL regular expression (#21695)
+* [packtpub] Fix extraction (#21268)
+* [philharmoniedeparis] Relax URL regular expression (#21672)
+* [peertube] Detect embed URLs in generic extraction (#21666)
+* [mixer:vod] Relax URL regular expression (#21657, #21658)
++ [lecturio] Add support for id based URLs (#21630)
++ [go] Add site info for disneynow (#21613)
+* [ted] Restrict info regular expression (#21631)
+* [twitch:vod] Actualize m3u8 URL (#21538, #21607)
+* [vzaar] Fix videos with empty title (#21606)
+* [tvland] Fix extraction (#21384)
+* [arte] Clean extractor (#15583, #21614)
+
+
+version 2019.07.02
+
+Core
++ [utils] Introduce random_user_agent and use as default User-Agent (#21546)
+
+Extractors
++ [vevo] Add support for embed.vevo.com URLs (#21565)
++ [openload] Add support for oload.biz (#21574)
+* [xiami] Update API base URL (#21575)
+* [yourporn] Fix extraction (#21585)
++ [acast] Add support for URLs with episode id (#21444)
++ [dailymotion] Add support for DM.player embeds
+* [soundcloud] Update client id
+
+
+version 2019.06.27
+
+Extractors
++ [go] Add support for disneynow.com (#21528)
+* [mixer:vod] Relax URL regular expression (#21531, #21536)
+* [drtv] Relax URL regular expression
+* [fusion] Fix extraction (#17775, #21269)
+- [nfb] Remove extractor (#21518)
++ [beeg] Add support for api/v6 v2 URLs (#21511)
++ [brightcove:new] Add support for playlists (#21331)
++ [openload] Add support for oload.life (#21495)
+* [vimeo:channel,group] Make title extraction non fatal
+* [vimeo:likes] Implement extractor in terms of channel extractor (#21493)
++ [pornhub] Add support for more paged video sources
++ [pornhub] Add support for downloading single pages and search pages (#15570)
+* [pornhub] Rework extractors (#11922, #16078, #17454, #17936)
++ [youtube] Add another signature function pattern
+* [tf1] Fix extraction (#21365, #21372)
+* [crunchyroll] Move Accept-Language workaround to video extractor since
+ it causes playlists not to list any videos
+* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443)
+
+
+version 2019.06.21
+
+Core
+* [utils] Restrict parse_codecs and add theora as known vcodec (#21381)
+
+Extractors
+* [youtube] Update signature function patterns (#21469, #21476)
+* [youtube] Make --write-annotations non fatal (#21452)
++ [sixplay] Add support for rtlmost.hu (#21405)
+* [youtube] Hardcode codec metadata for av01 video only formats (#21381)
+* [toutv] Update client key (#21370)
++ [biqle] Add support for new embed domain
+* [cbs] Improve DRM protected videos detection (#21339)
+
+
+version 2019.06.08
+
+Core
+* [downloader/common] Improve rate limit (#21301)
+* [utils] Improve strip_or_none
+* [extractor/common] Strip src attribute for HTML5 entries code (#18485,
+ #21169)
+
+Extractors
+* [ted] Fix playlist extraction (#20844, #21032)
+* [vlive:playlist] Fix video extraction when no playlist is found (#20590)
++ [vlive] Add CH+ support (#16887, #21209)
++ [openload] Add support for oload.website (#21329)
++ [tvnow] Extract HD formats (#21201)
++ [redbulltv] Add support for rrn:content URLs (#21297)
+* [youtube] Fix average rating extraction (#21304)
++ [bitchute] Extract HTML5 formats (#21306)
+* [cbsnews] Fix extraction (#9659, #15397)
+* [vvvvid] Relax URL regular expression (#21299)
++ [prosiebensat1] Add support for new API (#21272)
++ [vrv] Extract adaptive_hls formats (#21243)
+* [viki] Switch to HTTPS (#21001)
+* [LiveLeak] Check if the original videos exist (#21206, #21208)
+* [rtp] Fix extraction (#15099)
+* [youtube] Improve DRM protected videos detection (#1774)
++ [srgssrplay] Add support for popupvideoplayer URLs (#21155)
++ [24video] Add support for porno.24video.net (#21194)
++ [24video] Add support for 24video.site (#21193)
+- [pornflip] Remove extractor
+- [criterion] Remove extractor (#21195)
+* [pornhub] Use HTTPS (#21061)
+* [bitchute] Fix uploader extraction (#21076)
+* [streamcloud] Reduce waiting time to 6 seconds (#21092)
+- [novamov] Remove extractors (#21077)
++ [openload] Add support for oload.press (#21135)
+* [vivo] Fix extraction (#18906, #19217)
+
+
+version 2019.05.20
+
+Core
++ [extractor/common] Move workaround for applying first Set-Cookie header
+ into a separate _apply_first_set_cookie_header method
+
+Extractors
+* [safari] Fix authentication (#21090)
+* [vk] Use _apply_first_set_cookie_header
+* [vrt] Fix extraction (#20527)
++ [canvas] Add support for vrtnieuws and sporza site ids and extract
+ AES HLS formats
++ [vrv] Extract captions (#19238)
+* [tele5] Improve video id extraction
+* [tele5] Relax URL regular expression (#21020, #21063)
+* [svtplay] Update API URL (#21075)
++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071)
+
+
+version 2019.05.11
+
+Core
+* [utils] Transliterate "þ" as "th" (#20897)
+
+Extractors
++ [cloudflarestream] Add support for videodelivery.net (#21049)
++ [byutv] Add support for DVR videos (#20574, #20676)
++ [gfycat] Add support for URLs with tags (#20696, #20731)
++ [openload] Add support for verystream.com (#20701, #20967)
+* [youtube] Use sp field value for signature field name (#18841, #18927,
+ #21028)
++ [yahoo:gyao] Extend URL regular expression (#21008)
+* [youtube] Fix channel id extraction (#20982, #21003)
++ [sky] Add support for news.sky.com (#13055)
++ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965)
++ [francetvinfo] Extend video id extraction (#20619, #20740)
+* [4tube] Update token hosts (#20918)
+* [hotstar] Move to API v2 (#20931)
+* [fox] Fix API error handling under python 2 (#20925)
++ [redbulltv] Extend URL regular expression (#20922)
+
+
+version 2019.04.30
+
+Extractors
+* [openload] Use real Chrome versions (#20902)
+- [youtube] Remove info el for get_video_info request
+* [youtube] Improve extraction robustness
+- [dramafever] Remove extractor (#20868)
+* [adn] Fix subtitle extraction (#12724)
++ [ccc] Extract creator (#20355)
++ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355)
++ [sverigesradio] Add support for sverigesradio.se (#18635)
++ [cinemax] Add support for cinemax.com
+* [sixplay] Try extracting non-DRM protected manifests (#20849)
++ [youtube] Extract Youtube Music Auto-generated metadata (#20599, #20742)
+- [wrzuta] Remove extractor (#20684, #20801)
+* [twitch] Prefer source format (#20850)
++ [twitcasting] Add support for private videos (#20843)
+* [reddit] Validate thumbnail URL (#20030)
+* [yandexmusic] Fix track URL extraction (#20820)
+
+
+version 2019.04.24
+
+Extractors
+* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766,
+ #20767, #20769, #20771, #20768, #20770)
+* [toutv] Fix extraction and extract series info (#20757)
++ [vrv] Add support for movie listings (#19229)
++ [youtube] Print error when no data is available (#20737)
++ [soundcloud] Add support for new rendition and improve extraction (#20699)
++ [ooyala] Add support for geo verification proxy
++ [nrl] Add support for nrl.com (#15991)
++ [vimeo] Extract live archive source format (#19144)
++ [vimeo] Add support for live streams and improve info extraction (#19144)
++ [ntvcojp] Add support for cu.ntv.co.jp
++ [nhk] Extract RTMPT format
++ [nhk] Add support for audio URLs
++ [udemy] Add another course id extraction pattern (#20491)
++ [openload] Add support for oload.services (#20691)
++ [openload] Add support for openloed.co (#20691, #20693)
+* [bravotv] Fix extraction (#19213)
+
+
+version 2019.04.17
+
+Extractors
+* [openload] Randomize User-Agent (#20688)
++ [openload] Add support for oladblock domains (#20471)
+* [adn] Fix subtitle extraction (#12724)
++ [aol] Add support for localized websites
++ [yahoo] Add support for GYAO episode URLs
++ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098)
++ [yahoo] Add support for gyao.yahoo.co.jp
+* [aenetworks] Fix history topic extraction and extract more formats
++ [cbs] Extract smpte and vtt subtitles
++ [streamango] Add support for streamcherry.com (#20592)
++ [yourporn] Add support for sxyprn.com (#20646)
+* [mgtv] Fix extraction (#20650)
+* [linkedin:learning] Use urljoin for form action URL (#20431)
++ [gdc] Add support for kaltura embeds (#20575)
+* [dispeak] Improve mp4 bitrate extraction
+* [kaltura] Sanitize embed URLs
+* [jwplatform] Do not match manifest URLs (#20596)
+* [aol] Restrict URL regular expression and improve format extraction
++ [tiktok] Add support for new URL schema (#20573)
++ [stv:player] Add support for player.stv.tv (#20586)
+
+
+version 2019.04.07
+
+Core
++ [downloader/external] Pass rtmp_conn to ffmpeg
+
+Extractors
++ [ruutu] Add support for audio podcasts (#20473, #20545)
++ [xvideos] Extract all thumbnails (#20432)
++ [platzi] Add support for platzi.com (#20562)
+* [dvtv] Fix extraction (#18514, #19174)
++ [vrv] Add basic support for individual movie links (#19229)
++ [bfi:player] Add support for player.bfi.org.uk (#19235)
+* [hbo] Fix extraction and extract subtitles (#14629, #13709)
+* [youtube] Extract srv[1-3] subtitle formats (#20566)
+* [adultswim] Fix extraction (#18025)
+* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339)
+* [adn] Fix subtitle compatibility with ffmpeg
+* [adn] Fix extraction and add support for positioning styles (#20549)
+* [vk] Use unique video id (#17848)
+* [newstube] Fix extraction
+* [rtl2] Actualize extraction
++ [adobeconnect] Add support for adobeconnect.com (#20283)
++ [gaia] Add support for authentication (#14605)
++ [mediasite] Add support for dashed ids and named catalogs (#20531)
+
+
+version 2019.04.01
+
+Core
+* [utils] Improve int_or_none and float_or_none (#20403)
+* Check for valid --min-sleep-interval when --max-sleep-interval is specified
+ (#20435)
+
+Extractors
++ [weibo] Extend URL regular expression (#20496)
++ [xhamster] Add support for xhamster.one (#20508)
++ [mediasite] Add support for catalogs (#20507)
++ [teamtreehouse] Add support for teamtreehouse.com (#9836)
++ [ina] Add support for audio URLs
+* [ina] Improve extraction
+* [cwtv] Fix episode number extraction (#20461)
+* [npo] Improve DRM detection
++ [pornhub] Add support for DASH formats (#20403)
+* [svtplay] Update API endpoint (#20430)
+
+
+version 2019.03.18
+
+Core
+* [extractor/common] Improve HTML5 entries extraction
++ [utils] Introduce parse_bitrate
+* [update] Hide update URLs behind redirect
+* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346)
+
+Extractors
++ [yandexvideo] Add extractor
+* [openload] Improve embed detection
++ [corus] Add support for bigbrothercanada.ca (#20357)
++ [orf:radio] Extract series (#20012)
++ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359)
+- [anysex] Remove extractor (#19279)
++ [ciscolive] Add support for new URL schema (#20320, #20351)
++ [youtube] Add support for invidiou.sh (#20309)
+- [anitube] Remove extractor (#20334)
+- [ruleporn] Remove extractor (#15344, #20324)
+* [npr] Fix extraction (#10793, #13440)
+* [biqle] Fix extraction (#11471, #15313)
+* [viddler] Modernize
+* [moevideo] Fix extraction
+* [primesharetv] Remove extractor
+* [hypem] Modernize and extract more metadata (#15320)
+* [veoh] Fix extraction
+* [escapist] Modernize
+- [videomega] Remove extractor (#10108)
++ [beeg] Add support for beeg.porn (#20306)
+* [vimeo:review] Improve config url extraction and extract original format
+ (#20305)
+* [fox] Detect geo restriction and authentication errors (#20208)
+
+
+version 2019.03.09
+
+Core
+* [extractor/common] Use compat_etree_Element
++ [compat] Introduce compat_etree_Element
+* [extractor/common] Fallback url to base URL for DASH formats
+* [extractor/common] Do not fail on invalid data while parsing F4M manifest
+ in non fatal mode
+* [extractor/common] Return MPD manifest as format's url meta field (#20242)
+* [utils] Strip #HttpOnly_ prefix from cookies files (#20219)
+
+Extractors
+* [francetv:site] Relax video id regular expression (#20268)
+* [toutv] Detect invalid login error
+* [toutv] Fix authentication (#20261)
++ [urplay] Extract timestamp (#20235)
++ [openload] Add support for oload.space (#20246)
+* [facebook] Improve uploader extraction (#20250)
+* [bbc] Use compat_etree_Element
+* [crunchyroll] Use compat_etree_Element
+* [npo] Improve ISM extraction
+* [rai] Improve extraction (#20253)
+* [paramountnetwork] Fix mgid extraction (#20241)
+* [libsyn] Improve extraction (#20229)
++ [youtube] Add more invidious instances to URL regular expression (#20228)
+* [spankbang] Fix extraction (#20023)
+* [espn] Extend URL regular expression (#20013)
+* [sixplay] Handle videos with empty assets (#20016)
++ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070)
+
+
+version 2019.03.01
+
+Core
++ [downloader/external] Add support for rate limit and retries for wget
+* [downloader/external] Fix infinite retries for curl (#19303)
+
+Extractors
+* [npo] Fix extraction (#20084)
+* [francetv:site] Extend video id regex (#20029, #20071)
++ [periscope] Extract width and height (#20015)
+* [servus] Fix extraction (#19297)
+* [bbccouk] Make subtitles non fatal (#19651)
+* [metacafe] Fix family filter bypass (#19287)
+
+
+version 2019.02.18
+
+Extractors
+* [tvp:website] Fix and improve extraction
++ [tvp] Detect unavailable videos
+* [tvp] Fix description extraction and make thumbnail optional
++ [linuxacademy] Add support for linuxacademy.com (#12207)
+* [bilibili] Update keys (#19233)
+* [udemy] Extend URL regular expressions (#14330, #15883)
+* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126)
+* [noovo] Fix extraction (#19230)
+* [rai] Relax URL regular expression (#19232)
++ [vshare] Pass Referer to download request (#19205, #19221)
++ [openload] Add support for oload.live (#19222)
+* [imgur] Use video id as title fallback (#18590)
++ [twitch] Add new source format detection approach (#19193)
+* [tvplayhome] Fix video id extraction (#19190)
+* [tvplayhome] Fix episode metadata extraction (#19190)
+* [rutube:embed] Fix extraction (#19163)
++ [rutube:embed] Add support for private videos (#19163)
++ [soundcloud] Extract more metadata
++ [trunews] Add support for trunews.com (#19153)
++ [linkedin:learning] Extract chapter_number and chapter_id (#19162)
+
+
+version 2019.02.08
+
+Core
+* [utils] Improve JSON-LD regular expression (#18058)
+* [YoutubeDL] Fallback to ie_key of matching extractor while making
+ download archive id when no explicit ie_key is provided (#19022)
+
+Extractors
++ [malltv] Add support for mall.tv (#18058, #17856)
++ [spankbang:playlist] Add support for playlists (#19145)
+* [spankbang] Extend URL regular expression
+* [trutv] Fix extraction (#17336)
+* [toutv] Fix authentication (#16398, #18700)
+* [pornhub] Fix tags and categories extraction (#13720, #19135)
+* [pornhd] Fix formats extraction
++ [pornhd] Extract like count (#19123, #19125)
+* [radiocanada] Switch to the new media requests (#19115)
++ [teachable] Add support for courses.workitdaily.com (#18871)
+- [vporn] Remove extractor (#16276)
++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086)
++ [drtuber] Extract duration (#19078)
+* [soundcloud] Fix paged playlists extraction, add support for albums and update client id
+* [soundcloud] Update client id
+* [drtv] Improve preference (#19079)
++ [openload] Add support for openload.pw and oload.pw (#18930)
++ [openload] Add support for oload.info (#19073)
+* [crackle] Authorize media detail request (#16931)
+
+
+version 2019.01.30.1
+
+Core
+* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067)
+
+
+version 2019.01.30
+
+Core
+* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding
+ subtitles (#19024, #19042)
+* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025)
+
+Extractors
+* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061)
+* [drtv] Improve extraction (#19039)
+ + Add support for EncryptedUri videos
+ + Extract more metadata
+ * Fix subtitles extraction
++ [fox] Add support for locked videos using cookies (#19060)
+* [fox] Fix extraction for free videos (#19060)
++ [zattoo] Add support for tv.salt.ch (#19059)
+
+
+version 2019.01.27
+
+Core
++ [extractor/common] Extract season in _json_ld
+* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection
+ (#681)
+
+Extractors
+* [vice] Fix extraction for locked videos (#16248)
++ [wakanim] Detect DRM protected videos
++ [wakanim] Add support for wakanim.tv (#14374)
+* [usatoday] Fix extraction for videos with custom brightcove partner id
+ (#18990)
+* [drtv] Fix extraction (#18989)
+* [nhk] Extend URL regular expression (#18968)
+* [go] Fix Adobe Pass requests for Disney Now (#18901)
++ [openload] Add support for oload.club (#18969)
+
+
+version 2019.01.24
+
+Core
+* [YoutubeDL] Fix negation for string operators in format selection (#18961)
+
+
+version 2019.01.23
+
+Core
+* [utils] Fix urljoin for paths with non-http(s) schemes
+* [extractor/common] Improve jwplayer relative URL handling (#18892)
++ [YoutubeDL] Add negation support for string comparisons in format selection
+ expressions (#18600, #18805)
+* [extractor/common] Improve HLS video-only format detection (#18923)
+
+Extractors
+* [crunchyroll] Extend URL regular expression (#18955)
+* [pornhub] Bypass scrape detection (#4822, #5930, #7074, #10175, #12722,
+ #17197, #18338 #18842, #18899)
++ [vrv] Add support for authentication (#14307)
+* [videomore:season] Fix extraction
+* [videomore] Improve extraction (#18908)
++ [tnaflix] Pass Referer in metadata request (#18925)
+* [radiocanada] Relax DRM check (#18608, #18609)
+* [vimeo] Fix video password verification for videos protected by
+ Referer HTTP header
++ [hketv] Add support for hkedcity.net (#18696)
++ [streamango] Add support for fruithosts.net (#18710)
++ [instagram] Add support for tags (#18757)
++ [odnoklassniki] Detect paid videos (#18876)
+* [ted] Correct acodec for HTTP formats (#18923)
+* [cartoonnetwork] Fix extraction (#15664, #17224)
+* [vimeo] Fix extraction for password protected player URLs (#18889)
+
+
+version 2019.01.17
+
+Extractors
+* [youtube] Extend JS player signature function name regular expressions
+ (#18890, #18891, #18893)
+
+
+version 2019.01.16
+
+Core
++ [test/helper] Add support for maxcount and count collection len checkers
+* [downloader/hls] Fix uplynk ad skipping (#18824)
+* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813)
+
+Extractors
+* [youtube] Skip unsupported adaptive stream type (#18804)
++ [youtube] Extract DASH formats from player response (#18804)
+* [funimation] Fix extraction (#14089)
+* [skylinewebcams] Fix extraction (#18853)
++ [curiositystream] Add support for non app URLs
++ [bitchute] Check formats (#18833)
+* [wistia] Extend URL regular expression (#18823)
++ [playplustv] Add support for playplus.com (#18789)
+
+
+version 2019.01.10
+
+Core
+* [extractor/common] Use episode name as title in _json_ld
++ [extractor/common] Add support for movies in _json_ld
+* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes
+ (#18765)
++ [utils] Add language codes replaced in 1989 revision of ISO 639
+ to ISO639Utils (#18765)
+
+Extractors
+* [youtube] Extract live HLS URL from player response (#18799)
++ [outsidetv] Add support for outsidetv.com (#18774)
+* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs
++ [fox] Add support for National Geographic (#17985, #15333, #14698)
++ [playplustv] Add support for playplus.tv (#18789)
+* [globo] Set GLBID cookie manually (#17346)
++ [gaia] Add support for gaia.com (#14605)
+* [youporn] Fix title and description extraction (#18748)
++ [hungama] Add support for hungama.com (#17402, #18771)
+* [dtube] Fix extraction (#18741)
+* [tvnow] Fix and rework extractors and prepare for a switch to the new API
+ (#17245, #18499)
+* [carambatv:page] Fix extraction (#18739)
+
+
+version 2019.01.02
+
+Extractors
+* [discovery] Use geo verification headers (#17838)
++ [packtpub] Add support for subscription.packtpub.com (#18718)
+* [yourporn] Fix extraction (#18583)
++ [acast:channel] Add support for play.acast.com (#18587)
++ [extractors] Add missing age limits (#18621)
++ [rmcdecouverte] Add support for live stream
+* [rmcdecouverte] Bypass geo restriction
+* [rmcdecouverte] Update URL regular expression (#18595, #18697)
+* [manyvids] Fix extraction (#18604, #18614)
+* [bitchute] Fix extraction (#18567)
+
+
+version 2018.12.31
+
+Extractors
++ [bbc] Add support for another embed pattern (#18643)
++ [npo:live] Add support for npostart.nl (#18644)
+* [beeg] Fix extraction (#18610, #18626)
+* [youtube] Unescape HTML for series (#18641)
++ [youtube] Extract more format metadata
+* [youtube] Detect DRM protected videos (#1774)
+* [youtube] Relax HTML5 player regular expressions (#18465, #18466)
+* [youtube] Extend HTML5 player regular expression (#17516)
++ [liveleak] Add support for another embed type and restore original
+ format extraction
++ [crackle] Extract ISM and HTTP formats
++ [twitter] Pass Referer with card request (#18579)
+* [mediasite] Extend URL regular expression (#18558)
++ [lecturio] Add support for lecturio.de (#18562)
++ [discovery] Add support for Scripps Networks watch domains (#17947)
+
+
+version 2018.12.17
+
+Extractors
+* [ard:beta] Improve geo restricted videos extraction
+* [ard:beta] Fix subtitles extraction
+* [ard:beta] Improve extraction robustness
+* [ard:beta] Relax URL regular expression (#18441)
+* [acast] Add support for embed.acast.com and play.acast.com (#18483)
+* [iprima] Relax URL regular expression (#18515, #18540)
+* [vrv] Fix initial state extraction (#18553)
+* [youtube] Fix mark watched (#18546)
++ [safari] Add support for learning.oreilly.com (#18510)
+* [youtube] Fix multifeed extraction (#18531)
+* [lecturio] Improve subtitles extraction (#18488)
+* [uol] Fix format URL extraction (#18480)
++ [ard:mediathek] Add support for classic.ardmediathek.de (#18473)
+
+
+version 2018.12.09
+
+Core
+* [YoutubeDL] Keep session cookies in cookie file between runs
+* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929)
+
+Extractors
++ [teachable] Add support for teachable platform sites (#5451, #18150, #18272)
++ [aenetworks] Add support for historyvault.com (#18460)
+* [imgur] Improve gallery and album detection and extraction (#9133, #16577,
+ #17223, #18404)
+* [iprima] Relax URL regular expression (#18453)
+* [hotstar] Fix video data extraction (#18386)
+* [ard:mediathek] Fix title and description extraction (#18349, #18371)
+* [xvideos] Switch to HTTPS (#18422, #18427)
++ [lecturio] Add support for lecturio.com (#18405)
++ [nrktv:series] Add support for extra materials
+* [nrktv:season,series] Fix extraction (#17159, #17258)
+* [nrktv] Relax URL regular expression (#18304, #18387)
+* [yourporn] Fix extraction (#18424, #18425)
+* [tbs] Fix info extraction (#18403)
++ [gamespot] Add support for review URLs
+
+
+version 2018.12.03
+
+Core
+* [utils] Fix random_birthday to generate existing dates only (#18284)
+
+Extractors
++ [tiktok] Add support for tiktok.com (#18108, #18135)
+* [pornhub] Use actual URL host for requests (#18359)
+* [lynda] Fix authentication (#18158, #18217)
+* [gfycat] Update API endpoint (#18333, #18343)
++ [hotstar] Add support for alternative app state layout (#18320)
+* [azmedien] Fix extraction (#18334, #18336)
++ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
+* [joj] Fix extraction (#18280, #18281)
++ [wistia] Add support for fast.wistia.com (#18287)
+
+
+version 2018.11.23
+
+Core
++ [setup.py] Add more relevant classifiers
+
+Extractors
+* [mixcloud] Fallback to hardcoded decryption key (#18016)
+* [nbc:news] Fix article extraction (#16194)
+* [foxsports] Fix extraction (#17543)
+* [loc] Relax regular expression and improve formats extraction
++ [ciscolive] Add support for ciscolive.cisco.com (#17984)
+* [nzz] Relax kaltura regex (#18228)
+* [sixplay] Fix formats extraction
+* [bitchute] Improve title extraction
+* [kaltura] Limit requested MediaEntry fields
++ [americastestkitchen] Add support for zype embeds (#18225)
++ [pornhub] Add pornhub.net alias
+* [nova:embed] Fix extraction (#18222)
+
+
+version 2018.11.18
+
+Extractors
++ [wwe] Extract subtitles
++ [wwe] Add support for playlists (#14781)
++ [wwe] Add support for wwe.com (#14781, #17450)
+* [vk] Detect geo restriction (#17767)
+* [openload] Use original host during extraction (#18211)
+* [atvat] Fix extraction (#18041)
++ [rte] Add support for new API endpoint (#18206)
+* [tnaflixnetwork:embed] Fix extraction (#18205)
+* [picarto] Use API and add token support (#16518)
++ [zype] Add support for player.zype.com (#18143)
+* [vivo] Fix extraction (#18139)
+* [ruutu] Update API endpoint (#18138)
+
+
+version 2018.11.07
+
+Extractors
++ [youtube] Add another JS signature function name regex (#18091, #18093,
+ #18094)
+* [facebook] Fix tahoe request (#17171)
+* [cliphunter] Fix extraction (#18083)
++ [youtube:playlist] Add support for invidio.us (#18077)
+* [zattoo] Arrange API hosts for derived extractors (#18035)
++ [youtube] Add fallback metadata extraction from videoDetails (#18052)
+
+
+version 2018.11.03
+
+Core
+* [extractor/common] Ensure response handle is not prematurely closed before
+ it can be read if it matches expected_status (#17195, #17846, #17447)
+
+Extractors
+* [laola1tv:embed] Set correct stream access URL scheme (#16341)
++ [ehftv] Add support for ehftv.com (#15408)
+* [azmedien] Adopt to major site redesign (#17745, #17746)
++ [twitcasting] Add support for twitcasting.tv (#17981)
+* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
++ [openload] Add support for oload.fun (#18045)
+* [njpwworld] Fix authentication (#17427)
++ [linkedin:learning] Add support for linkedin.com/learning (#13545)
+* [theplatform] Improve error detection (#13222)
+* [cnbc] Simplify extraction (#14280, #17110)
++ [cnbc] Add support for new URL schema (#14193)
+* [aparat] Improve extraction and extract more metadata (#17445, #18008)
+* [aparat] Fix extraction
+
+
+version 2018.10.29
+
+Core
++ [extractor/common] Add validation for JSON-LD URLs
+
+Extractors
++ [sportbox] Add support for matchtv.ru
+* [sportbox] Fix extraction (#17978)
+* [screencast] Fix extraction (#14590, #14617, #17990)
++ [openload] Add support for oload.icu
++ [ivi] Add support for ivi.tv
+* [crunchyroll] Improve extraction failsafeness (#17991)
+* [dailymail] Fix formats extraction (#17976)
+* [viewster] Reduce format requests
+* [cwtv] Handle API errors (#17905)
++ [rutube] Use geo verification headers (#17897)
++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
+- [tv3] Remove extractor (#10461, #15339)
+* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
++ [openload] Add support for oload.cc (#17823)
++ [patreon] Extract post_file URL (#17792)
+* [patreon] Fix extraction (#14502, #10471)
+
+
+version 2018.10.05
+
+Extractors
+* [pluralsight] Improve authentication (#17762)
+* [dailymotion] Fix extraction (#17699)
+* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
+* [philharmoniedeparis] Fix extraction (#17705)
++ [jamendo] Add support for licensing.jamendo.com (#17724)
++ [openload] Add support for oload.cloud (#17710)
+* [pluralsight] Fix subtitles extraction (#17726, #17728)
++ [vimeo] Add another config regular expression (#17690)
+* [spike] Fix Paramount Network extraction (#17677)
+* [hotstar] Fix extraction (#14694, #14931, #17637)
+
+
+version 2018.09.26
+
+Extractors
+* [pluralsight] Fix subtitles extraction (#17671)
+* [mediaset] Improve embed support (#17668)
++ [youtube] Add support for invidio.us (#17613)
++ [zattoo] Add support for more zattoo platform sites
+* [zattoo] Fix extraction (#17175, #17542)
+
+
version 2018.09.18
Core
@@ -17,7 +1506,7 @@ Extractors
+ [youtube] Extract channel meta fields (#9676, #12939)
* [porntube] Fix extraction (#17541)
* [asiancrush] Fix extraction (#15630)
-+ [twitch:clips] Extend URL regular expression (closes #17559)
++ [twitch:clips] Extend URL regular expression (#17559)
+ [vzaar] Add support for HLS
* [tube8] Fix metadata extraction (#17520)
* [eporner] Extract JSON-LD (#17519)
diff --git a/Makefile b/Makefile
index 4a62f44bc..3e17365b8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
+ rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete
@@ -78,8 +78,12 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py
CONTRIBUTING.md: README.md
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
-.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md youtube_dl/version.py
- $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md
+issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md youtube_dl/version.py
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE/4_bug_report.md
+ $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md
supportedsites:
$(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md
diff --git a/README.md b/README.md
index fdd115c9b..45326c69e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![Build Status](https://travis-ci.org/rg3/youtube-dl.svg?branch=master)](https://travis-ci.org/rg3/youtube-dl)
+[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl)
youtube-dl - download videos from youtube.com or other video platforms
@@ -43,7 +43,7 @@ Or with [MacPorts](https://www.macports.org/):
sudo port install youtube-dl
-Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html).
+Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html).
# DESCRIPTION
**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
@@ -434,9 +434,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
either the path to the binary or its
containing directory.
--exec CMD Execute a command on the file after
- downloading, similar to find's -exec
- syntax. Example: --exec 'adb push {}
- /sdcard/Music/ && rm {}'
+ downloading and post-processing, similar to
+ find's -exec syntax. Example: --exec 'adb
+ push {} /sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt|lrc)
@@ -496,7 +496,7 @@ The `-o` option allows users to indicate a template for the output file names.
**tl;dr:** [navigate me to examples](#output-template-examples).
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by a formatting operations. Allowed names along with sequence type are:
+The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
- `id` (string): Video identifier
- `title` (string): Video title
@@ -642,6 +642,7 @@ The simplest case is requesting a specific format, for example with `-f 22` you
You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
You can also use special names to select particular edge case formats:
+
- `best`: Select the best quality format represented by a single file with video and audio.
- `worst`: Select the worst quality format represented by a single file with video and audio.
- `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
@@ -658,6 +659,7 @@ If you want to download several formats of the same video use a comma as a separ
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
+
- `filesize`: The number of bytes, if known in advance
- `width`: Width of the video, if known
- `height`: Height of the video, if known
@@ -667,7 +669,8 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
- `asr`: Audio sampling rate in Hertz
- `fps`: Frame rate
-Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
+Also filtering works for comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and following string meta fields:
+
- `ext`: File extension
- `acodec`: Name of the audio codec in use
- `vcodec`: Name of the video codec in use
@@ -675,6 +678,8 @@ Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begin
- `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- `format_id`: A short description of the format
+Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
+
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the video hoster.
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
@@ -683,7 +688,7 @@ You can merge the video and audio of two formats into a single file using `-f <v
Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
-Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/rg3/youtube-dl/issues/5447), [#5456](https://github.com/rg3/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
+Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
@@ -695,7 +700,7 @@ Note that on Windows you may need to use double quotes instead of single.
# Download best mp4 format available or any other best if no mp4 available
$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
-# Download best format available but not better that 480p
+# Download best format available but no better than 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
# Download best video only format but no bigger than 50 MB
@@ -734,7 +739,7 @@ $ youtube-dl --dateafter 20000101 --datebefore 20091231
### How do I update youtube-dl?
-If you've followed [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
+If you've followed [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
@@ -744,11 +749,11 @@ As a last resort, you can also uninstall the version installed by your package m
sudo apt-get remove -y youtube-dl
-Afterwards, simply follow [our manual installation instructions](https://rg3.github.io/youtube-dl/download.html):
+Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
```
-sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
-sudo chmod a+x /usr/local/bin/youtube-dl
+sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
+sudo chmod a+rx /usr/local/bin/youtube-dl
hash -r
```
@@ -778,7 +783,7 @@ Most people asking this question are not aware that youtube-dl now defaults to d
### I get HTTP error 402 when trying to download a video. What's this?
-Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/rg3/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
+Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restart youtube-dl.
### Do I need any other programs?
@@ -830,7 +835,9 @@ In February 2015, the new YouTube player contained a character sequence in a str
### HTTP Error 429: Too Many Requests or 402: Payment Required
-These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
+These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block meaning that you can gain access again after solving a CAPTCHA. Just open a browser, solve the CAPTCHA the service suggests, and after that [pass cookies](#how-do-i-pass-cookies-to-youtube-dl) to youtube-dl. Note that if your machine has multiple external IPs then you should also pass exactly the same IP you've used for solving CAPTCHA with [`--source-address`](#network-options). Also you may need to pass a `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds).
+
+If this is not the case (no CAPTCHA suggested to solve by the service) then you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
### SyntaxError: Non-ASCII character
@@ -843,7 +850,7 @@ means you're using an outdated version of Python. Please update to Python 2.6 or
### What is this binary file? Where has the code gone?
-Since June 2012 ([#342](https://github.com/rg3/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
+Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws an error due to missing `MSVCR100.dll`
@@ -902,7 +909,7 @@ When youtube-dl detects an HLS video, it can download it either with the built-i
When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://rg3.github.io/youtube-dl/supportedsites.html) cannot mandate one specific downloader.
+In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
@@ -942,7 +949,7 @@ youtube-dl is an open-source project manned by too few volunteers, so we'd rathe
# DEVELOPER INSTRUCTIONS
-Most users do not need to build youtube-dl and can [download the builds](https://rg3.github.io/youtube-dl/download.html) or get them from their distribution.
+Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
To run youtube-dl as a developer, you don't need to build anything either. Simply execute
@@ -970,7 +977,7 @@ If you want to add support for a new site, first of all **make sure** this site
After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
-1. [Fork this repository](https://github.com/rg3/youtube-dl/fork)
+1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
2. Check out the source code with:
git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
@@ -1022,18 +1029,22 @@ After you have ensured this site is distributing its content legally, you can fo
# TODO more properties (see youtube_dl/extractor/common.py)
}
```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
+5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L74-L252). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://pypi.python.org/pypi/flake8). Also make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-9. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+
+ $ flake8 youtube_dl/extractor/yourextractor.py
+
+9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
+10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
$ git add youtube_dl/extractor/extractors.py
$ git add youtube_dl/extractor/yourextractor.py
$ git commit -m '[yourextractor] Add new extractor'
$ git push origin yourextractor
-10. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
In any case, thank you very much for your contributions!
@@ -1045,7 +1056,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou
### Mandatory and optional metafields
-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L75-L257) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
- `id` (media identifier)
- `title` (media title)
@@ -1053,7 +1064,7 @@ For extraction to work youtube-dl relies on metadata your extractor extracts and
In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
-[Any field](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/extractor/common.py#L149-L257) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
#### Example
@@ -1129,11 +1140,33 @@ title = meta.get('title') or self._og_search_title(webpage)
This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.
-### Make regular expressions flexible
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.
+
+##### Example
+
+Don't capture id attribute name here since you can't use it for anything anyway.
-When using regular expressions try to write them fuzzy and flexible.
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
-#### Example
+##### Example
Say you need to extract `title` from the following HTML code:
@@ -1166,13 +1199,121 @@ title = self._search_regex(
webpage, 'title', group='title')
```
-### Use safe conversion functions
+### Long lines policy
-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+
+For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+ self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, default=None)
+ or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
+
+### Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+#### Example
+
+Correct:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+```
+
+Incorrect:
+
+```python
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list,
+)
+```
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.
+
+Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
# EMBEDDING YOUTUBE-DL
-youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/rg3/youtube-dl/issues/new).
+youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
@@ -1185,7 +1326,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```
-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/rg3/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
@@ -1226,7 +1367,7 @@ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
# BUGS
-Bugs and suggestions should be reported at: <https://github.com/rg3/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
```
@@ -1272,11 +1413,11 @@ Before reporting any issue, type `youtube-dl -U`. This should report that you're
### Is the issue already documented?
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/rg3/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
### Why are existing options not enough?
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
### Is there enough context in your bug report?
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
index 1344b4d87..4a4295ba9 100644
--- a/devscripts/buildserver.py
+++ b/devscripts/buildserver.py
@@ -322,7 +322,7 @@ class GITBuilder(GITInfoBuilder):
class YoutubeDLBuilder(object):
- authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+ authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
def __init__(self, **kwargs):
if self.repoName != 'youtube-dl':
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index 72b2ee422..740f04de0 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -45,12 +45,12 @@ for test in gettestcases():
RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
- if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or
- test['info_dict']['age_limit'] != 18):
+ if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+ or test['info_dict']['age_limit'] != 18):
print('\nPotential missing age_limit check: {0}'.format(test['name']))
- elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and
- test['info_dict']['age_limit'] == 18):
+ elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+ and test['info_dict']['age_limit'] == 18):
print('\nPotential false negative: {0}'.format(test['name']))
else:
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
index 30716ad8e..2ddfa1096 100644
--- a/devscripts/create-github-release.py
+++ b/devscripts/create-github-release.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import base64
import io
import json
import mimetypes
@@ -15,7 +14,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_basestring,
- compat_input,
compat_getpass,
compat_print,
compat_urllib_request,
@@ -27,8 +25,8 @@ from youtube_dl.utils import (
class GitHubReleaser(object):
- _API_URL = 'https://api.github.com/repos/rg3/youtube-dl/releases'
- _UPLOADS_URL = 'https://uploads.github.com/repos/rg3/youtube-dl/releases/%s/assets?name=%s'
+ _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases'
+ _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s'
_NETRC_MACHINE = 'github.com'
def __init__(self, debuglevel=0):
@@ -40,28 +38,20 @@ class GitHubReleaser(object):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
- self._username = info[0]
- self._password = info[2]
+ self._token = info[2]
compat_print('Using GitHub credentials found in .netrc...')
return
else:
compat_print('No GitHub credentials found in .netrc')
except (IOError, netrc.NetrcParseError):
compat_print('Unable to parse .netrc')
- self._username = compat_input(
- 'Type your GitHub username or email address and press [Return]: ')
- self._password = compat_getpass(
- 'Type your GitHub password and press [Return]: ')
+ self._token = compat_getpass(
+ 'Type your GitHub PAT (personal access token) and press [Return]: ')
def _call(self, req):
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
- # Authorizing manually since GitHub does not response with 401 with
- # WWW-Authenticate header set (see
- # https://developer.github.com/v3/#basic-authentication)
- b64 = base64.b64encode(
- ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
- req.add_header('Authorization', 'Basic %s' % b64)
+ req.add_header('Authorization', 'token %s' % self._token)
response = self._opener.open(req).read().decode('utf-8')
return json.loads(response)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py
index e93eb60fb..506a62377 100755
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -10,7 +10,7 @@ import textwrap
atom_template = textwrap.dedent("""\
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
- <link rel="self" href="http://rg3.github.io/youtube-dl/update/releases.atom" />
+ <link rel="self" href="http://ytdl-org.github.io/youtube-dl/update/releases.atom" />
<title>youtube-dl releases</title>
<id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
<updated>@TIMESTAMP@</updated>
@@ -21,7 +21,7 @@ entry_template = textwrap.dedent("""
<entry>
<id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
<title>New version @VERSION@</title>
- <link href="http://rg3.github.io/youtube-dl" />
+ <link href="http://ytdl-org.github.io/youtube-dl" />
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 4db5def5d..f2411c927 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -78,8 +78,8 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
sed -i "s/<unreleased>/$version/" ChangeLog
/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
-make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites
-git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog
+make README.md CONTRIBUTING.md issuetemplates supportedsites
+git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md youtube_dl/version.py ChangeLog
git commit $gpg_sign_commits -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..."
@@ -96,7 +96,7 @@ git push origin "$version"
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
read -p "VM running? (y/n) " -n 1
-wget "http://$buildserver/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py
index e25d28411..6c8d1cc2d 100644
--- a/devscripts/show-downloads-statistics.py
+++ b/devscripts/show-downloads-statistics.py
@@ -24,7 +24,7 @@ total_bytes = 0
for page in itertools.count(1):
releases = json.loads(compat_urllib_request.urlopen(
- 'https://api.github.com/repos/rg3/youtube-dl/releases?page=%s' % page
+ 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page
).read().decode('utf-8'))
if not releases:
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 9b8601751..35c1050e5 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -26,14 +26,15 @@
- **AcademicEarth:Course**
- **acast**
- **acast:channel**
- - **AddAnime**
- **ADN**: Anime Digital Network
- - **AdobeTV**
- - **AdobeTVChannel**
- - **AdobeTVShow**
- - **AdobeTVVideo**
+ - **AdobeConnect**
+ - **adobetv**
+ - **adobetv:channel**
+ - **adobetv:embed**
+ - **adobetv:show**
+ - **adobetv:video**
- **AdultSwim**
- - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network
+ - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
- **afreecatv**: afreecatv.com
- **AirMozilla**
- **AliExpressLive**
@@ -44,9 +45,8 @@
- **AmericasTestKitchen**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimeOnDemand**
- - **anitube.se**
- **Anvato**
- - **AnySex**
+ - **aol.com**
- **APA**
- **Aparat**
- **AppleConnect**
@@ -58,16 +58,8 @@
- **ARD:mediathek**
- **ARDBetaMediathek**
- **Arkena**
- - **arte.tv**
- **arte.tv:+7**
- - **arte.tv:cinema**
- - **arte.tv:concert**
- - **arte.tv:creative**
- - **arte.tv:ddc**
- **arte.tv:embed**
- - **arte.tv:future**
- - **arte.tv:info**
- - **arte.tv:magazine**
- **arte.tv:playlist**
- **AsianCrush**
- **AsianCrushPlaylist**
@@ -78,17 +70,12 @@
- **AudioBoom**
- **audiomack**
- **audiomack:album**
- - **auroravid**: AuroraVid
- **AWAAN**
- **awaan:live**
- **awaan:season**
- **awaan:video**
- **AZMedien**: AZ Medien videos
- - **AZMedienPlaylist**: AZ Medien playlists
- - **AZMedienShowPlaylist**: AZ Medien show playlists
- **BaiduVideo**: 百度视频
- - **bambuser**
- - **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
- **Bandcamp:weekly**
@@ -98,15 +85,20 @@
- **bbc.co.uk:article**: BBC articles
- **bbc.co.uk:iplayer:playlist**
- **bbc.co.uk:playlist**
+ - **BBVTV**
- **Beatport**
- **Beeg**
- **BehindKink**
- **Bellator**
- **BellMedia**
- **Bet**
+ - **bfi:player**
- **Bigflix**
- **Bild**: Bild.de
- **BiliBili**
+ - **BilibiliAudio**
+ - **BilibiliAudioAlbum**
+ - **BiliBiliPlayer**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
@@ -150,6 +142,7 @@
- **CBSInteractive**
- **CBSLocal**
- **cbsnews**: CBS News
+ - **cbsnews:embed**
- **cbsnews:livevideo**: CBS News Live Videos
- **CBSSports**
- **CCMA**
@@ -164,6 +157,9 @@
- **chirbit**
- **chirbit:profile**
- **Cinchcast**
+ - **Cinemax**
+ - **CiscoLiveSearch**
+ - **CiscoLiveSession**
- **CJSW**
- **cliphunter**
- **Clippit**
@@ -171,26 +167,25 @@
- **Clipsyndicate**
- **CloserToTruth**
- **CloudflareStream**
- - **cloudtime**: CloudTime
- **Cloudy**
- **Clubic**
- **Clyp**
- **cmt.com**
- **CNBC**
+ - **CNBCVideo**
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- - **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralFullEpisodes**
- **ComedyCentralShortname**
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
+ - **CONtv**
- **Corus**
- **Coub**
- **Cracked**
- **Crackle**
- - **Criterion**
- **CrooksAndLiars**
- **crunchyroll**
- **crunchyroll:playlist**
@@ -198,6 +193,7 @@
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTVNews**
+ - **cu.ntv.co.jp**: Nippon Television Network
- **Culturebox**
- **CultureUnplugged**
- **curiositystream**
@@ -207,8 +203,6 @@
- **dailymotion**
- **dailymotion:playlist**
- **dailymotion:user**
- - **DaisukiMotto**
- - **DaisukiMottoPlaylist**
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**
@@ -228,13 +222,12 @@
- **DiscoveryNetworksDe**
- **DiscoveryVR**
- **Disney**
+ - **dlive:stream**
+ - **dlive:vod**
- **Dotsub**
- **DouyuShow**
- **DouyuTV**: 斗鱼
- **DPlay**
- - **DPlayIt**
- - **dramafever**
- - **dramafever:series**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
@@ -250,7 +243,9 @@
- **EchoMsk**
- **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson
+ - **ehftv**
- **eHow**
+ - **EinsUndEinsTV**
- **Einthusan**
- **eitb.tv**
- **EllenTube**
@@ -268,6 +263,7 @@
- **EsriVideo**
- **Europa**
- **EveryonesMixtape**
+ - **EWETV**
- **ExpoTV**
- **Expressen**
- **ExtremeTube**
@@ -284,12 +280,12 @@
- **FiveThirtyEight**
- **FiveTV**
- **Flickr**
- - **Flipagram**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom**
- **Formula1**
- **FOX**
- **FOX9**
+ - **FOX9News**
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
@@ -309,15 +305,12 @@
- **FrontendMastersCourse**
- **FrontendMastersLesson**
- **Funimation**
- - **FunkChannel**
- - **FunkMix**
- - **FunnyOrDie**
+ - **Funk**
- **Fusion**
- **Fux**
- **FXNetworks**
+ - **Gaia**
- **GameInformer**
- - **GameOne**
- - **gameone:playlist**
- **GameSpot**
- **GameStar**
- **Gaskrank**
@@ -327,20 +320,18 @@
- **Gfycat**
- **GiantBomb**
- **Giga**
+ - **GlattvisionTV**
- **Glide**: Glide mobile video messages (glide.me)
- **Globo**
- **GloboArticle**
- **Go**
- - **Go90**
- **GodTube**
- **Golem**
- **GoogleDrive**
- **Goshgay**
- **GPUTechConf**
- **Groupon**
- - **Hark**
- **hbo**
- - **hbo:episode**
- **HearThisAt**
- **Heise**
- **HellPorno**
@@ -354,9 +345,10 @@
- **hitbox**
- **hitbox:live**
- **HitRecord**
+ - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
- **HornBunny**
- **HotNewHipHop**
- - **HotStar**
+ - **hotstar**
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
@@ -364,18 +356,21 @@
- **HRTiPlaylist**
- **Huajiao**: 花椒直播
- **HuffPost**: Huffington Post
+ - **Hungama**
+ - **HungamaSong**
- **Hypem**
- - **Iconosquare**
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
- **Imgur**
- - **ImgurAlbum**
+ - **imgur:album**
+ - **imgur:gallery**
- **Ina**
- **Inc**
- **IndavideoEmbed**
- **InfoQ**
- **Instagram**
+ - **instagram:tag**: Instagram hashtag search
- **instagram:user**: Instagram user profile
- **Internazionale**
- **InternetVideoArchive**
@@ -395,7 +390,6 @@
- **JeuxVideo**
- **Joj**
- **Jove**
- - **jpopsuki.tv**
- **JWPlatform**
- **Kakao**
- **Kaltura**
@@ -403,14 +397,14 @@
- **Kankan**
- **Karaoketv**
- **KarriereVideos**
- - **keek**
+ - **Katsomo**
- **KeezMovies**
- **Ketnet**
- **KhanAcademy**
- **KickStarter**
+ - **KinjaEmbed**
- **KinoPoisk**
- **KonserthusetPlay**
- - **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
- **Ku6**
- **KUSI**
@@ -427,8 +421,10 @@
- **Lcp**
- **LcpPlay**
- **Le**: 乐视网
- - **Learnr**
- **Lecture2Go**
+ - **Lecturio**
+ - **LecturioCourse**
+ - **LecturioDeCourse**
- **LEGO**
- **Lemonde**
- **Lenta**
@@ -441,7 +437,11 @@
- **limelight:channel**
- **limelight:channel_list**
- **LineTV**
+ - **linkedin:learning**
+ - **linkedin:learning:course**
+ - **LinuxAcademy**
- **LiTV**
+ - **LiveJournal**
- **LiveLeak**
- **LiveLeakEmbed**
- **livestream**
@@ -454,11 +454,10 @@
- **lynda**: lynda.com videos
- **lynda:course**: lynda.com online courses
- **m6**
- - **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- - **MakerTV**
+ - **MallTV**
- **mangomolo:live**
- **mangomolo:video**
- **ManyVids**
@@ -468,9 +467,12 @@
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **media.ccc.de**
+ - **media.ccc.de:lists**
- **Medialaan**
- **Mediaset**
- **Mediasite**
+ - **MediasiteCatalog**
+ - **MediasiteNamedCatalog**
- **Medici**
- **megaphone.fm**: megaphone.fm embedded players
- **Meipai**: 美拍
@@ -481,21 +483,21 @@
- **Mgoon**
- **MGTV**: 芒果TV
- **MiaoPai**
- - **Minhateca**
- **MinistryGrid**
- **Minoto**
- **miomio.tv**
- **MiTele**: mitele.es
- **mixcloud**
- **mixcloud:playlist**
- - **mixcloud:stream**
- **mixcloud:user**
- **Mixer:live**
- **Mixer:vod**
- **MLB**
- **Mnet**
+ - **MNetTV**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex**
+ - **MofosexEmbed**
- **Mojvideo**
- **Morningstar**: morningstar.com
- **Motherless**
@@ -509,11 +511,10 @@
- **mtg**: MTG services
- **mtv**
- **mtv.de**
- - **mtv81**
- **mtv:video**
+ - **mtvjapan**
- **mtvservices:embedded**
- **MuenchenTV**: münchen.tv
- - **MusicPlayOn**
- **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses
- **Mwave**
@@ -525,10 +526,10 @@
- **Myvi**
- **MyVidster**
- **MyviEmbed**
+ - **MyVisionTV**
- **n-tv.de**
- - **natgeo**
- - **natgeo:episodeguide**
- **natgeo:video**
+ - **NationalGeographicTV**
- **Naver**
- **NBA**
- **NBC**
@@ -550,6 +551,7 @@
- **netease:program**: 网易云音乐 - 电台节目
- **netease:singer**: 网易云音乐 - 歌手
- **netease:song**: 网易云音乐
+ - **NetPlus**
- **Netzkino**
- **Newgrounds**
- **NewgroundsPlaylist**
@@ -559,7 +561,6 @@
- **NextTV**: 壹電視
- **Nexx**
- **NexxEmbed**
- - **nfb**: National Film Board of Canada
- **nfl.com**
- **NhkVod**
- **nhl.com**
@@ -585,7 +586,6 @@
- **nowness**
- **nowness:playlist**
- **nowness:series**
- - **nowvideo**: NowVideo
- **Noz**
- **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **npo.nl:live**
@@ -601,6 +601,7 @@
- **NRKTVEpisodes**
- **NRKTVSeason**
- **NRKTVSeries**
+ - **NRLTV**
- **ntv.ru**
- **Nuvid**
- **NYTimes**
@@ -610,7 +611,6 @@
- **OdaTV**
- **Odnoklassniki**
- **OktoberfestTV**
- - **on.aol.com**
- **OnDemandKorea**
- **onet.pl**
- **onet.tv**
@@ -619,16 +619,26 @@
- **OnionStudios**
- **Ooyala**
- **OoyalaExternal**
- - **Openload**
- **OraTV**
+ - **orf:burgenland**: Radio Burgenland
- **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
+ - **orf:kaernten**: Radio Kärnten
+ - **orf:noe**: Radio Niederösterreich
+ - **orf:oberoesterreich**: Radio Oberösterreich
- **orf:oe1**: Radio Österreich 1
+ - **orf:oe3**: Radio Österreich 3
+ - **orf:salzburg**: Radio Salzburg
+ - **orf:steiermark**: Radio Steiermark
+ - **orf:tirol**: Radio Tirol
- **orf:tvthek**: ORF TVthek
+ - **orf:vorarlberg**: Radio Vorarlberg
+ - **orf:wien**: Radio Wien
+ - **OsnatelTV**
+ - **OutsideTV**
- **PacktPub**
- **PacktPubCourse**
- - **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV
- **ParamountNetwork**
- **parliamentlive.tv**: UK parliament videos
@@ -649,7 +659,10 @@
- **Piksel**
- **Pinkbike**
- **Pladform**
+ - **Platzi**
+ - **PlatziCourse**
- **play.fm**
+ - **PlayPlusTV**
- **PlaysTV**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
@@ -661,21 +674,20 @@
- **Pokemon**
- **PolskieRadio**
- **PolskieRadioCategory**
+ - **Popcorntimes**
- **PopcornTV**
- **PornCom**
- **PornerBros**
- - **PornFlip**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
- - **PornHubPlaylist**
- - **PornHubUserVideos**
+ - **PornHubPagedVideoList**
+ - **PornHubUser**
+ - **PornHubUserVideosUpload**
- **Pornotube**
- **PornoVoisines**
- **PornoXO**
- **PornTube**
- **PressTV**
- - **PrimeShareTV**
- - **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **puhutv:serie**
@@ -686,6 +698,7 @@
- **qqmusic:playlist**: QQ音乐 - 歌单
- **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜
+ - **QuantumTV**
- **Quickline**
- **QuicklineLive**
- **R7**
@@ -693,7 +706,7 @@
- **radio.de**
- **radiobremen**
- **radiocanada**
- - **RadioCanadaAudioVideo**
+ - **radiocanada:audiovideo**
- **radiofrance**
- **RadioJavan**
- **Rai**
@@ -705,6 +718,7 @@
- **RBMARadio**
- **RDS**: RDS.ca
- **RedBullTV**
+ - **RedBullTVRrnContent**
- **Reddit**
- **RedditR**
- **RedTube**
@@ -714,8 +728,6 @@
- **Restudy**
- **Reuters**
- **ReverbNation**
- - **revision**
- - **revision3:embed**
- **RICE**
- **RMCDecouverte**
- **RockstarGames**
@@ -738,9 +750,7 @@
- **rtve.es:television**
- **RTVNH**
- **RTVS**
- - **Rudo**
- **RUHD**
- - **RulePorn**
- **rutube**: Rutube videos
- **rutube:channel**: Rutube channels
- **rutube:embed**: Rutube embedded videos
@@ -753,6 +763,8 @@
- **safari**: safaribooksonline.com online video
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
+ - **SAKTV**
+ - **SaltTV**
- **Sapo**: SAPO Vídeos
- **savefrom.net**
- **SBS**: sbs.com.au
@@ -760,11 +772,13 @@
- **screen.yahoo:search**: Yahoo screen search
- **Screencast**
- **ScreencastOMatic**
+ - **ScrippsNetworks**
- **scrippsnetworks:watch**
+ - **SCTE**
+ - **SCTECourse**
- **Seeker**
- **SenateISVP**
- **SendtoNews**
- - **ServingSys**
- **Servus**
- **Sexu**
- **SeznamZpravy**
@@ -775,6 +789,7 @@
- **ShowRoomLive**
- **Sina**
- **SkylineWebcams**
+ - **SkyNews**
- **skynewsarabia:article**
- **skynewsarabia:video**
- **SkySports**
@@ -794,6 +809,7 @@
- **soundcloud:set**
- **soundcloud:trackstation**
- **soundcloud:user**
+ - **SoundcloudEmbed**
- **soundgasm**
- **soundgasm:profile**
- **southpark.cc.com**
@@ -802,13 +818,14 @@
- **southpark.nl**
- **southparkstudios.dk**
- **SpankBang**
+ - **SpankBangPlaylist**
- **Spankwire**
- **Spiegel**
- **Spiegel:Article**: Articles on spiegel.de
- **Spiegeltv**
- **sport.francetvinfo.fr**
- **Sport5**
- - **SportBoxEmbed**
+ - **SportBox**
- **SportDeutschland**
- **SpringboardPlatform**
- **Sprout**
@@ -819,12 +836,14 @@
- **Steam**
- **Stitcher**
- **Streamable**
- - **Streamango**
- **streamcloud.eu**
- **StreamCZ**
- **StreetVoice**
- **StretchInternet**
+ - **stv:player**
- **SunPorno**
+ - **sverigesradio:episode**
+ - **sverigesradio:publication**
- **SVT**
- **SVTPage**
- **SVTPlay**: SVT Play and Öppet arkiv
@@ -839,10 +858,13 @@
- **TastyTrade**
- **TBS**
- **TDSLifeway**
+ - **Teachable**
+ - **TeachableCourse**
- **teachertube**: teachertube.com videos
- **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel**
- **Teamcoco**
+ - **TeamTreeHouse**
- **TechTalks**
- **techtv.mit.edu**
- **ted**
@@ -855,13 +877,14 @@
- **TeleQuebec**
- **TeleQuebecEmission**
- **TeleQuebecLive**
+ - **TeleQuebecSquat**
- **TeleTask**
- **Telewebion**
- **TennisTV**
+ - **TenPlay**
- **TF1**
- **TFO**
- **TheIntercept**
- - **theoperaplatform**
- **ThePlatform**
- **ThePlatformFeed**
- **TheScene**
@@ -871,6 +894,8 @@
- **ThisAmericanLife**
- **ThisAV**
- **ThisOldHouse**
+ - **TikTok**
+ - **TikTokUser**
- **tinypic**: tinypic.com videos
- **TMZ**
- **TMZArticle**
@@ -884,6 +909,7 @@
- **ToypicsUser**: Toypics user profile
- **TrailerAddict** (Currently broken)
- **Trilulilu**
+ - **TruNews**
- **TruTV**
- **Tube8**
- **TubiTv**
@@ -894,12 +920,12 @@
- **tunein:topic**
- **TunePk**
- **Turbo**
- - **Tutv**
- **tv.dfb.de**
- **TV2**
- **tv2.hu**
- **TV2Article**
- - **TV3**
+ - **TV2DK**
+ - **TV2DKBornholmPlay**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
@@ -913,7 +939,9 @@
- **TVNet**
- **TVNoe**
- **TVNow**
- - **TVNowList**
+ - **TVNowAnnual**
+ - **TVNowNew**
+ - **TVNowSeason**
- **TVNowShow**
- **tvp**: Telewizja Polska
- **tvp:embed**: Telewizja Polska
@@ -921,6 +949,7 @@
- **TVPlayer**
- **TVPlayHome**
- **Tweakers**
+ - **TwitCasting**
- **twitch:chapter**
- **twitch:clips**
- **twitch:profile**
@@ -933,10 +962,12 @@
- **twitch:vod**
- **twitter**
- **twitter:amplify**
+ - **twitter:broadcast**
- **twitter:card**
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
+ - **UFCArabia**
- **UFCTV**
- **UKTVPlay**
- **umg:de**: Universal Music Deutschland
@@ -945,8 +976,6 @@
- **uol.com.br**
- **uplynk**
- **uplynk:preplay**
- - **Upskill**
- - **UpskillCourse**
- **Urort**: NRK P3 Urørt
- **URPlay**
- **USANetwork**
@@ -959,12 +988,12 @@
- **Vbox7**
- **VeeHD**
- **Veoh**
- - **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **VevoPlaylist**
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
+ - **vhx:embed**
- **Viafree**
- **vice**
- **vice:article**
@@ -973,16 +1002,12 @@
- **Viddler**
- **Videa**
- **video.google:search**: Google Video search
- - **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- - **VideoMega**
- **videomore**
- **videomore:season**
- **videomore:video**
- - **VideoPremium**
- **VideoPress**
- - **videoweed**: VideoWeed
- **Vidio**
- **VidLii**
- **vidme**
@@ -991,9 +1016,8 @@
- **Vidzi**
- **vier**: vier.be and vijf.be
- **vier:videos**
- - **ViewLift**
- - **ViewLiftEmbed**
- - **Viewster**
+ - **viewlift**
+ - **viewlift:embed**
- **Viidea**
- **viki**
- **viki:channel**
@@ -1027,20 +1051,22 @@
- **Voot**
- **VoxMedia**
- **VoxMediaVolume**
- - **Vporn**
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+ - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
+ - **VTXTV**
- **vube**: Vube.com
- **VuClip**
- **VVVVID**
- **VyboryMos**
- **Vzaar**
+ - **Wakanim**
- **Walla**
+ - **WalyTV**
- **washingtonpost**
- **washingtonpost:article**
- **wat.tv**
@@ -1057,20 +1083,18 @@
- **Weibo**
- **WeiboMobile**
- **WeiqiTV**: WQTV
- - **wholecloud**: WholeCloud
- - **Wimp**
- **Wistia**
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop**
- - **wrzuta.pl**
- - **wrzuta.pl:playlist**
- **WSJ**: Wall Street Journal
- **WSJArticle**
+ - **WWE**
- **XBef**
- **XboxClips**
- - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
+ - **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
- **XHamster**
- **XHamsterEmbed**
+ - **XHamsterUser**
- **xiami:album**: 虾米音乐 - 专辑
- **xiami:artist**: 虾米音乐 - 歌手
- **xiami:collection**: 虾米音乐 - 精选集
@@ -1086,10 +1110,14 @@
- **XVideos**
- **XXXYMovies**
- **Yahoo**: Yahoo screen and movies
+ - **yahoo:gyao**
+ - **yahoo:gyao:player**
+ - **yahoo:japannews**: Yahoo! Japan News
- **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек
+ - **YandexVideo**
- **YapFiles**
- **YesJapan**
- **yinyuetai:video**: 音悦Tai
@@ -1125,3 +1153,4 @@
- **ZDF**
- **ZDFChannel**
- **zingmp3**: mp3.zing.vn
+ - **Zype**
diff --git a/setup.cfg b/setup.cfg
index af9a554c6..da78a9c47 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -3,4 +3,4 @@ universal = True
[flake8]
exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
-ignore = E402,E501,E731,E741
+ignore = E402,E501,E731,E741,W503
diff --git a/setup.py b/setup.py
index 7dbb5805f..af68b485e 100644
--- a/setup.py
+++ b/setup.py
@@ -104,7 +104,7 @@ setup(
version=__version__,
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
- url='https://github.com/rg3/youtube-dl',
+ url='https://github.com/ytdl-org/youtube-dl',
author='Ricardo Garcia',
author_email='ytdl@yt-dl.org',
maintainer='Sergey M.',
@@ -124,6 +124,8 @@ setup(
'Development Status :: 5 - Production/Stable',
'Environment :: Console',
'License :: Public Domain',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
@@ -132,6 +134,13 @@ setup(
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: Implementation',
+ 'Programming Language :: Python :: Implementation :: CPython',
+ 'Programming Language :: Python :: Implementation :: IronPython',
+ 'Programming Language :: Python :: Implementation :: Jython',
+ 'Programming Language :: Python :: Implementation :: PyPy',
],
cmdclass={'build_lazy_extractors': build_lazy_extractors},
diff --git a/test/helper.py b/test/helper.py
index dfee217a9..e62aab11e 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -7,6 +7,7 @@ import json
import os.path
import re
import types
+import ssl
import sys
import youtube_dl.extractor
@@ -152,15 +153,27 @@ def expect_value(self, got, expected, field):
isinstance(got, compat_str),
'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
got = 'md5:' + md5(got)
- elif isinstance(expected, compat_str) and expected.startswith('mincount:'):
+ elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
self.assertTrue(
isinstance(got, (list, dict)),
'Expected field %s to be a list or a dict, but it is of type %s' % (
field, type(got).__name__))
- expected_num = int(expected.partition(':')[2])
- assertGreaterEqual(
+ op, _, expected_num = expected.partition(':')
+ expected_num = int(expected_num)
+ if op == 'mincount':
+ assert_func = assertGreaterEqual
+ msg_tmpl = 'Expected %d items in field %s, but only got %d'
+ elif op == 'maxcount':
+ assert_func = assertLessEqual
+ msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
+ elif op == 'count':
+ assert_func = assertEqual
+ msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
+ else:
+ assert False
+ assert_func(
self, len(got), expected_num,
- 'Expected %d items in field %s, but only got %d' % (expected_num, field, len(got)))
+ msg_tmpl % (expected_num, field, len(got)))
return
self.assertEqual(
expected, got,
@@ -236,6 +249,20 @@ def assertGreaterEqual(self, got, expected, msg=None):
self.assertTrue(got >= expected, msg)
+def assertLessEqual(self, got, expected, msg=None):
+ if not (got <= expected):
+ if msg is None:
+ msg = '%r not less than or equal to %r' % (got, expected)
+ self.assertTrue(got <= expected, msg)
+
+
+def assertEqual(self, got, expected, msg=None):
+ if not (got == expected):
+ if msg is None:
+ msg = '%r not equal to %r' % (got, expected)
+ self.assertTrue(got == expected, msg)
+
+
def expect_warnings(ydl, warnings_re):
real_warning = ydl.report_warning
@@ -244,3 +271,12 @@ def expect_warnings(ydl, warnings_re):
real_warning(w)
ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4833396a5..71f6608fe 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -9,11 +9,30 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value
-from youtube_dl.compat import compat_etree_fromstring
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/teapot':
+ self.send_response(TEAPOT_RESPONSE_STATUS)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ else:
+ assert False
class TestIE(InfoExtractor):
@@ -42,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase):
<meta content='Foo' property=og:foobar>
<meta name="og:test1" content='foo > < bar'/>
<meta name="og:test2" content="foo >//< bar"/>
+ <meta property=og-test3 content='Ill-formatted opengraph'/>
'''
self.assertEqual(ie._og_search_title(html), 'Foo')
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
@@ -50,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
+ self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
@@ -86,6 +107,184 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
+ def test_parse_html5_media_entries(self):
+ # from https://www.r18.com/
+ # with kbps in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.r18.com/',
+ r'''
+ <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4" res="240" label="300kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4" res="480" label="1000kbps">
+ <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4" res="740" label="1500kbps">
+ <p>Your browser does not support the video tag.</p>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '300kbps',
+ 'height': 240,
+ 'tbr': 300,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1000kbps',
+ 'height': 480,
+ 'tbr': 1000,
+ }, {
+ 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4',
+ 'ext': 'mp4',
+ 'format_id': '1500kbps',
+ 'height': 740,
+ 'tbr': 1500,
+ }],
+ 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg'
+ })
+
+ # from https://www.csfd.cz/
+ # with width and height
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.csfd.cz/',
+ r'''
+ <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" >
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720">
+ <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080">
+ <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4',
+ 'ext': 'mp4',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4',
+ 'ext': 'mp4',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4',
+ 'ext': 'mp4',
+ 'width': 1920,
+ 'height': 1080,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm',
+ 'ext': 'webm',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm',
+ 'ext': 'webm',
+ 'width': 1280,
+ 'height': 720,
+ }, {
+ 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm',
+ 'ext': 'webm',
+ 'width': 1920,
+ 'height': 1080,
+ }],
+ 'subtitles': {
+ 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}]
+ },
+ 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360'
+ })
+
+ # from https://tamasha.com/v/Kkdjw
+ # with height in label
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://tamasha.com/v/Kkdjw',
+ r'''
+ <video crossorigin="anonymous">
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4"
+ label="240p" res="240"/>
+ <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4"
+ label="144p" res="144"/>
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
+ 'ext': 'mp4',
+ 'format_id': '240p',
+ 'height': 240,
+ }, {
+ 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4',
+ 'ext': 'mp4',
+ 'format_id': '144p',
+ 'height': 144,
+ }]
+ })
+
+ # from https://www.directvnow.com
+ # with data-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video id="vid1" class="header--video-masked active" muted playsinline>
+ <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
+ }]
+ })
+
+ # from https://www.directvnow.com
+ # with data-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video id="vid1" class="header--video-masked active" muted playsinline>
+ <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
+ 'ext': 'mp4',
+ }]
+ })
+
+ # from https://www.klarna.com/uk/
+ # with data-video-src
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://www.directvnow.com',
+ r'''
+ <video loop autoplay muted class="responsive-video block-kl__video video-on-medium">
+ <source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" />
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
@@ -180,7 +379,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
def test_parse_m3u8_formats(self):
_TEST_CASES = [
(
- # https://github.com/rg3/youtube-dl/issues/11507
+ # https://github.com/ytdl-org/youtube-dl/issues/11507
# http://pluzz.francetv.fr/videos/le_ministere.html
'pluzz_francetv_11507',
'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais',
@@ -242,7 +441,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
}]
),
(
- # https://github.com/rg3/youtube-dl/issues/11995
+ # https://github.com/ytdl-org/youtube-dl/issues/11995
# http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor
'teamcoco_11995',
'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8',
@@ -316,7 +515,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
}]
),
(
- # https://github.com/rg3/youtube-dl/issues/12211
+ # https://github.com/ytdl-org/youtube-dl/issues/12211
# http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601
'toggle_mobile_12211',
'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8',
@@ -478,7 +677,64 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 1280,
'height': 720,
}]
- )
+ ),
+ (
+ # https://github.com/ytdl-org/youtube-dl/issues/18923
+ # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa
+ 'ted_18923',
+ 'http://hls.ted.com/talks/31241.m3u8',
+ [{
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '600k-Audio',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '68',
+ 'vcodec': 'none',
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '163',
+ 'acodec': 'none',
+ 'width': 320,
+ 'height': 180,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '481',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '769',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '984',
+ 'acodec': 'none',
+ 'width': 512,
+ 'height': 288,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '1255',
+ 'acodec': 'none',
+ 'width': 640,
+ 'height': 360,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '1693',
+ 'acodec': 'none',
+ 'width': 853,
+ 'height': 480,
+ }, {
+ 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b',
+ 'format_id': '2462',
+ 'acodec': 'none',
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
]
for m3u8_file, m3u8_url, expected_formats in _TEST_CASES:
@@ -492,11 +748,12 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
def test_parse_mpd_formats(self):
_TEST_CASES = [
(
- # https://github.com/rg3/youtube-dl/issues/13919
+ # https://github.com/ytdl-org/youtube-dl/issues/13919
# Also tests duplicate representation ids, see
- # https://github.com/rg3/youtube-dl/issues/15111
+ # https://github.com/ytdl-org/youtube-dl/issues/15111
'float_duration',
- 'http://unknown/manifest.mpd',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'm4a',
@@ -574,9 +831,10 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'height': 1080,
}]
), (
- # https://github.com/rg3/youtube-dl/pull/14844
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
'urls_only',
- 'http://unknown/manifest.mpd',
+ 'http://unknown/manifest.mpd', # mpd_url
+ None, # mpd_base_url
[{
'manifest_url': 'http://unknown/manifest.mpd',
'ext': 'mp4',
@@ -655,22 +913,68 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
'width': 1920,
'height': 1080,
}]
+ ), (
+ # https://github.com/ytdl-org/youtube-dl/issues/20346
+ # Media considered unfragmented even though it contains
+ # Initialization tag
+ 'unfragmented',
+ 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url
+ 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url
+ [{
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'm4a',
+ 'format_id': 'AUDIO-1',
+ 'format_note': 'DASH audio',
+ 'container': 'm4a_dash',
+ 'acodec': 'mp4a.40.2',
+ 'vcodec': 'none',
+ 'tbr': 129.87,
+ 'asr': 48000,
+
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-2',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 608.0,
+ 'width': 240,
+ 'height': 240,
+ 'fps': 30,
+ }, {
+ 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360',
+ 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd',
+ 'ext': 'mp4',
+ 'format_id': 'VIDEO-1',
+ 'format_note': 'DASH video',
+ 'container': 'mp4_dash',
+ 'acodec': 'none',
+ 'vcodec': 'avc1.4d401e',
+ 'tbr': 804.261,
+ 'width': 360,
+ 'height': 360,
+ 'fps': 30,
+ }]
)
]
- for mpd_file, mpd_url, expected_formats in _TEST_CASES:
+ for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES:
with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
mode='r', encoding='utf-8') as f:
formats = self.ie._parse_mpd_formats(
compat_etree_fromstring(f.read().encode('utf-8')),
- mpd_url=mpd_url)
+ mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
def test_parse_f4m_formats(self):
_TEST_CASES = [
(
- # https://github.com/rg3/youtube-dl/issues/14660
+ # https://github.com/ytdl-org/youtube-dl/issues/14660
'custom_base_url',
'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
[{
@@ -743,6 +1047,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
for i in range(len(entries)):
expect_dict(self, entries[i], expected_entries[i])
+ def test_response_with_expected_status_returns_content(self):
+ # Checks for mitigations against the effects of
+ # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+ # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+ # or the underlying `_download_webpage_handle` returning no content
+ # when a response matches `expected_status`.
+
+ httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ port = http_server_port(httpd)
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ (content, urlh) = self.ie._download_webpage_handle(
+ 'http://127.0.0.1:%d/teapot' % port, None,
+ expected_status=TEAPOT_RESPONSE_STATUS)
+ self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index f0f5a8470..1e204e551 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -239,6 +239,76 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
+ def test_format_selection_string_ops(self):
+ formats = [
+ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
+ {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL},
+ ]
+ info_dict = _make_result(formats)
+
+ # equals (=)
+ ydl = YDL({'format': '[format_id=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not equal (!=)
+ ydl = YDL({'format': '[format_id!=abc-cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # starts with (^=)
+ ydl = YDL({'format': '[format_id^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not start with (!^=)
+ ydl = YDL({'format': '[format_id!^=abc]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # ends with ($=)
+ ydl = YDL({'format': '[format_id$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not end with (!$=)
+ ydl = YDL({'format': '[format_id!$=cba]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ # contains (*=)
+ ydl = YDL({'format': '[format_id*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'abc-cba')
+
+ # does not contain (!*=)
+ ydl = YDL({'format': '[format_id!*=bc-cb]'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'zxc-cxz')
+
+ ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
+ ydl = YDL({'format': '[format_id!*=-]'})
+ self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
+
def test_youtube_format_selection(self):
order = [
'38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
@@ -341,7 +411,7 @@ class TestFormatSelection(unittest.TestCase):
# For extractors with incomplete formats (all formats are audio-only or
# video-only) best and worst should fallback to corresponding best/worst
# video-only or audio-only formats (as per
- # https://github.com/rg3/youtube-dl/pull/5556)
+ # https://github.com/ytdl-org/youtube-dl/pull/5556)
formats = [
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
{'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
@@ -372,7 +442,7 @@ class TestFormatSelection(unittest.TestCase):
self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
def test_format_selection_issue_10083(self):
- # See https://github.com/rg3/youtube-dl/issues/10083
+ # See https://github.com/ytdl-org/youtube-dl/issues/10083
formats = [
{'format_id': 'regular', 'height': 360, 'url': TEST_URL},
{'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL},
@@ -746,11 +816,15 @@ class TestYoutubeDL(unittest.TestCase):
'webpage_url': 'http://example.com',
}
- def get_ids(params):
+ def get_downloaded_info_dicts(params):
ydl = YDL(params)
- # make a copy because the dictionary can be modified
- ydl.process_ie_result(playlist.copy())
- return [int(v['id']) for v in ydl.downloaded_info_dicts]
+ # make a deep copy because the dictionary and nested entries
+ # can be modified
+ ydl.process_ie_result(copy.deepcopy(playlist))
+ return ydl.downloaded_info_dicts
+
+ def get_ids(params):
+ return [int(v['id']) for v in get_downloaded_info_dicts(params)]
result = get_ids({})
self.assertEqual(result, [1, 2, 3, 4])
@@ -782,8 +856,24 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '2-4,3-4,3'})
self.assertEqual(result, [2, 3, 4])
+ # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
+ # @{
+ result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result[0]['playlist_index'], 2)
+ self.assertEqual(result[1]['playlist_index'], 3)
+
+ result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result[0]['playlist_index'], 2)
+ self.assertEqual(result[1]['playlist_index'], 3)
+ self.assertEqual(result[2]['playlist_index'], 4)
+
+ result = get_downloaded_info_dicts({'playlist_items': '4,2'})
+ self.assertEqual(result[0]['playlist_index'], 4)
+ self.assertEqual(result[1]['playlist_index'], 2)
+ # @}
+
def test_urlopen_no_file_protocol(self):
- # see https://github.com/rg3/youtube-dl/issues/8227
+ # see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()
self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
new file mode 100644
index 000000000..05f48bd74
--- /dev/null
+++ b/test/test_YoutubeDLCookieJar.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import os
+import re
+import sys
+import tempfile
+import unittest
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.utils import YoutubeDLCookieJar
+
+
+class TestYoutubeDLCookieJar(unittest.TestCase):
+ def test_keep_session_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ tf = tempfile.NamedTemporaryFile(delete=False)
+ try:
+ cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True)
+ temp = tf.read().decode('utf-8')
+ self.assertTrue(re.search(
+ r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
+ self.assertTrue(re.search(
+ r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp))
+ finally:
+ tf.close()
+ os.remove(tf.name)
+
+ def test_strip_httponly_prefix(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+
+ def assert_cookie_has_value(key):
+ self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
+
+ assert_cookie_has_value('HTTPONLY_COOKIE')
+ assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
+
+ def test_malformed_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ # Cookies should be empty since all malformed cookie file entries
+ # will be ignored
+ self.assertFalse(cookiejar._cookies)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/test/test_aes.py b/test/test_aes.py
index 78a28751b..cc89fb6ab 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -44,16 +44,16 @@ class TestAES(unittest.TestCase):
def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(
- intlist_to_bytes(self.iv[:8]) +
- b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
+ intlist_to_bytes(self.iv[:8])
+ + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae'
).decode('utf-8')
decrypted = (aes_decrypt_text(encrypted, password, 16))
self.assertEqual(decrypted, self.secret_msg)
password = intlist_to_bytes(self.key).decode('utf-8')
encrypted = base64.b64encode(
- intlist_to_bytes(self.iv[:8]) +
- b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
+ intlist_to_bytes(self.iv[:8])
+ + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83'
).decode('utf-8')
decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index cd1cd4b24..81056a999 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -110,7 +110,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user'])
self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review'])
- # https://github.com/rg3/youtube-dl/issues/1930
+ # https://github.com/ytdl-org/youtube-dl/issues/1930
def test_soundcloud_not_matching_sets(self):
self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set'])
@@ -119,16 +119,10 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr'])
def test_pbs(self):
- # https://github.com/rg3/youtube-dl/issues/2350
+ # https://github.com/ytdl-org/youtube-dl/issues/2350
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
- def test_yahoo_https(self):
- # https://github.com/rg3/youtube-dl/issues/2701
- self.assertMatch(
- 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
- ['Yahoo'])
-
def test_no_duplicated_ie_names(self):
name_accu = collections.defaultdict(list)
for ie in self.ies:
diff --git a/test/test_compat.py b/test/test_compat.py
index d6c54e135..86ff389fd 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from youtube_dl.compat import (
compat_getenv,
compat_setenv,
+ compat_etree_Element,
compat_etree_fromstring,
compat_expanduser,
compat_shlex_split,
@@ -39,7 +40,7 @@ class TestCompat(unittest.TestCase):
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
- test_str = 'C:\Documents and Settings\тест\Application Data'
+ test_str = r'C:\Documents and Settings\тест\Application Data'
compat_setenv('HOME', test_str)
self.assertEqual(compat_expanduser('~'), test_str)
compat_setenv('HOME', old_home or '')
@@ -90,6 +91,12 @@ class TestCompat(unittest.TestCase):
self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag'])
self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文'])
+ def test_compat_etree_Element(self):
+ try:
+ compat_etree_Element.items
+ except AttributeError:
+ self.fail('compat_etree_Element is not a type')
+
def test_compat_etree_fromstring(self):
xml = '''
<root foo="bar" spam="中文">
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 5cf2bf1a5..750472281 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,26 +9,16 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import try_rm
+from test.helper import http_server_port, try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
-import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
TEST_SIZE = 10 * 1024
diff --git a/test/test_http.py b/test/test_http.py
index 409fec9c8..3ee0a5dda 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,6 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import http_server_port
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
@@ -16,15 +17,6 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index addb69d6f..4209d1d9a 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
class TestMetadataFromTitle(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
- self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
+ self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 7d57a628e..17aaaf20d 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -26,7 +26,6 @@ from youtube_dl.extractor import (
ThePlatformIE,
ThePlatformFeedIE,
RTVEALaCartaIE,
- FunnyOrDieIE,
DemocracynowIE,
)
@@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
-class TestFunnyOrDieSubtitles(BaseTestSubtitles):
- url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
- IE = FunnyOrDieIE
-
- def test_allsubtitles(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['allsubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
-
-
class TestDemocracynowSubtitles(BaseTestSubtitles):
url = 'http://www.democracynow.org/shows/2015/7/3'
IE = DemocracynowIE
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
index f1e899819..9f18055e6 100644
--- a/test/test_swfinterp.py
+++ b/test/test_swfinterp.py
@@ -34,8 +34,8 @@ def _make_testfunc(testfile):
def test_func(self):
as_file = os.path.join(TEST_DIR, testfile)
swf_file = os.path.join(TEST_DIR, test_id + '.swf')
- if ((not os.path.exists(swf_file)) or
- os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
+ if ((not os.path.exists(swf_file))
+ or os.path.getmtime(swf_file) < os.path.getmtime(as_file)):
# Recompile
try:
subprocess.check_call([
diff --git a/test/test_utils.py b/test/test_utils.py
index 9e28e008f..0896f4150 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -19,6 +19,7 @@ from youtube_dl.utils import (
age_restricted,
args_to_str,
encode_base_n,
+ caesar,
clean_html,
date_from_str,
DateRange,
@@ -33,11 +34,13 @@ from youtube_dl.utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
+ float_or_none,
get_element_by_class,
get_element_by_attribute,
get_elements_by_class,
get_elements_by_attribute,
InAdvancePagedList,
+ int_or_none,
intlist_to_bytes,
is_html,
js_to_json,
@@ -55,6 +58,7 @@ from youtube_dl.utils import (
parse_count,
parse_iso8601,
parse_resolution,
+ parse_bitrate,
pkcs1pad,
read_batch_urls,
sanitize_filename,
@@ -66,10 +70,13 @@ from youtube_dl.utils import (
remove_start,
remove_end,
remove_quotes,
+ rot47,
shell_quote,
smuggle_url,
str_to_int,
strip_jsonp,
+ strip_or_none,
+ subtitles_filename,
timeconvert,
unescapeHTML,
unified_strdate,
@@ -180,7 +187,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
- 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+ 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
@@ -257,6 +264,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+ def test_subtitles_filename(self):
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
+
def test_remove_start(self):
self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B')
@@ -330,6 +342,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
+ self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
+ self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -467,9 +481,30 @@ class TestUtil(unittest.TestCase):
shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
+ def test_float_or_none(self):
+ self.assertEqual(float_or_none('42.42'), 42.42)
+ self.assertEqual(float_or_none('42'), 42.0)
+ self.assertEqual(float_or_none(''), None)
+ self.assertEqual(float_or_none(None), None)
+ self.assertEqual(float_or_none([]), None)
+ self.assertEqual(float_or_none(set()), None)
+
+ def test_int_or_none(self):
+ self.assertEqual(int_or_none('42'), 42)
+ self.assertEqual(int_or_none(''), None)
+ self.assertEqual(int_or_none(None), None)
+ self.assertEqual(int_or_none([]), None)
+ self.assertEqual(int_or_none(set()), None)
+
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
+ self.assertEqual(str_to_int(523), 523)
+ # Python 3 has no long
+ if sys.version_info < (3, 0):
+ eval('self.assertEqual(str_to_int(123456L), 123456)')
+ self.assertEqual(str_to_int('noninteger'), None)
+ self.assertEqual(str_to_int([]), None)
def test_url_basename(self):
self.assertEqual(url_basename('http://foo.de/'), '')
@@ -507,6 +542,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(urljoin('http://foo.de/', ''), None)
self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
+ self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
+ self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
def test_url_or_none(self):
self.assertEqual(url_or_none(None), None)
@@ -732,6 +769,18 @@ class TestUtil(unittest.TestCase):
d = json.loads(stripped)
self.assertEqual(d, {'status': 'success'})
+ def test_strip_or_none(self):
+ self.assertEqual(strip_or_none(' abc'), 'abc')
+ self.assertEqual(strip_or_none('abc '), 'abc')
+ self.assertEqual(strip_or_none(' abc '), 'abc')
+ self.assertEqual(strip_or_none('\tabc\t'), 'abc')
+ self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc')
+ self.assertEqual(strip_or_none('abc'), 'abc')
+ self.assertEqual(strip_or_none(''), '')
+ self.assertEqual(strip_or_none(None), None)
+ self.assertEqual(strip_or_none(42), None)
+ self.assertEqual(strip_or_none([]), None)
+
def test_uppercase_escape(self):
self.assertEqual(uppercase_escape('aä'), 'aä')
self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐')
@@ -789,6 +838,15 @@ class TestUtil(unittest.TestCase):
'vcodec': 'av01.0.05M.08',
'acodec': 'none',
})
+ self.assertEqual(parse_codecs('theora, vorbis'), {
+ 'vcodec': 'theora',
+ 'acodec': 'vorbis',
+ })
+ self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), {
+ 'vcodec': 'unknownvcodec',
+ 'acodec': 'unknownacodec',
+ })
+ self.assertEqual(parse_codecs('unknown'), {})
def test_escape_rfc3986(self):
reserved = "!*'();:@&=+$,/?#[]"
@@ -1028,6 +1086,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_resolution('4k'), {'height': 2160})
self.assertEqual(parse_resolution('8K'), {'height': 4320})
+ def test_parse_bitrate(self):
+ self.assertEqual(parse_bitrate(None), None)
+ self.assertEqual(parse_bitrate(''), None)
+ self.assertEqual(parse_bitrate('300kbps'), 300)
+ self.assertEqual(parse_bitrate('1500kbps'), 1500)
+ self.assertEqual(parse_bitrate('300 kbps'), 300)
+
def test_version_tuple(self):
self.assertEqual(version_tuple('1'), (1,))
self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
@@ -1312,6 +1377,20 @@ Line 1
self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
+ def test_caesar(self):
+ self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
+ self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
+ self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
+ self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
+ self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
+ self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
+
+ def test_rot47(self):
+ self.assertEqual(rot47('youtube-dl'), r'J@FEF36\5=')
+ self.assertEqual(rot47('YOUTUBE-DL'), r'*~&%&qt\s{')
+
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)
diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py
index 324ca8525..e69c57377 100644
--- a/test/test_youtube_chapters.py
+++ b/test/test_youtube_chapters.py
@@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase):
for description, duration, expected_chapters in self._TEST_CASES:
ie = YoutubeIE()
expect_value(
- self, ie._extract_chapters(description, duration),
+ self, ie._extract_chapters_from_description(description, duration),
expected_chapters, None)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index f0c370eee..69df30eda 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -74,6 +74,28 @@ _TESTS = [
]
+class TestPlayerInfo(unittest.TestCase):
+ def test_youtube_extract_player_info(self):
+ PLAYER_URLS = (
+ ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
+ # obsolete
+ ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
+ ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
+ ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
+ ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
+ ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
+ ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
+ ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
+ ('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
+ ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
+ )
+ for player_url, expected_player_id in PLAYER_URLS:
+ expected_player_type = player_url.split('.')[-1]
+ player_type, player_id = YoutubeIE._extract_player_info(player_url)
+ self.assertEqual(player_type, expected_player_type)
+ self.assertEqual(player_id, expected_player_id)
+
+
class TestSignature(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/test/testdata/cookies/httponly_cookies.txt b/test/testdata/cookies/httponly_cookies.txt
new file mode 100644
index 000000000..c46541d6b
--- /dev/null
+++ b/test/testdata/cookies/httponly_cookies.txt
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE
+www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE
diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt
new file mode 100644
index 000000000..17bc40354
--- /dev/null
+++ b/test/testdata/cookies/malformed_cookies.txt
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar FALSE / FALSE 0 COOKIE
+
+# Cookie file entry with invalid expires at
+www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
diff --git a/test/testdata/cookies/session_cookies.txt b/test/testdata/cookies/session_cookies.txt
new file mode 100644
index 000000000..f6996f031
--- /dev/null
+++ b/test/testdata/cookies/session_cookies.txt
@@ -0,0 +1,6 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue
+www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value
diff --git a/test/testdata/m3u8/ted_18923.m3u8 b/test/testdata/m3u8/ted_18923.m3u8
new file mode 100644
index 000000000..52a27118b
--- /dev/null
+++ b/test/testdata/m3u8/ted_18923.m3u8
@@ -0,0 +1,28 @@
+#EXTM3U
+#EXT-X-VERSION:4
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1255659,PROGRAM-ID=1,CODECS="avc1.42c01e,mp4a.40.2",RESOLUTION=640x360
+/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=163154,PROGRAM-ID=1,CODECS="avc1.42c00c,mp4a.40.2",RESOLUTION=320x180
+/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=481701,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=769968,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=984037,PROGRAM-ID=1,CODECS="avc1.42c015,mp4a.40.2",RESOLUTION=512x288
+/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=1693925,PROGRAM-ID=1,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=853x480
+/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=2462469,PROGRAM-ID=1,CODECS="avc1.640028,mp4a.40.2",RESOLUTION=1280x720
+/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b
+#EXT-X-STREAM-INF:AUDIO="600k",BANDWIDTH=68101,PROGRAM-ID=1,CODECS="mp4a.40.2",DEFAULT=YES
+/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b
+
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=74298,PROGRAM-ID=1,CODECS="avc1.42c00c",RESOLUTION=320x180,URI="/videos/BorisHesser_2018S/video/64k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=216200,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/180k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=304717,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/320k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=350933,PROGRAM-ID=1,CODECS="avc1.42c015",RESOLUTION=512x288,URI="/videos/BorisHesser_2018S/video/450k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=495850,PROGRAM-ID=1,CODECS="avc1.42c01e",RESOLUTION=640x360,URI="/videos/BorisHesser_2018S/video/600k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=810750,PROGRAM-ID=1,CODECS="avc1.4d401f",RESOLUTION=853x480,URI="/videos/BorisHesser_2018S/video/950k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=1273700,PROGRAM-ID=1,CODECS="avc1.640028",RESOLUTION=1280x720,URI="/videos/BorisHesser_2018S/video/1500k_iframe.m3u8?nobumpers=true&uniqueId=76011e2b"
+
+#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="600k",LANGUAGE="en",NAME="Audio",AUTOSELECT=YES,DEFAULT=YES,URI="/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b",BANDWIDTH=614400
diff --git a/test/testdata/mpd/unfragmented.mpd b/test/testdata/mpd/unfragmented.mpd
new file mode 100644
index 000000000..5a3720be7
--- /dev/null
+++ b/test/testdata/mpd/unfragmented.mpd
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011">
+ <Period duration="PT54.915S">
+ <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1">
+ <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360">
+ <BaseURL>DASH_360</BaseURL>
+ <SegmentBase indexRange="915-1114" indexRangeExact="true">
+ <Initialization range="0-914"/>
+ </SegmentBase>
+ </Representation>
+ <Representation bandwidth="608000" codecs="avc1.4d401e" frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240">
+ <BaseURL>DASH_240</BaseURL>
+ <SegmentBase indexRange="913-1112" indexRangeExact="true">
+ <Initialization range="0-912"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ <AdaptationSet>
+ <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1">
+ <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
+ <BaseURL>audio</BaseURL>
+ <SegmentBase indexRange="832-1007" indexRangeExact="true">
+ <Initialization range="0-831"/>
+ </SegmentBase>
+ </Representation>
+ </AdaptationSet>
+ </Period>
+</MPD>
diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh
index 4edab5214..17ab1341a 100644
--- a/youtube-dl.plugin.zsh
+++ b/youtube-dl.plugin.zsh
@@ -7,7 +7,7 @@
# https://github.com/zsh-users/antigen
# Install youtube-dl:
-# antigen bundle rg3/youtube-dl
+# antigen bundle ytdl-org/youtube-dl
# Bundles installed by antigen are available for use immediately.
# Update youtube-dl (and all other antigen bundles):
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 38ba43a97..19370f62b 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -82,14 +82,17 @@ from .utils import (
sanitize_url,
sanitized_Request,
std_headers,
+ str_or_none,
subtitles_filename,
UnavailableVideoError,
url_basename,
version_tuple,
write_json_file,
write_string,
+ YoutubeDLCookieJar,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -307,6 +310,8 @@ class YoutubeDL(object):
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
otherwise prefer ffmpeg.
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
postprocessor_args: A list of additional command-line arguments for the
postprocessor.
@@ -396,9 +401,9 @@ class YoutubeDL(object):
else:
raise
- if (sys.platform != 'win32' and
- sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
- not params.get('restrictfilenames', False)):
+ if (sys.platform != 'win32'
+ and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not params.get('restrictfilenames', False)):
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
@@ -436,9 +441,9 @@ class YoutubeDL(object):
if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
if idxs:
correct_argv = (
- ['youtube-dl'] +
- [a for i, a in enumerate(argv) if i not in idxs] +
- ['--'] + [argv[i] for i in idxs]
+ ['youtube-dl']
+ + [a for i, a in enumerate(argv) if i not in idxs]
+ + ['--'] + [argv[i] for i in idxs]
)
self.report_warning(
'Long argument string detected. '
@@ -558,7 +563,7 @@ class YoutubeDL(object):
self.restore_console_title()
if self.params.get('cookiefile') is not None:
- self.cookiejar.save()
+ self.cookiejar.save(ignore_discard=True, ignore_expires=True)
def trouble(self, message=None, tb=None):
"""Determine action to take when a download problem appears.
@@ -846,10 +851,11 @@ class YoutubeDL(object):
if result_type in ('url', 'url_transparent'):
ie_result['url'] = sanitize_url(ie_result['url'])
extract_flat = self.params.get('extract_flat', False)
- if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
- extract_flat is True):
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(ie_result))
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+ or extract_flat is True):
+ self.__forced_printings(
+ ie_result, self.prepare_filename(ie_result),
+ incomplete=True)
return ie_result
if result_type == 'video':
@@ -887,7 +893,7 @@ class YoutubeDL(object):
# url_transparent. In such cases outer metadata (from ie_result)
# should be propagated to inner one (info). For this to happen
# _type of info should be overridden with url_transparent. This
- # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
+ # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
if new_result.get('_type') == 'url':
new_result['_type'] = 'url_transparent'
@@ -985,7 +991,7 @@ class YoutubeDL(object):
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': i + playliststart,
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
@@ -1062,21 +1068,24 @@ class YoutubeDL(object):
if not m:
STR_OPERATORS = {
'=': operator.eq,
- '!=': operator.ne,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
m = str_operator_rex.search(filter_spec)
if m:
comparison_value = m.group('value')
- op = STR_OPERATORS[m.group('op')]
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
if not m:
raise ValueError('Invalid filter specification %r' % filter_spec)
@@ -1601,7 +1610,7 @@ class YoutubeDL(object):
# by extractor are incomplete or not (i.e. whether extractor provides only
# video-only or audio-only formats) for proper formats selection for
# extractors with such incomplete formats (see
- # https://github.com/rg3/youtube-dl/pull/5556).
+ # https://github.com/ytdl-org/youtube-dl/pull/5556).
# Since formats may be filtered during format selection and may not match
# the original formats the results may be incorrect. Thus original formats
# or pre-calculated metrics should be passed to format selection routines
@@ -1609,12 +1618,12 @@ class YoutubeDL(object):
# We will pass a context object containing all necessary additional data
# instead of just formats.
# This fixes incorrect format selection issue (see
- # https://github.com/rg3/youtube-dl/issues/10083).
+ # https://github.com/ytdl-org/youtube-dl/issues/10083).
incomplete_formats = (
# All formats are video-only or
- all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
# all formats are audio-only
- all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
ctx = {
'formats': formats,
@@ -1686,6 +1695,36 @@ class YoutubeDL(object):
subs[lang] = f
return subs
+ def __forced_printings(self, info_dict, filename, incomplete):
+ def print_mandatory(field):
+ if (self.params.get('force%s' % field, False)
+ and (not incomplete or info_dict.get(field) is not None)):
+ self.to_stdout(info_dict[field])
+
+ def print_optional(field):
+ if (self.params.get('force%s' % field, False)
+ and info_dict.get(field) is not None):
+ self.to_stdout(info_dict[field])
+
+ print_mandatory('title')
+ print_mandatory('id')
+ if self.params.get('forceurl', False) and not incomplete:
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_optional('thumbnail')
+ print_optional('description')
+ if self.params.get('forcefilename', False) and filename is not None:
+ self.to_stdout(filename)
+ if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_dict['duration']))
+ print_mandatory('format')
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(info_dict))
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -1696,9 +1735,8 @@ class YoutubeDL(object):
if self._num_downloads >= int(max_downloads):
raise MaxDownloadsReached()
+ # TODO: backward compatibility, to be removed
info_dict['fulltitle'] = info_dict['title']
- if len(info_dict['title']) > 200:
- info_dict['title'] = info_dict['title'][:197] + '...'
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
@@ -1713,29 +1751,7 @@ class YoutubeDL(object):
info_dict['_filename'] = filename = self.prepare_filename(info_dict)
# Forced printings
- if self.params.get('forcetitle', False):
- self.to_stdout(info_dict['fulltitle'])
- if self.params.get('forceid', False):
- self.to_stdout(info_dict['id'])
- if self.params.get('forceurl', False):
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
- if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
- self.to_stdout(info_dict['thumbnail'])
- if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
- self.to_stdout(info_dict['description'])
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
- if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
- self.to_stdout(formatSeconds(info_dict['duration']))
- if self.params.get('forceformat', False):
- self.to_stdout(info_dict['format'])
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
+ self.__forced_printings(info_dict, filename, incomplete=False)
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
@@ -1776,6 +1792,8 @@ class YoutubeDL(object):
annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
+ elif not info_dict.get('annotations'):
+ self.report_warning('There are no annotations to write.')
else:
try:
self.to_screen('[info] Writing video annotations to: ' + annofn)
@@ -1797,7 +1815,7 @@ class YoutubeDL(object):
ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
@@ -1805,7 +1823,7 @@ class YoutubeDL(object):
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
- # See https://github.com/rg3/youtube-dl/issues/10268
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
except (OSError, IOError):
@@ -1940,8 +1958,8 @@ class YoutubeDL(object):
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('requested_formats') is None and
- info_dict.get('container') == 'm4a_dash'):
+ if (info_dict.get('requested_formats') is None
+ and info_dict.get('container') == 'm4a_dash'):
if fixup_policy == 'warn':
self.report_warning(
'%s: writing DASH m4a. '
@@ -1960,9 +1978,9 @@ class YoutubeDL(object):
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('protocol') == 'm3u8_native' or
- info_dict.get('protocol') == 'm3u8' and
- self.params.get('hls_prefer_native')):
+ if (info_dict.get('protocol') == 'm3u8_native'
+ or info_dict.get('protocol') == 'm3u8'
+ and self.params.get('hls_prefer_native')):
if fixup_policy == 'warn':
self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
@@ -1988,10 +2006,10 @@ class YoutubeDL(object):
def download(self, url_list):
"""Download a given list of URLs."""
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- if (len(url_list) > 1 and
- outtmpl != '-' and
- '%' not in outtmpl and
- self.params.get('max_downloads') != 1):
+ if (len(url_list) > 1
+ and outtmpl != '-'
+ and '%' not in outtmpl
+ and self.params.get('max_downloads') != 1):
raise SameFileError(outtmpl)
for url in url_list:
@@ -2056,15 +2074,24 @@ class YoutubeDL(object):
self.report_warning('Unable to remove downloaded original file')
def _make_archive_id(self, info_dict):
+ video_id = info_dict.get('id')
+ if not video_id:
+ return
# Future-proof against any change in case
# and backwards compatibility with prior versions
- extractor = info_dict.get('extractor_key')
+ extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
if extractor is None:
- if 'id' in info_dict:
- extractor = info_dict.get('ie_key') # key in a playlist
- if extractor is None:
- return None # Incomplete video information
- return extractor.lower() + ' ' + info_dict['id']
+ url = str_or_none(info_dict.get('url'))
+ if not url:
+ return
+ # Try to find matching extractor for the URL and take its ie_key
+ for ie in self._ies:
+ if ie.suitable(url):
+ extractor = ie.ie_key()
+ break
+ else:
+ return
+ return extractor.lower() + ' ' + video_id
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
@@ -2072,7 +2099,7 @@ class YoutubeDL(object):
return False
vid_id = self._make_archive_id(info_dict)
- if vid_id is None:
+ if not vid_id:
return False # Incomplete video information
try:
@@ -2127,8 +2154,8 @@ class YoutubeDL(object):
if res:
res += ', '
res += '%s container' % fdict['container']
- if (fdict.get('vcodec') is not None and
- fdict.get('vcodec') != 'none'):
+ if (fdict.get('vcodec') is not None
+ and fdict.get('vcodec') != 'none'):
if res:
res += ', '
res += fdict['vcodec']
@@ -2215,7 +2242,7 @@ class YoutubeDL(object):
return
if type('') is not compat_str:
- # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
+ # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
self.report_warning(
'Your Python is broken! Update to a newer and supported version')
@@ -2297,10 +2324,9 @@ class YoutubeDL(object):
self.cookiejar = compat_cookiejar.CookieJar()
else:
opts_cookiefile = expand_path(opts_cookiefile)
- self.cookiejar = compat_cookiejar.MozillaCookieJar(
- opts_cookiefile)
+ self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
if os.access(opts_cookiefile, os.R_OK):
- self.cookiejar.load()
+ self.cookiejar.load(ignore_discard=True, ignore_expires=True)
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
@@ -2310,7 +2336,7 @@ class YoutubeDL(object):
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
+ # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
proxy_handler = PerRequestProxyHandler(proxies)
@@ -2318,12 +2344,13 @@ class YoutubeDL(object):
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+ redirect_handler = YoutubeDLRedirectHandler()
data_handler = compat_urllib_request_DataHandler()
# When passing our own FileHandler instance, build_opener won't add the
# default FileHandler and allows us to disable the file protocol, which
# can be used for malicious purposes (see
- # https://github.com/rg3/youtube-dl/issues/8227)
+ # https://github.com/ytdl-org/youtube-dl/issues/8227)
file_handler = compat_urllib_request.FileHandler()
def file_open(*args, **kwargs):
@@ -2331,11 +2358,11 @@ class YoutubeDL(object):
file_handler.file_open = file_open
opener = compat_urllib_request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
+ proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
opener.addheaders = []
self._opener = opener
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index ba435ea42..9a659fc65 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -48,7 +48,7 @@ from .YoutubeDL import YoutubeDL
def _real_main(argv=None):
# Compatibility fixes for Windows
if sys.platform == 'win32':
- # https://github.com/rg3/youtube-dl/issues/820
+ # https://github.com/ytdl-org/youtube-dl/issues/820
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
workaround_optparse_bug9161()
@@ -94,7 +94,7 @@ def _real_main(argv=None):
if opts.verbose:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except IOError:
- sys.exit('ERROR: batch file could not be read')
+ sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
_enc = preferredencoding()
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
@@ -166,6 +166,8 @@ def _real_main(argv=None):
if opts.max_sleep_interval is not None:
if opts.max_sleep_interval < 0:
parser.error('max sleep interval must be positive or 0')
+ if opts.sleep_interval is None:
+ parser.error('min sleep interval must be specified, use --min-sleep-interval')
if opts.max_sleep_interval < opts.sleep_interval:
parser.error('max sleep interval must be greater than or equal to min sleep interval')
else:
@@ -228,14 +230,14 @@ def _real_main(argv=None):
if opts.allsubtitles and not opts.writeautomaticsub:
opts.writesubtitles = True
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
- (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or
- (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or
- (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
- (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
- (opts.useid and '%(id)s.%(ext)s') or
- (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or
- DEFAULT_OUTTMPL)
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+ or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+ or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+ or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+ or (opts.useid and '%(id)s.%(ext)s')
+ or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+ or DEFAULT_OUTTMPL)
if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
parser.error('Cannot download a video and extract audio into the same'
' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 7b770340f..0ee9bc760 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -57,6 +57,17 @@ try:
except ImportError: # Python 2
import cookielib as compat_cookiejar
+if sys.version_info[0] == 2:
+ class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
+ def __init__(self, version, name, value, *args, **kwargs):
+ if isinstance(name, compat_str):
+ name = name.encode()
+ if isinstance(value, compat_str):
+ value = value.encode()
+ compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
+else:
+ compat_cookiejar_Cookie = compat_cookiejar.Cookie
+
try:
import http.cookies as compat_cookies
except ImportError: # Python 2
@@ -2364,7 +2375,7 @@ except ImportError: # Python 2
# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
# implementations from cpython 3.4.3's stdlib. Python 2's version
- # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
+ # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
def compat_urllib_parse_unquote_to_bytes(string):
"""unquote_to_bytes('abc%20def') -> b'abc def'."""
@@ -2508,6 +2519,15 @@ class _TreeBuilder(etree.TreeBuilder):
pass
+try:
+ # xml.etree.ElementTree.Element is a method in Python <=2.6 and
+ # the following will crash with:
+ # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
+ isinstance(None, xml.etree.ElementTree.Element)
+ from xml.etree.ElementTree import Element as compat_etree_Element
+except TypeError: # Python <=2.6
+ from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+
if sys.version_info[0] >= 3:
def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@@ -2640,9 +2660,9 @@ else:
try:
args = shlex.split('中文')
- assert (isinstance(args, list) and
- isinstance(args[0], compat_str) and
- args[0] == '中文')
+ assert (isinstance(args, list)
+ and isinstance(args[0], compat_str)
+ and args[0] == '中文')
compat_shlex_split = shlex.split
except (AssertionError, UnicodeEncodeError):
# Working around shlex issue with unicode strings on some python 2
@@ -2745,6 +2765,17 @@ else:
compat_expanduser = os.path.expanduser
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+ # os.path.realpath on Windows does not follow symbolic links
+ # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+ def compat_realpath(path):
+ while os.path.islink(path):
+ path = os.path.abspath(os.readlink(path))
+ return path
+else:
+ compat_realpath = os.path.realpath
+
+
if sys.version_info < (3, 0):
def compat_print(s):
from .utils import preferredencoding
@@ -2819,7 +2850,7 @@ else:
compat_socket_create_connection = socket.create_connection
-# Fix https://github.com/rg3/youtube-dl/issues/4223
+# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
# See http://bugs.python.org/issue9161 for what is broken
def workaround_optparse_bug9161():
op = optparse.OptionParser()
@@ -2944,7 +2975,7 @@ if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4,
# PyPy2 prior to version 5.4.0 expects byte strings as Windows function
# names, see the original PyPy issue [1] and the youtube-dl one [2].
# 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
- # 2. https://github.com/rg3/youtube-dl/pull/4392
+ # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
real = ctypes.WINFUNCTYPE(*args, **kwargs)
@@ -2967,8 +2998,10 @@ __all__ = [
'compat_basestring',
'compat_chr',
'compat_cookiejar',
+ 'compat_cookiejar_Cookie',
'compat_cookies',
'compat_ctypes_WINFUNCTYPE',
+ 'compat_etree_Element',
'compat_etree_fromstring',
'compat_etree_register_namespace',
'compat_expanduser',
@@ -2988,6 +3021,7 @@ __all__ = [
'compat_os_name',
'compat_parse_qs',
'compat_print',
+ 'compat_realpath',
'compat_setenv',
'compat_shlex_quote',
'compat_shlex_split',
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 5979833c0..1cdba89cd 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -176,7 +176,9 @@ class FileDownloader(object):
return
speed = float(byte_counter) / elapsed
if speed > rate_limit:
- time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
+ sleep_time = float(byte_counter) / rate_limit - elapsed
+ if sleep_time > 0:
+ time.sleep(sleep_time)
def temp_name(self, filename):
"""Returns a temporary filename for the given filename."""
@@ -330,15 +332,15 @@ class FileDownloader(object):
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False) and
- os.path.exists(encodeFilename(filename))
+ self.params.get('nooverwrites', False)
+ and os.path.exists(encodeFilename(filename))
)
if not hasattr(filename, 'write'):
continuedl_and_exists = (
- self.params.get('continuedl', True) and
- os.path.isfile(encodeFilename(filename)) and
- not self.params.get('nopart', False)
+ self.params.get('continuedl', True)
+ and os.path.isfile(encodeFilename(filename))
+ and not self.params.get('nopart', False)
)
# Check file already present
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index eaa7adf7c..c6d674bc6 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -53,7 +53,7 @@ class DashSegmentsFD(FragmentFD):
except compat_urllib_error.HTTPError as err:
# YouTube may often return 404 HTTP error for a fragment causing the
# whole download to fail. However if the same fragment is immediately
- # retried with the same request data this usually succeeds (1-2 attemps
+ # retried with the same request data this usually succeeds (1-2 attempts
# is usually enough) thus allowing to download the whole file successfully.
# To be future-proof we will retry all fragments that fail with any
# HTTP error.
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 958d00aac..c31f8910a 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -121,7 +121,11 @@ class CurlFD(ExternalFD):
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
cmd += self._option('--limit-rate', 'ratelimit')
- cmd += self._option('--retry', 'retries')
+ retry = self._option('--retry', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '2147483647'
+ cmd += retry
cmd += self._option('--max-filesize', 'max_filesize')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--proxy', 'proxy')
@@ -160,6 +164,12 @@ class WgetFD(ExternalFD):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--limit-rate', 'ratelimit')
+ retry = self._option('--tries', 'retries')
+ if len(retry) == 2:
+ if retry[1] in ('inf', 'infinite'):
+ retry[1] = '0'
+ cmd += retry
cmd += self._option('--bind-address', 'source_address')
cmd += self._option('--proxy', 'proxy')
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
@@ -184,6 +194,7 @@ class Aria2cFD(ExternalFD):
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
+ cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += ['--', info_dict['url']]
return cmd
@@ -229,7 +240,7 @@ class FFmpegFD(ExternalFD):
# setting -seekable prevents ffmpeg from guessing if the server
# supports seeking(by adding the header `Range: bytes=0-`), which
# can cause problems in some cases
- # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
+ # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
@@ -279,6 +290,7 @@ class FFmpegFD(ExternalFD):
tc_url = info_dict.get('tc_url')
flash_version = info_dict.get('flash_version')
live = info_dict.get('rtmp_live', False)
+ conn = info_dict.get('rtmp_conn')
if player_url is not None:
args += ['-rtmp_swfverify', player_url]
if page_url is not None:
@@ -293,6 +305,11 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_flashver', flash_version]
if live:
args += ['-rtmp_live', 'live']
+ if isinstance(conn, list):
+ for entry in conn:
+ args += ['-rtmp_conn', entry]
+ elif isinstance(conn, compat_str):
+ args += ['-rtmp_conn', conn]
args += ['-i', url, '-c', 'copy']
@@ -324,7 +341,7 @@ class FFmpegFD(ExternalFD):
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/rg3/youtube-dl/issues/8300).
+ # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if sys.platform != 'win32':
proc.communicate(b'q')
raise
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 15e71be9a..8dd3c2eeb 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -238,8 +238,8 @@ def write_metadata_tag(stream, metadata):
def remove_encrypted_media(media):
- return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and
- 'drmAdditionalHeaderSetId' not in e.attrib,
+ return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib
+ and 'drmAdditionalHeaderSetId' not in e.attrib,
media))
@@ -267,8 +267,8 @@ class F4mFD(FragmentFD):
media = doc.findall(_add_ns('media'))
if not media:
self.report_error('No media found')
- for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
- doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
# If id attribute is missing it's valid for all media nodes
# without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
if 'id' not in e.attrib:
@@ -324,8 +324,8 @@ class F4mFD(FragmentFD):
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl()
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
- # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
- # and https://github.com/rg3/youtube-dl/issues/7823)
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
+ # and https://github.com/ytdl-org/youtube-dl/issues/7823)
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
doc = compat_etree_fromstring(manifest)
@@ -409,7 +409,7 @@ class F4mFD(FragmentFD):
# In tests, segments may be truncated, and thus
# FlvReader may not be able to parse the whole
# chunk. If so, write the segment as is
- # See https://github.com/rg3/youtube-dl/issues/9214
+ # See https://github.com/ytdl-org/youtube-dl/issues/9214
dest_stream.write(down_data)
break
raise
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
index 917f6dc01..02f35459e 100644
--- a/youtube_dl/downloader/fragment.py
+++ b/youtube_dl/downloader/fragment.py
@@ -190,12 +190,13 @@ class FragmentFD(FileDownloader):
})
def _start_frag_download(self, ctx):
+ resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags']
# This dict stores the download progress, it's updated by the progress
# hook
state = {
'status': 'downloading',
- 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
+ 'downloaded_bytes': resume_len,
'fragment_index': ctx['fragment_index'],
'fragment_count': total_frags,
'filename': ctx['filename'],
@@ -219,8 +220,8 @@ class FragmentFD(FileDownloader):
frag_total_bytes = s.get('total_bytes') or 0
if not ctx['live']:
estimated_size = (
- (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
- (state['fragment_index'] + 1) * total_frags)
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+ / (state['fragment_index'] + 1) * total_frags)
state['total_bytes_estimate'] = estimated_size
if s['status'] == 'finished':
@@ -234,8 +235,8 @@ class FragmentFD(FileDownloader):
state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
if not ctx['live']:
state['eta'] = self.calc_eta(
- start, time_now, estimated_size,
- state['downloaded_bytes'])
+ start, time_now, estimated_size - resume_len,
+ state['downloaded_bytes'] - resume_len)
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index fd304527e..84bc34928 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -64,7 +64,7 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore')
if not self.can_download(s, info_dict):
- if info_dict.get('extra_param_to_segment_url'):
+ if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
self.report_error('pycrypto not found. Please install it.')
return False
self.report_warning(
@@ -75,9 +75,13 @@ class HlsFD(FragmentFD):
fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict)
- def is_ad_fragment(s):
- return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
- s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+ def is_ad_fragment_start(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+
+ def is_ad_fragment_end(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
media_frags = 0
ad_frags = 0
@@ -87,12 +91,13 @@ class HlsFD(FragmentFD):
if not line:
continue
if line.startswith('#'):
- if is_ad_fragment(line):
- ad_frags += 1
+ if is_ad_fragment_start(line):
ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
continue
if ad_frag_next:
- ad_frag_next = False
+ ad_frags += 1
continue
media_frags += 1
@@ -123,7 +128,6 @@ class HlsFD(FragmentFD):
if line:
if not line.startswith('#'):
if ad_frag_next:
- ad_frag_next = False
continue
frag_index += 1
if frag_index <= ctx['fragment_index']:
@@ -148,8 +152,8 @@ class HlsFD(FragmentFD):
except compat_urllib_error.HTTPError as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
- # See https://github.com/rg3/youtube-dl/issues/10165,
- # https://github.com/rg3/youtube-dl/issues/10448).
+ # See https://github.com/ytdl-org/youtube-dl/issues/10165,
+ # https://github.com/ytdl-org/youtube-dl/issues/10448).
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
@@ -165,7 +169,7 @@ class HlsFD(FragmentFD):
if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
- self._prepare_url(info_dict, decrypt_info['URI'])).read()
+ self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
frag_content = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
self._append_fragment(ctx, frag_content)
@@ -196,8 +200,10 @@ class HlsFD(FragmentFD):
'start': sub_range_start,
'end': sub_range_start + int(splitted_byte_range[0]),
}
- elif is_ad_fragment(line):
+ elif is_ad_fragment_start(line):
ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
self._finish_frag_download(ctx)
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 5b1e96013..5046878df 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -46,8 +46,8 @@ class HttpFD(FileDownloader):
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size') or
- self.params.get('http_chunk_size') or 0)
+ info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or self.params.get('http_chunk_size') or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
@@ -111,7 +111,7 @@ class HttpFD(FileDownloader):
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite of requested Range (see
- # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
+ # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
if content_range:
@@ -123,11 +123,11 @@ class HttpFD(FileDownloader):
content_len = int_or_none(content_range_m.group(3))
accept_content_len = (
# Non-chunked download
- not ctx.chunk_size or
+ not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
- content_range_end == range_end or
- content_len < range_end)
+ or content_range_end == range_end
+ or content_len < range_end)
if accept_content_len:
ctx.data_len = content_len
return
@@ -152,8 +152,8 @@ class HttpFD(FileDownloader):
raise
else:
# Examine the reported length
- if (content_length is not None and
- (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+ if (content_length is not None
+ and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
@@ -227,7 +227,7 @@ class HttpFD(FileDownloader):
while True:
try:
# Download and write
- data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
+ data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
@@ -299,7 +299,7 @@ class HttpFD(FileDownloader):
'elapsed': now - ctx.start_time,
})
- if is_test and byte_counter == data_len:
+ if data_len is not None and byte_counter == data_len:
break
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
index 063fcf444..1ca666b4a 100644
--- a/youtube_dl/downloader/ism.py
+++ b/youtube_dl/downloader/ism.py
@@ -146,7 +146,7 @@ def write_piff_header(stream, params):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
- avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
+ avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one
avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
avcc_payload += u16.pack(len(sps))
avcc_payload += sps
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
index 4ac323bf6..6637f4f35 100644
--- a/youtube_dl/extractor/abc.py
+++ b/youtube_dl/extractor/abc.py
@@ -110,17 +110,17 @@ class ABCIViewIE(InfoExtractor):
# ABC iview programs are normally available for 14 days only.
_TESTS = [{
- 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
- 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
+ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
+ 'md5': '67715ce3c78426b11ba167d875ac6abf',
'info_dict': {
- 'id': 'ZX9371A050S00',
+ 'id': 'LE1927H001S00',
'ext': 'mp4',
- 'title': "Gaston's Birthday",
- 'series': "Ben And Holly's Little Kingdom",
- 'description': 'md5:f9de914d02f226968f598ac76f105bcf',
- 'upload_date': '20180604',
- 'uploader_id': 'abc4kids',
- 'timestamp': 1528140219,
+ 'title': "Series 11 Ep 1",
+ 'series': "Gruen",
+ 'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
+ 'upload_date': '20190925',
+ 'uploader_id': 'abc1',
+ 'timestamp': 1569445289,
},
'params': {
'skip_download': True,
@@ -148,7 +148,7 @@ class ABCIViewIE(InfoExtractor):
'hdnea': token,
})
- for sd in ('sd', 'sd-low'):
+ for sd in ('720', 'sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url:
diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py
index cd29aca77..8b407bf9c 100644
--- a/youtube_dl/extractor/abcnews.py
+++ b/youtube_dl/extractor/abcnews.py
@@ -15,10 +15,13 @@ class AbcNewsVideoIE(AMPIE):
IE_NAME = 'abcnews:video'
_VALID_URL = r'''(?x)
https?://
- abcnews\.go\.com/
(?:
- [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
- video/embed\?.*?\bid=
+ abcnews\.go\.com/
+ (?:
+ [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
+ video/embed\?.*?\bid=
+ )|
+ fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
)
(?P<id>\d+)
'''
diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py
index 03b92a39c..0bc69a64f 100644
--- a/youtube_dl/extractor/abcotvs.py
+++ b/youtube_dl/extractor/abcotvs.py
@@ -4,29 +4,30 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ dict_get,
int_or_none,
- parse_iso8601,
+ try_get,
)
class ABCOTVSIE(InfoExtractor):
IE_NAME = 'abcotvs'
IE_DESC = 'ABC Owned Television Stations'
- _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
_TESTS = [
{
'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
'info_dict': {
- 'id': '472581',
+ 'id': '472548',
'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
'ext': 'mp4',
- 'title': 'East Bay museum celebrates vintage synthesizers',
+ 'title': 'East Bay museum celebrates synthesized music',
'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1421123075,
+ 'timestamp': 1421118520,
'upload_date': '20150113',
- 'uploader': 'Jonathan Bloom',
},
'params': {
# m3u8 download
@@ -37,39 +38,63 @@ class ABCOTVSIE(InfoExtractor):
'url': 'http://abc7news.com/472581',
'only_matching': True,
},
+ {
+ 'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
+ 'only_matching': True,
+ },
]
+ _SITE_MAP = {
+ '6abc': 'wpvi',
+ 'abc11': 'wtvd',
+ 'abc13': 'ktrk',
+ 'abc30': 'kfsn',
+ 'abc7': 'kabc',
+ 'abc7chicago': 'wls',
+ 'abc7news': 'kgo',
+ 'abc7ny': 'wabc',
+ }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
-
- webpage = self._download_webpage(url, display_id)
+ site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id = display_id or video_id
+ station = self._SITE_MAP[site]
- m3u8 = self._html_search_meta(
- 'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
+ data = self._download_json(
+ 'https://api.abcotvs.com/v2/content', display_id, query={
+ 'id': video_id,
+ 'key': 'otv.web.%s.story' % station,
+ 'station': station,
+ })['data']
+ video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
+ video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
+ title = video.get('title') or video['linkText']
- formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
+ formats = []
+ m3u8_url = video.get('m3u8')
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
+ mp4_url = video.get('mp4')
+ if mp4_url:
+ formats.append({
+ 'abr': 128,
+ 'format_id': 'https',
+ 'height': 360,
+ 'url': mp4_url,
+ 'width': 640,
+ })
self._sort_formats(formats)
- title = self._og_search_title(webpage).strip()
- description = self._og_search_description(webpage).strip()
- thumbnail = self._og_search_thumbnail(webpage)
- timestamp = parse_iso8601(self._search_regex(
- r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
- webpage, 'upload date', fatal=False))
- uploader = self._search_regex(
- r'rel="author">([^<]+)</a>',
- webpage, 'uploader', default=None)
+ image = video.get('image') or {}
return {
'id': video_id,
'display_id': display_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'uploader': uploader,
+ 'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
+ 'thumbnail': dict_get(image, ('source', 'dynamicSource')),
+ 'timestamp': int_or_none(video.get('date')),
+ 'duration': int_or_none(video.get('length')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py
index 6d846ea7a..b17c792d2 100644
--- a/youtube_dl/extractor/acast.py
+++ b/youtube_dl/extractor/acast.py
@@ -7,6 +7,7 @@ import functools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ clean_html,
float_or_none,
int_or_none,
try_get,
@@ -17,27 +18,17 @@ from ..utils import (
class ACastIE(InfoExtractor):
IE_NAME = 'acast'
- _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<channel>[^/]+)/(?P<id>[^/#?]+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:embed|www)\.)?acast\.com/|
+ play\.acast\.com/s/
+ )
+ (?P<channel>[^/]+)/(?P<id>[^/#?]+)
+ '''
_TESTS = [{
- # test with one bling
- 'url': 'https://www.acast.com/condenasttraveler/-where-are-you-taipei-101-taiwan',
- 'md5': 'ada3de5a1e3a2a381327d749854788bb',
- 'info_dict': {
- 'id': '57de3baa-4bb0-487e-9418-2692c1277a34',
- 'ext': 'mp3',
- 'title': '"Where Are You?": Taipei 101, Taiwan',
- 'description': 'md5:a0b4ef3634e63866b542e5b1199a1a0e',
- 'timestamp': 1196172000,
- 'upload_date': '20071127',
- 'duration': 211,
- 'creator': 'Concierge',
- 'series': 'Condé Nast Traveler Podcast',
- 'episode': '"Where Are You?": Taipei 101, Taiwan',
- }
- }, {
- # test with multiple blings
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
- 'md5': 'a02393c74f3bdb1801c3ec2695577ce0',
+ 'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
'info_dict': {
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
'ext': 'mp3',
@@ -50,28 +41,43 @@ class ACastIE(InfoExtractor):
'series': 'Spår',
'episode': '2. Raggarmordet - Röster ur det förflutna',
}
+ }, {
+ 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
+ 'only_matching': True,
}]
def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups()
s = self._download_json(
- 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id),
- display_id)['result']
+ 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
+ display_id)
media_url = s['url']
+ if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
+ episode_url = s.get('episodeUrl')
+ if episode_url:
+ display_id = episode_url
+ else:
+ channel, display_id = re.match(self._VALID_URL, s['link']).groups()
cast_data = self._download_json(
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
display_id)['result']
e = cast_data['episode']
- title = e['name']
+ title = e.get('name') or s['title']
return {
'id': compat_str(e['id']),
'display_id': display_id,
'url': media_url,
'title': title,
- 'description': e.get('description') or e.get('summary'),
+ 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
'thumbnail': e.get('image'),
- 'timestamp': unified_timestamp(e.get('publishingDate')),
- 'duration': float_or_none(s.get('duration') or e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
+ 'duration': float_or_none(e.get('duration') or s.get('duration')),
'filesize': int_or_none(e.get('contentLength')),
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
@@ -83,17 +89,27 @@ class ACastIE(InfoExtractor):
class ACastChannelIE(InfoExtractor):
IE_NAME = 'acast:channel'
- _VALID_URL = r'https?://(?:www\.)?acast\.com/(?P<id>[^/#?]+)'
- _TEST = {
- 'url': 'https://www.acast.com/condenasttraveler',
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?acast\.com/|
+ play\.acast\.com/s/
+ )
+ (?P<id>[^/#?]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.acast.com/todayinfocus',
'info_dict': {
- 'id': '50544219-29bb-499e-a083-6087f4cb7797',
- 'title': 'Condé Nast Traveler Podcast',
- 'description': 'md5:98646dee22a5b386626ae31866638fbd',
+ 'id': '4efc5294-5385-4847-98bd-519799ce5786',
+ 'title': 'Today in Focus',
+ 'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
},
- 'playlist_mincount': 20,
- }
- _API_BASE_URL = 'https://www.acast.com/api/'
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'http://play.acast.com/s/ft-banking-weekly',
+ 'only_matching': True,
+ }]
+ _API_BASE_URL = 'https://play.acast.com/api/'
_PAGE_SIZE = 10
@classmethod
@@ -106,7 +122,7 @@ class ACastChannelIE(InfoExtractor):
channel_slug, note='Download page %d of channel data' % page)
for cast in casts:
yield self.url_result(
- 'https://www.acast.com/%s/%s' % (channel_slug, cast['url']),
+ 'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
'ACast', cast['id'])
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
deleted file mode 100644
index 9f8a71262..000000000
--- a/youtube_dl/extractor/addanime.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- ExtractorError,
- qualities,
-)
-
-
-class AddAnimeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
- _TESTS = [{
- 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
- 'md5': '72954ea10bc979ab5e2eb288b21425a0',
- 'info_dict': {
- 'id': '24MR3YO5SAS9',
- 'ext': 'mp4',
- 'description': 'One Piece 606',
- 'title': 'One Piece 606',
- },
- 'skip': 'Video is gone',
- }, {
- 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- webpage = self._download_webpage(url, video_id)
- except ExtractorError as ee:
- if not isinstance(ee.cause, compat_HTTPError) or \
- ee.cause.code != 503:
- raise
-
- redir_webpage = ee.cause.read().decode('utf-8')
- action = self._search_regex(
- r'<form id="challenge-form" action="([^"]+)"',
- redir_webpage, 'Redirect form')
- vc = self._search_regex(
- r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
- redir_webpage, 'redirect vc value')
- av = re.search(
- r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
- redir_webpage)
- if av is None:
- raise ExtractorError('Cannot find redirect math task')
- av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
-
- parsed_url = compat_urllib_parse_urlparse(url)
- av_val = av_res + len(parsed_url.netloc)
- confirm_url = (
- parsed_url.scheme + '://' + parsed_url.netloc +
- action + '?' +
- compat_urllib_parse_urlencode({
- 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
- self._download_webpage(
- confirm_url, video_id,
- note='Confirming after redirect')
- webpage = self._download_webpage(url, video_id)
-
- FORMATS = ('normal', 'hq')
- quality = qualities(FORMATS)
- formats = []
- for format_id in FORMATS:
- rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
- video_url = self._search_regex(rex, webpage, 'video file URLx',
- fatal=False)
- if not video_url:
- continue
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'quality': quality(format_id),
- })
- self._sort_formats(formats)
- video_title = self._og_search_title(webpage)
- video_description = self._og_search_description(webpage)
-
- return {
- '_type': 'video',
- 'id': video_id,
- 'formats': formats,
- 'title': video_title,
- 'description': video_description
- }
diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py
index 1eb99c39a..c95ad2173 100644
--- a/youtube_dl/extractor/adn.py
+++ b/youtube_dl/extractor/adn.py
@@ -21,7 +21,6 @@ from ..utils import (
intlist_to_bytes,
long_to_bytes,
pkcs1pad,
- srt_subtitles_timecode,
strip_or_none,
urljoin,
)
@@ -42,6 +41,18 @@ class ADNIE(InfoExtractor):
}
_BASE_URL = 'http://animedigitalnetwork.fr'
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
+ _POS_ALIGN_MAP = {
+ 'start': 1,
+ 'end': 3,
+ }
+ _LINE_ALIGN_MAP = {
+ 'middle': 8,
+ 'end': 4,
+ }
+
+ @staticmethod
+ def _ass_subtitles_timecode(seconds):
+ return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
def _get_subtitles(self, sub_path, video_id):
if not sub_path:
@@ -49,14 +60,20 @@ class ADNIE(InfoExtractor):
enc_subtitles = self._download_webpage(
urljoin(self._BASE_URL, sub_path),
- video_id, fatal=False)
+ video_id, 'Downloading subtitles location', fatal=False) or '{}'
+ subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
+ if subtitle_location:
+ enc_subtitles = self._download_webpage(
+ urljoin(self._BASE_URL, subtitle_location),
+ video_id, 'Downloading subtitles data', fatal=False,
+ headers={'Origin': 'https://animedigitalnetwork.fr'})
if not enc_subtitles:
return None
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
- bytes_to_intlist(binascii.unhexlify(self._K + '9032ad7083106400')),
+ bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
))
subtitles_json = self._parse_json(
@@ -67,23 +84,27 @@ class ADNIE(InfoExtractor):
subtitles = {}
for sub_lang, sub in subtitles_json.items():
- srt = ''
- for num, current in enumerate(sub):
- start, end, text = (
+ ssa = '''[Script Info]
+ScriptType:V4.00
+[V4 Styles]
+Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding
+Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0
+[Events]
+Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
+ for current in sub:
+ start, end, text, line_align, position_align = (
float_or_none(current.get('startTime')),
float_or_none(current.get('endTime')),
- current.get('text'))
+ current.get('text'), current.get('lineAlign'),
+ current.get('positionAlign'))
if start is None or end is None or text is None:
continue
- srt += os.linesep.join(
- (
- '%d' % num,
- '%s --> %s' % (
- srt_subtitles_timecode(start),
- srt_subtitles_timecode(end)),
- text,
- os.linesep,
- ))
+ alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0)
+ ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % (
+ self._ass_subtitles_timecode(start),
+ self._ass_subtitles_timecode(end),
+ '{\\a%d}' % alignment if alignment != 2 else '',
+ text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}'))
if sub_lang == 'vostf':
sub_lang = 'fr'
@@ -91,8 +112,8 @@ class ADNIE(InfoExtractor):
'ext': 'json',
'data': json.dumps(sub),
}, {
- 'ext': 'srt',
- 'data': srt,
+ 'ext': 'ssa',
+ 'data': ssa,
}])
return subtitles
@@ -100,7 +121,15 @@ class ADNIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
player_config = self._parse_json(self._search_regex(
- r'playerConfig\s*=\s*({.+});', webpage, 'player config'), video_id)
+ r'playerConfig\s*=\s*({.+});', webpage,
+ 'player config', default='{}'), video_id, fatal=False)
+ if not player_config:
+ config_url = urljoin(self._BASE_URL, self._search_regex(
+ r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
+ webpage, 'config url'))
+ player_config = self._download_json(
+ config_url, video_id,
+ 'Downloading player config JSON metadata')['player']
video_info = {}
video_info_str = self._search_regex(
@@ -129,12 +158,15 @@ class ADNIE(InfoExtractor):
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode()
links_data = self._download_json(
- urljoin(self._BASE_URL, links_url), video_id, headers={
+ urljoin(self._BASE_URL, links_url), video_id,
+ 'Downloading links JSON metadata', headers={
'Authorization': 'Bearer ' + authorization,
})
links = links_data.get('links') or {}
metas = metas or links_data.get('meta') or {}
- sub_path = (sub_path or links_data.get('subtitles')) + '&token=' + token
+ sub_path = sub_path or links_data.get('subtitles') or \
+ 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
+ sub_path += '&token=' + token
error = links_data.get('error')
title = metas.get('title') or video_info['title']
@@ -142,9 +174,11 @@ class ADNIE(InfoExtractor):
for format_id, qualities in links.items():
if not isinstance(qualities, dict):
continue
- for load_balancer_url in qualities.values():
+ for quality, load_balancer_url in qualities.items():
load_balancer_data = self._download_json(
- load_balancer_url, video_id, fatal=False) or {}
+ load_balancer_url, video_id,
+ 'Downloading %s %s JSON metadata' % (format_id, quality),
+ fatal=False) or {}
m3u8_url = load_balancer_data.get('location')
if not m3u8_url:
continue
diff --git a/youtube_dl/extractor/adobeconnect.py b/youtube_dl/extractor/adobeconnect.py
new file mode 100644
index 000000000..728549eb9
--- /dev/null
+++ b/youtube_dl/extractor/adobeconnect.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class AdobeConnectIE(InfoExtractor):
+ _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+ qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
+ is_live = qs.get('isLive', ['false'])[0] == 'true'
+ formats = []
+ for con_string in qs['conStrings'][0].split(','):
+ formats.append({
+ 'format_id': con_string.split('://')[0],
+ 'app': compat_urlparse.quote('?' + con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]),
+ 'ext': 'flv',
+ 'play_path': 'mp4:' + qs['streamName'][0],
+ 'rtmp_conn': 'S:' + qs['ticket'][0],
+ 'rtmp_live': is_live,
+ 'url': con_string,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py
index 1cf2dcbf3..38dca1b0a 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dl/extractor/adobepass.py
@@ -25,6 +25,11 @@ MSO_INFO = {
'username_field': 'username',
'password_field': 'password',
},
+ 'ATT': {
+ 'name': 'AT&T U-verse',
+ 'username_field': 'userid',
+ 'password_field': 'password',
+ },
'ATTOTT': {
'name': 'DIRECTV NOW',
'username_field': 'email',
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
index 008c98e51..80060f037 100644
--- a/youtube_dl/extractor/adobetv.py
+++ b/youtube_dl/extractor/adobetv.py
@@ -1,25 +1,119 @@
from __future__ import unicode_literals
+import functools
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
- parse_duration,
- unified_strdate,
- str_to_int,
- int_or_none,
float_or_none,
+ int_or_none,
ISO639Utils,
- determine_ext,
+ OnDemandPagedList,
+ parse_duration,
+ str_or_none,
+ str_to_int,
+ unified_strdate,
)
class AdobeTVBaseIE(InfoExtractor):
- _API_BASE_URL = 'http://tv.adobe.com/api/v4/'
+ def _call_api(self, path, video_id, query, note=None):
+ return self._download_json(
+ 'http://tv.adobe.com/api/v4/' + path,
+ video_id, note, query=query)['data']
+
+ def _parse_subtitles(self, video_data, url_key):
+ subtitles = {}
+ for translation in video_data.get('translations', []):
+ vtt_path = translation.get(url_key)
+ if not vtt_path:
+ continue
+ lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
+ subtitles.setdefault(lang, []).append({
+ 'ext': 'vtt',
+ 'url': vtt_path,
+ })
+ return subtitles
+
+ def _parse_video_data(self, video_data):
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
+
+ s3_extracted = False
+ formats = []
+ for source in video_data.get('videos', []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ f = {
+ 'format_id': source.get('quality_level'),
+ 'fps': int_or_none(source.get('frame_rate')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ 'width': int_or_none(source.get('width')),
+ 'url': source_url,
+ }
+ original_filename = source.get('original_filename')
+ if original_filename:
+ if not (f.get('height') and f.get('width')):
+ mobj = re.search(r'_(\d+)x(\d+)', original_filename)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
+ if original_filename.startswith('s3://') and not s3_extracted:
+ formats.append({
+ 'format_id': 'original',
+ 'preference': 1,
+ 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
+ })
+ s3_extracted = True
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
+ 'formats': formats,
+ 'subtitles': self._parse_subtitles(video_data, 'vtt'),
+ }
+
+
+class AdobeTVEmbedIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:embed'
+ _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.adobe.com/embed/22/4153',
+ 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
+ 'info_dict': {
+ 'id': '4153',
+ 'ext': 'flv',
+ 'title': 'Creating Graphics Optimized for BlackBerry',
+ 'description': 'md5:eac6e8dced38bdaae51cd94447927459',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20091109',
+ 'duration': 377,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
+ return self._parse_video_data(video_data)
class AdobeTVIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
_TEST = {
@@ -42,45 +136,33 @@ class AdobeTVIE(AdobeTVBaseIE):
if not language:
language = 'en'
- video_data = self._download_json(
- self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
- urlname)['data'][0]
-
- formats = [{
- 'url': source['url'],
- 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- 'tbr': int_or_none(source.get('video_data_rate')),
- } for source in video_data['videos']]
- self._sort_formats(formats)
-
- return {
- 'id': compat_str(video_data['id']),
- 'title': video_data['title'],
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('thumbnail'),
- 'upload_date': unified_strdate(video_data.get('start_date')),
- 'duration': parse_duration(video_data.get('duration')),
- 'view_count': str_to_int(video_data.get('playcount')),
- 'formats': formats,
- }
+ video_data = self._call_api(
+ 'episode/get', urlname, {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ 'urlname': urlname,
+ })[0]
+ return self._parse_video_data(video_data)
class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
- def _parse_page_data(self, page_data):
- return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
+ _PAGE_SIZE = 25
+
+ def _fetch_page(self, display_id, query, page):
+ page += 1
+ query['page'] = page
+ for element_data in self._call_api(
+ self._RESOURCE, display_id, query, 'Download Page %d' % page):
+ yield self._process_data(element_data)
- def _extract_playlist_entries(self, url, display_id):
- page = self._download_json(url, display_id)
- entries = self._parse_page_data(page['data'])
- for page_num in range(2, page['paging']['pages'] + 1):
- entries.extend(self._parse_page_data(
- self._download_json(url + '&page=%d' % page_num, display_id)['data']))
- return entries
+ def _extract_playlist_entries(self, display_id, query):
+ return OnDemandPagedList(functools.partial(
+ self._fetch_page, display_id, query), self._PAGE_SIZE)
class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:show'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
_TEST = {
@@ -92,26 +174,31 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
},
'playlist_mincount': 136,
}
-
- def _get_element_url(self, element_data):
- return element_data['urls'][0]
+ _RESOURCE = 'episode'
+ _process_data = AdobeTVBaseIE._parse_video_data
def _real_extract(self, url):
language, show_urlname = re.match(self._VALID_URL, url).groups()
if not language:
language = 'en'
- query = 'language=%s&show_urlname=%s' % (language, show_urlname)
+ query = {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ }
- show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
+ show_data = self._call_api(
+ 'show/get', show_urlname, query)[0]
return self.playlist_result(
- self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
- compat_str(show_data['id']),
- show_data['show_name'],
- show_data['show_description'])
+ self._extract_playlist_entries(show_urlname, query),
+ str_or_none(show_data.get('id')),
+ show_data.get('show_name'),
+ show_data.get('show_description'))
class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:channel'
_VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
_TEST = {
@@ -121,24 +208,30 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
},
'playlist_mincount': 96,
}
+ _RESOURCE = 'show'
- def _get_element_url(self, element_data):
- return element_data['url']
+ def _process_data(self, show_data):
+ return self.url_result(
+ show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
def _real_extract(self, url):
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
if not language:
language = 'en'
- query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
+ query = {
+ 'channel_urlname': channel_urlname,
+ 'language': language,
+ }
if category_urlname:
- query += '&category_urlname=%s' % category_urlname
+ query['category_urlname'] = category_urlname
return self.playlist_result(
- self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
+ self._extract_playlist_entries(channel_urlname, query),
channel_urlname)
-class AdobeTVVideoIE(InfoExtractor):
+class AdobeTVVideoIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:video'
_VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
_TEST = {
@@ -160,38 +253,36 @@ class AdobeTVVideoIE(InfoExtractor):
video_data = self._parse_json(self._search_regex(
r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
-
- formats = [{
- 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
- 'url': source['src'],
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- 'tbr': int_or_none(source.get('bitrate')),
- } for source in video_data['sources']]
+ title = video_data['title']
+
+ formats = []
+ sources = video_data.get('sources') or []
+ for source in sources:
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ formats.append({
+ 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
+ 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
+ 'height': int_or_none(source.get('height') or None),
+ 'tbr': int_or_none(source.get('bitrate') or None),
+ 'width': int_or_none(source.get('width') or None),
+ 'url': source_src,
+ })
self._sort_formats(formats)
# For both metadata and downloaded files the duration varies among
# formats. I just pick the max one
duration = max(filter(None, [
float_or_none(source.get('duration'), scale=1000)
- for source in video_data['sources']]))
-
- subtitles = {}
- for translation in video_data.get('translations', []):
- lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
- if lang_id not in subtitles:
- subtitles[lang_id] = []
- subtitles[lang_id].append({
- 'url': translation['vttPath'],
- 'ext': 'vtt',
- })
+ for source in sources]))
return {
'id': video_id,
'formats': formats,
- 'title': video_data['title'],
+ 'title': title,
'description': video_data.get('description'),
- 'thumbnail': video_data['video'].get('poster'),
+ 'thumbnail': video_data.get('video', {}).get('poster'),
'duration': duration,
- 'subtitles': subtitles,
+ 'subtitles': self._parse_subtitles(video_data, 'vttPath'),
}
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py
index 88c96a950..8d1d9ac7d 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -1,13 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .turner import TurnerBaseIE
from ..utils import (
+ determine_ext,
+ float_or_none,
int_or_none,
+ mimetype2ext,
+ parse_age_limit,
+ parse_iso8601,
strip_or_none,
- url_or_none,
+ try_get,
)
@@ -21,8 +27,8 @@ class AdultSwimIE(TurnerBaseIE):
'ext': 'mp4',
'title': 'Rick and Morty - Pilot',
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
- 'timestamp': 1493267400,
- 'upload_date': '20170427',
+ 'timestamp': 1543294800,
+ 'upload_date': '20181127',
},
'params': {
# m3u8 download
@@ -43,6 +49,7 @@ class AdultSwimIE(TurnerBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}, {
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
'info_dict': {
@@ -61,9 +68,9 @@ class AdultSwimIE(TurnerBaseIE):
}, {
'url': 'http://www.adultswim.com/videos/attack-on-titan',
'info_dict': {
- 'id': 'b7A69dzfRzuaXIECdxW8XQ',
+ 'id': 'attack-on-titan',
'title': 'Attack on Titan',
- 'description': 'md5:6c8e003ea0777b47013e894767f5e114',
+ 'description': 'md5:41caa9416906d90711e31dc00cb7db7e',
},
'playlist_mincount': 12,
}, {
@@ -78,83 +85,118 @@ class AdultSwimIE(TurnerBaseIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': '404 Not Found',
}]
def _real_extract(self, url):
show_path, episode_path = re.match(self._VALID_URL, url).groups()
display_id = episode_path or show_path
- webpage = self._download_webpage(url, display_id)
- initial_data = self._parse_json(self._search_regex(
- r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
- webpage, 'initial data'), display_id)
-
- is_stream = show_path == 'streams'
- if is_stream:
- if not episode_path:
- episode_path = 'live-stream'
+ query = '''query {
+ getShowBySlug(slug:"%s") {
+ %%s
+ }
+}''' % show_path
+ if episode_path:
+ query = query % '''title
+ getVideoBySlug(slug:"%s") {
+ _id
+ auth
+ description
+ duration
+ episodeNumber
+ launchDate
+ mediaID
+ seasonNumber
+ poster
+ title
+ tvRating
+ }''' % episode_path
+ ['getVideoBySlug']
+ else:
+ query = query % '''metaDescription
+ title
+ videos(first:1000,sort:["episode_number"]) {
+ edges {
+ node {
+ _id
+ slug
+ }
+ }
+ }'''
+ show_data = self._download_json(
+ 'https://www.adultswim.com/api/search', display_id,
+ data=json.dumps({'query': query}).encode(),
+ headers={'Content-Type': 'application/json'})['data']['getShowBySlug']
+ if episode_path:
+ video_data = show_data['getVideoBySlug']
+ video_id = video_data['_id']
+ episode_title = title = video_data['title']
+ series = show_data.get('title')
+ if series:
+ title = '%s - %s' % (series, title)
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(video_data.get('description')),
+ 'duration': float_or_none(video_data.get('duration')),
+ 'formats': [],
+ 'subtitles': {},
+ 'age_limit': parse_age_limit(video_data.get('tvRating')),
+ 'thumbnail': video_data.get('poster'),
+ 'timestamp': parse_iso8601(video_data.get('launchDate')),
+ 'series': series,
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode': episode_title,
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ }
- video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
- video_id = video_data.get('stream')
+ auth = video_data.get('auth')
+ media_id = video_data.get('mediaID')
+ if media_id:
+ info.update(self._extract_ngtv_info(media_id, {
+ # CDN_TOKEN_APP_ID from:
+ # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
+ 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
+ }, {
+ 'url': url,
+ 'site_name': 'AdultSwim',
+ 'auth_required': auth,
+ }))
- if not video_id:
- entries = []
- for episode in video_data.get('archiveEpisodes', []):
- episode_url = url_or_none(episode.get('url'))
- if not episode_url:
+ if not auth:
+ extract_data = self._download_json(
+ 'https://www.adultswim.com/api/shows/v1/videos/' + video_id,
+ video_id, query={'fields': 'stream'}, fatal=False) or {}
+ assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or []
+ for asset in assets:
+ asset_url = asset.get('url')
+ if not asset_url:
continue
- entries.append(self.url_result(
- episode_url, 'AdultSwim', episode.get('id')))
- return self.playlist_result(
- entries, video_data.get('id'), video_data.get('title'),
- strip_or_none(video_data.get('description')))
- else:
- show_data = initial_data['show']
-
- if not episode_path:
- entries = []
- for video in show_data.get('videos', []):
- slug = video.get('slug')
- if not slug:
+ ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
+ if ext == 'm3u8':
+ info['formats'].extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'f4m':
continue
- entries.append(self.url_result(
- 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
- 'AdultSwim', video.get('id')))
- return self.playlist_result(
- entries, show_data.get('id'), show_data.get('title'),
- strip_or_none(show_data.get('metadata', {}).get('description')))
-
- video_data = show_data['sluggedVideo']
- video_id = video_data['id']
+ # info['formats'].extend(self._extract_f4m_formats(
+ # asset_url, video_id, f4m_id='hds', fatal=False))
+ elif ext in ('scc', 'ttml', 'vtt'):
+ info['subtitles'].setdefault('en', []).append({
+ 'url': asset_url,
+ })
+ self._sort_formats(info['formats'])
- info = self._extract_cvp_info(
- 'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
- video_id, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
- 'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
- },
- }, {
- 'url': url,
- 'site_name': 'AdultSwim',
- 'auth_required': video_data.get('auth'),
- })
-
- info.update({
- 'id': video_id,
- 'display_id': display_id,
- 'description': info.get('description') or strip_or_none(video_data.get('description')),
- })
- if not is_stream:
- info.update({
- 'duration': info.get('duration') or int_or_none(video_data.get('duration')),
- 'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
- 'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
- 'episode': info['title'],
- 'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
- })
-
- info['series'] = video_data.get('collection_title') or info.get('series')
- if info['series'] and info['series'] != info['title']:
- info['title'] = '%s - %s' % (info['series'], info['title'])
-
- return info
+ return info
+ else:
+ entries = []
+ for edge in show_data.get('videos', {}).get('edges', []):
+ video = edge.get('node') or {}
+ slug = video.get('slug')
+ if not slug:
+ continue
+ entries.append(self.url_result(
+ 'http://adultswim.com/videos/%s/%s' % (show_path, slug),
+ 'AdultSwim', video.get('_id')))
+ return self.playlist_result(
+ entries, show_path, show_data.get('title'),
+ strip_or_none(show_data.get('metaDescription')))
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
index 398e56ea3..611b948f5 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dl/extractor/aenetworks.py
@@ -1,14 +1,15 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE
from ..utils import (
+ extract_attributes,
+ ExtractorError,
+ int_or_none,
smuggle_url,
update_url_query,
- unescapeHTML,
- extract_attributes,
- get_element_by_attribute,
)
from ..compat import (
compat_urlparse,
@@ -19,35 +20,76 @@ class AENetworksBaseIE(ThePlatformIE):
_THEPLATFORM_KEY = 'crazyjava'
_THEPLATFORM_SECRET = 's3cr3t'
+ def _extract_aen_smil(self, smil_url, video_id, auth=None):
+ query = {'mbr': 'true'}
+ if auth:
+ query['auth'] = auth
+ TP_SMIL_QUERY = [{
+ 'assetTypes': 'high_video_ak',
+ 'switch': 'hls_high_ak'
+ }, {
+ 'assetTypes': 'high_video_s3'
+ }, {
+ 'assetTypes': 'high_video_s3',
+ 'switch': 'hls_ingest_fastly'
+ }]
+ formats = []
+ subtitles = {}
+ last_e = None
+ for q in TP_SMIL_QUERY:
+ q.update(query)
+ m_url = update_url_query(smil_url, q)
+ m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
+ except ExtractorError as e:
+ last_e = e
+ continue
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if last_e and not formats:
+ raise last_e
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
class AENetworksIE(AENetworksBaseIE):
IE_NAME = 'aenetworks'
- IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
+ IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?P<domain>
- (?:history|aetv|mylifetime|lifetimemovieclub)\.com|
+ (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
fyi\.tv
)/
(?:
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
- specials/(?P<special_display_id>[^/]+)/full-special
+ specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
+ collections/[^/]+/(?P<collection_display_id>[^/]+)
)
'''
_TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
- 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': {
'id': '22253814',
'ext': 'mp4',
- 'title': 'Winter Is Coming',
+ 'title': 'Winter is Coming',
'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
'timestamp': 1338306241,
'upload_date': '20120529',
'uploader': 'AENE-NEW',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
'add_ie': ['ThePlatform'],
}, {
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
@@ -80,6 +122,12 @@ class AENetworksIE(AENetworksBaseIE):
}, {
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
'only_matching': True
+ }, {
+ 'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
+ 'only_matching': True
}]
_DOMAIN_TO_REQUESTOR_ID = {
'history.com': 'HISTORY',
@@ -90,9 +138,9 @@ class AENetworksIE(AENetworksBaseIE):
}
def _real_extract(self, url):
- domain, show_path, movie_display_id, special_display_id = re.match(self._VALID_URL, url).groups()
- display_id = show_path or movie_display_id or special_display_id
- webpage = self._download_webpage(url, display_id)
+ domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
+ display_id = show_path or movie_display_id or special_display_id or collection_display_id
+ webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
if show_path:
url_parts = show_path.split('/')
url_parts_len = len(url_parts)
@@ -120,11 +168,6 @@ class AENetworksIE(AENetworksBaseIE):
return self.playlist_result(
entries, self._html_search_meta('aetn:SeasonId', webpage))
- query = {
- 'mbr': 'true',
- 'assetTypes': 'high_video_ak',
- 'switch': 'hls_high_ak',
- }
video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex(
[r"media_url\s*=\s*'(?P<url>[^']+)'",
@@ -134,64 +177,39 @@ class AENetworksIE(AENetworksBaseIE):
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
+ auth = None
if theplatform_metadata.get('AETN$isBehindWall'):
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
resource = self._get_mvpd_resource(
requestor_id, theplatform_metadata['title'],
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
theplatform_metadata['ratings'][0]['rating'])
- query['auth'] = self._extract_mvpd_auth(
+ auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
info.update(self._search_json_ld(webpage, video_id, fatal=False))
- media_url = update_url_query(media_url, query)
- media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
- formats, subtitles = self._extract_theplatform_smil(media_url, video_id)
- self._sort_formats(formats)
- info.update({
- 'id': video_id,
- 'formats': formats,
- 'subtitles': subtitles,
- })
+ info.update(self._extract_aen_smil(media_url, video_id, auth))
return info
class HistoryTopicIE(AENetworksBaseIE):
IE_NAME = 'history:topic'
IE_DESC = 'History.com Topic'
- _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)(?:/[^/]+(?:/(?P<video_display_id>[^/?#]+))?)?'
+ _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
_TESTS = [{
- 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
+ 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
'info_dict': {
'id': '40700995724',
'ext': 'mp4',
- 'title': "Bet You Didn't Know: Valentine's Day",
+ 'title': "History of Valentine’s Day",
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
'timestamp': 1375819729,
'upload_date': '20130806',
- 'uploader': 'AENE-NEW',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['ThePlatform'],
- }, {
- 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos',
- 'info_dict':
- {
- 'id': 'world-war-i-history',
- 'title': 'World War I History',
- },
- 'playlist_mincount': 23,
- }, {
- 'url': 'http://www.history.com/topics/world-war-i-history/videos',
- 'only_matching': True,
- }, {
- 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history',
- 'only_matching': True,
- }, {
- 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches',
- 'only_matching': True,
}]
def theplatform_url_result(self, theplatform_url, video_id, query):
@@ -211,27 +229,19 @@ class HistoryTopicIE(AENetworksBaseIE):
}
def _real_extract(self, url):
- topic_id, video_display_id = re.match(self._VALID_URL, url).groups()
- if video_display_id:
- webpage = self._download_webpage(url, video_display_id)
- release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups()
- release_url = unescapeHTML(release_url)
-
- return self.theplatform_url_result(
- release_url, video_id, {
- 'mbr': 'true',
- 'switch': 'hls',
- 'assetTypes': 'high_video_ak',
- })
- else:
- webpage = self._download_webpage(url, topic_id)
- entries = []
- for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage):
- video_attributes = extract_attributes(episode_item)
- entries.append(self.theplatform_url_result(
- video_attributes['data-release-url'], video_attributes['data-id'], {
- 'mbr': 'true',
- 'switch': 'hls',
- 'assetTypes': 'high_video_ak',
- }))
- return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
+ result = self._download_json(
+ 'https://feeds.video.aetnd.com/api/v2/history/videos',
+ video_id, query={'filter[id]': video_id})['results'][0]
+ title = result['title']
+ info = self._extract_aen_smil(result['publicUrl'], video_id)
+ info.update({
+ 'title': title,
+ 'description': result.get('description'),
+ 'duration': int_or_none(result.get('duration')),
+ 'timestamp': int_or_none(result.get('added'), 1000),
+ })
+ return info
diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py
index 01736872d..9c9d77ae1 100644
--- a/youtube_dl/extractor/americastestkitchen.py
+++ b/youtube_dl/extractor/americastestkitchen.py
@@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
+ js_to_json,
try_get,
unified_strdate,
)
@@ -13,22 +14,21 @@ from ..utils import (
class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
+ 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': {
- 'id': '1_5g5zua6e',
- 'title': 'Summer Dinner Party',
+ 'id': '5b400b9ee338f922cb06450c',
+ 'title': 'Weeknight Japanese Suppers',
'ext': 'mp4',
- 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1497285541,
- 'upload_date': '20170612',
- 'uploader_id': 'roger.metcalf@americastestkitchen.com',
- 'release_date': '20170617',
+ 'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1523664000,
+ 'upload_date': '20180414',
+ 'release_date': '20180414',
'series': "America's Test Kitchen",
- 'season_number': 17,
- 'episode': 'Summer Dinner Party',
- 'episode_number': 24,
+ 'season_number': 18,
+ 'episode': 'Weeknight Japanese Suppers',
+ 'episode_number': 15,
},
'params': {
'skip_download': True,
@@ -43,22 +43,19 @@ class AmericasTestKitchenIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- partner_id = self._search_regex(
- r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
- webpage, 'kaltura partner id')
-
video_data = self._parse_json(
self._search_regex(
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
webpage, 'initial context'),
- video_id)
+ video_id, js_to_json)
ep_data = try_get(
video_data,
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
- external_id = ep_data.get('external_id') or ep_meta['external_id']
+
+ zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
@@ -72,8 +69,8 @@ class AmericasTestKitchenIE(InfoExtractor):
return {
'_type': 'url_transparent',
- 'url': 'kaltura:%s:%s' % (partner_id, external_id),
- 'ie_key': 'Kaltura',
+ 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
+ 'ie_key': 'Zype',
'title': title,
'description': description,
'thumbnail': thumbnail,
diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py
deleted file mode 100644
index 2fd912da4..000000000
--- a/youtube_dl/extractor/anitube.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from __future__ import unicode_literals
-
-from .nuevo import NuevoBaseIE
-
-
-class AnitubeIE(NuevoBaseIE):
- IE_NAME = 'anitube.se'
- _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.anitube.se/video/36621',
- 'md5': '59d0eeae28ea0bc8c05e7af429998d43',
- 'info_dict': {
- 'id': '36621',
- 'ext': 'mp4',
- 'title': 'Recorder to Randoseru 01',
- 'duration': 180.19,
- },
- 'skip': 'Blocked in the US',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- key = self._search_regex(
- r'src=["\']https?://[^/]+/embed/([A-Za-z0-9_-]+)', webpage, 'key')
-
- return self._extract_nuevo(
- 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, video_id)
diff --git a/youtube_dl/extractor/anysex.py b/youtube_dl/extractor/anysex.py
deleted file mode 100644
index ad86d6e58..000000000
--- a/youtube_dl/extractor/anysex.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- int_or_none,
-)
-
-
-class AnySexIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?anysex\.com/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://anysex.com/156592/',
- 'md5': '023e9fbb7f7987f5529a394c34ad3d3d',
- 'info_dict': {
- 'id': '156592',
- 'ext': 'mp4',
- 'title': 'Busty and sexy blondie in her bikini strips for you',
- 'description': 'md5:de9e418178e2931c10b62966474e1383',
- 'categories': ['Erotic'],
- 'duration': 270,
- 'age_limit': 18,
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._html_search_regex(r"video_url\s*:\s*'([^']+)'", webpage, 'video URL')
-
- title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
- description = self._html_search_regex(
- r'<div class="description"[^>]*>([^<]+)</div>', webpage, 'description', fatal=False)
- thumbnail = self._html_search_regex(
- r'preview_url\s*:\s*\'(.*?)\'', webpage, 'thumbnail', fatal=False)
-
- categories = re.findall(
- r'<a href="http://anysex\.com/categories/[^"]+" title="[^"]*">([^<]+)</a>', webpage)
-
- duration = parse_duration(self._search_regex(
- r'<b>Duration:</b> (?:<q itemprop="duration">)?(\d+:\d+)', webpage, 'duration', fatal=False))
- view_count = int_or_none(self._html_search_regex(
- r'<b>Views:</b> (\d+)', webpage, 'view count', fatal=False))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'categories': categories,
- 'duration': duration,
- 'view_count': view_count,
- 'age_limit': 18,
- }
diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py
index cb9279193..e87994a6a 100644
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dl/extractor/aol.py
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
ExtractorError,
int_or_none,
@@ -12,12 +16,12 @@ from ..utils import (
class AolIE(InfoExtractor):
- IE_NAME = 'on.aol.com'
- _VALID_URL = r'(?:aol-video:|https?://(?:(?:www|on)\.)?aol\.com/(?:[^/]+/)*(?:[^/?#&]+-)?)(?P<id>[^/?#&]+)'
+ IE_NAME = 'aol.com'
+ _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
_TESTS = [{
# video with 5min ID
- 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
+ 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/',
'md5': '18ef68f48740e86ae94b98da815eec42',
'info_dict': {
'id': '518167793',
@@ -34,7 +38,7 @@ class AolIE(InfoExtractor):
}
}, {
# video with vidible ID
- 'url': 'http://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
+ 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/',
'info_dict': {
'id': '5707d6b8e4b090497b04f706',
'ext': 'mp4',
@@ -49,16 +53,28 @@ class AolIE(InfoExtractor):
'skip_download': True,
}
}, {
- 'url': 'http://on.aol.com/partners/abc-551438d309eab105804dbfe8/sneak-peek-was-haley-really-framed-570eaebee4b0448640a5c944',
+ 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/',
'only_matching': True,
}, {
- 'url': 'http://on.aol.com/shows/park-bench-shw518173474-559a1b9be4b0c3bfad3357a7?context=SH:SHW518173474:PL4327:1460619712763',
+ 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/',
'only_matching': True,
}, {
- 'url': 'http://on.aol.com/video/519442220',
+ 'url': 'aol-video:5707d6b8e4b090497b04f706',
'only_matching': True,
}, {
- 'url': 'aol-video:5707d6b8e4b090497b04f706',
+ 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
}]
@@ -73,7 +89,7 @@ class AolIE(InfoExtractor):
video_data = response['data']
formats = []
- m3u8_url = video_data.get('videoMasterPlaylist')
+ m3u8_url = url_or_none(video_data.get('videoMasterPlaylist'))
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
@@ -96,6 +112,12 @@ class AolIE(InfoExtractor):
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
})
+ else:
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query)
+ f.update({
+ 'width': int_or_none(qs.get('w', [None])[0]),
+ 'height': int_or_none(qs.get('h', [None])[0]),
+ })
formats.append(f)
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py
index 6eb8bbb6e..883dcee7a 100644
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dl/extractor/aparat.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ merge_dicts,
mimetype2ext,
url_or_none,
)
@@ -12,59 +13,83 @@ from ..utils import (
class AparatIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.aparat.com/v/wP8On',
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
'info_dict': {
'id': 'wP8On',
'ext': 'mp4',
'title': 'تیم گلکسی 11 - زومیت',
- 'age_limit': 0,
+ 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
+ 'duration': 231,
+ 'timestamp': 1387394859,
+ 'upload_date': '20131218',
+ 'view_count': int,
},
- # 'skip': 'Extremely unreliable',
- }
+ }, {
+ # multiple formats
+ 'url': 'https://www.aparat.com/v/8dflw/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
- # Note: There is an easier-to-parse configuration at
- # http://www.aparat.com/video/video/config/videohash/%video_id
- # but the URL in there does not work
- webpage = self._download_webpage(
- 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
- video_id)
+ # Provides more metadata
+ webpage = self._download_webpage(url, video_id, fatal=False)
- title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
+ if not webpage:
+ # Note: There is an easier-to-parse configuration at
+ # http://www.aparat.com/video/video/config/videohash/%video_id
+ # but the URL in there does not work
+ webpage = self._download_webpage(
+ 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
+ video_id)
- file_list = self._parse_json(
+ options = self._parse_json(
self._search_regex(
- r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
- 'file list'),
+ r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
+ webpage, 'options', group='value'),
video_id)
+ player = options['plugins']['sabaPlayerPlugin']
+
formats = []
- for item in file_list[0]:
- file_url = url_or_none(item.get('file'))
- if not file_url:
- continue
- ext = mimetype2ext(item.get('type'))
- label = item.get('label')
- formats.append({
- 'url': file_url,
- 'ext': ext,
- 'format_id': label or ext,
- 'height': int_or_none(self._search_regex(
- r'(\d+)[pP]', label or '', 'height', default=None)),
- })
- self._sort_formats(formats)
+ for sources in player['multiSRC']:
+ for item in sources:
+ if not isinstance(item, dict):
+ continue
+ file_url = url_or_none(item.get('src'))
+ if not file_url:
+ continue
+ item_type = item.get('type')
+ if item_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ file_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ ext = mimetype2ext(item.get('type'))
+ label = item.get('label')
+ formats.append({
+ 'url': file_url,
+ 'ext': ext,
+ 'format_id': 'http-%s' % (label or ext),
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', label or '', 'height',
+ default=None)),
+ })
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+ info = self._search_json_ld(webpage, video_id, default={})
- thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
+ if not info.get('title'):
+ info['title'] = player['title']
- return {
+ return merge_dicts(info, {
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'age_limit': self._family_friendly_search(webpage),
+ 'thumbnail': url_or_none(options.get('poster')),
+ 'duration': int_or_none(player.get('duration')),
'formats': formats,
- }
+ })
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 6bf8f61eb..5b7b2dd6d 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
@@ -8,70 +9,42 @@ from .generic import GenericIE
from ..utils import (
determine_ext,
ExtractorError,
- qualities,
int_or_none,
parse_duration,
+ qualities,
+ str_or_none,
+ try_get,
unified_strdate,
- xpath_text,
+ unified_timestamp,
update_url_query,
url_or_none,
+ xpath_text,
)
from ..compat import compat_etree_fromstring
-class ARDMediathekIE(InfoExtractor):
- IE_NAME = 'ARD:mediathek'
- _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
-
- _TESTS = [{
- # available till 26.07.2022
- 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
- 'info_dict': {
- 'id': '44726822',
- 'ext': 'mp4',
- 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
- 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
- 'duration': 1740,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }, {
- 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
- 'only_matching': True,
- }, {
- # audio
- 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
- 'only_matching': True,
- }, {
- 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
- 'only_matching': True,
- }, {
- # audio
- 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
- 'only_matching': True,
- }]
+class ARDMediathekBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
def _extract_media_info(self, media_info_url, webpage, video_id):
media_info = self._download_json(
media_info_url, video_id, 'Downloading media JSON')
+ return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
+ def _parse_media_info(self, media_info, video_id, fsk):
formats = self._extract_formats(media_info, video_id)
if not formats:
- if '"fsk"' in webpage:
+ if fsk:
raise ExtractorError(
'This video is only available after 20:00', expected=True)
elif media_info.get('_geoblocked'):
- raise ExtractorError('This video is not available due to geo restriction', expected=True)
+ self.raise_geo_restricted(
+ 'This video is not available due to geoblocking',
+ countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
- duration = int_or_none(media_info.get('_duration'))
- thumbnail = media_info.get('_previewImage')
- is_live = media_info.get('_isLive') is True
-
subtitles = {}
subtitle_url = media_info.get('_subtitleUrl')
if subtitle_url:
@@ -82,9 +55,9 @@ class ARDMediathekIE(InfoExtractor):
return {
'id': video_id,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'is_live': is_live,
+ 'duration': int_or_none(media_info.get('_duration')),
+ 'thumbnail': media_info.get('_previewImage'),
+ 'is_live': media_info.get('_isLive') is True,
'formats': formats,
'subtitles': subtitles,
}
@@ -113,11 +86,11 @@ class ARDMediathekIE(InfoExtractor):
update_url_query(stream_url, {
'hdcore': '3.1.1',
'plugin': 'aasp-3.1.1.69.124'
- }),
- video_id, f4m_id='hds', fatal=False))
+ }), video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
- stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ stream_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
else:
if server and server.startswith('rtmp'):
f = {
@@ -130,7 +103,9 @@ class ARDMediathekIE(InfoExtractor):
'url': stream_url,
'format_id': 'a%s-%s-%s' % (num, ext, quality)
}
- m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
+ m = re.search(
+ r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
+ stream_url)
if m:
f.update({
'width': int(m.group('width')),
@@ -141,6 +116,48 @@ class ARDMediathekIE(InfoExtractor):
formats.append(f)
return formats
+
+class ARDMediathekIE(ARDMediathekBaseIE):
+ IE_NAME = 'ARD:mediathek'
+ _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
+
+ _TESTS = [{
+ # available till 26.07.2022
+ 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
+ 'info_dict': {
+ 'id': '44726822',
+ 'ext': 'mp4',
+ 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
+ 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
+ 'duration': 1740,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
+
def _real_extract(self, url):
# determine video id from url
m = re.match(self._VALID_URL, url)
@@ -173,13 +190,18 @@ class ARDMediathekIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
- r'<h4 class="headline">(.*?)</h4>'],
+ r'<h4 class="headline">(.*?)</h4>',
+ r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
description = self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
- 'description', webpage, 'meta description')
+ 'description', webpage, 'meta description', default=None)
+ if description is None:
+ description = self._html_search_regex(
+ r'<p\s+class="teasertext">(.+?)</p>',
+ webpage, 'teaser text', default=None)
# Thumbnail is sometimes not present.
# It is in the mobile version, but that seems to use a different URL
@@ -227,7 +249,7 @@ class ARDMediathekIE(InfoExtractor):
class ARDIE(InfoExtractor):
- _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+ _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
_TESTS = [{
# available till 14.02.2019
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
@@ -242,6 +264,9 @@ class ARDIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
+ 'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
+ 'only_matching': True,
+ }, {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'only_matching': True,
}]
@@ -287,74 +312,111 @@ class ARDIE(InfoExtractor):
}
-class ARDBetaMediathekIE(InfoExtractor):
- _VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
+class ARDBetaMediathekIE(ARDMediathekBaseIE):
+ _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
_TESTS = [{
- 'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
- 'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
+ 'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+ 'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
'info_dict': {
'display_id': 'die-robuste-roswita',
- 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
- 'title': 'Tatort: Die robuste Roswita',
+ 'id': '70153354',
+ 'title': 'Die robuste Roswita',
'description': r're:^Der Mord.*trüber ist als die Ilm.',
'duration': 5316,
- 'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
- 'upload_date': '20180826',
+ 'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
+ 'timestamp': 1577047500,
+ 'upload_date': '20191222',
'ext': 'mp4',
},
+ }, {
+ 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
+ 'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('video_id')
display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
- data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
- data = self._parse_json(data_json, display_id)
-
- res = {
- 'id': video_id,
- 'display_id': display_id,
+ if display_id:
+ display_id = display_id.rstrip('/')
+ if not display_id:
+ display_id = video_id
+
+ player_page = self._download_json(
+ 'https://api.ardmediathek.de/public-gateway',
+ display_id, data=json.dumps({
+ 'query': '''{
+ playerPage(client:"%s", clipId: "%s") {
+ blockedByFsk
+ broadcastedOn
+ maturityContentRating
+ mediaCollection {
+ _duration
+ _geoblocked
+ _isLive
+ _mediaArray {
+ _mediaStreamArray {
+ _quality
+ _server
+ _stream
}
- formats = []
- for widget in data.values():
- if widget.get('_geoblocked'):
- raise ExtractorError('This video is not available due to geoblocking', expected=True)
-
- if '_duration' in widget:
- res['duration'] = widget['_duration']
- if 'clipTitle' in widget:
- res['title'] = widget['clipTitle']
- if '_previewImage' in widget:
- res['thumbnail'] = widget['_previewImage']
- if 'broadcastedOn' in widget:
- res['upload_date'] = unified_strdate(widget['broadcastedOn'])
- if 'synopsis' in widget:
- res['description'] = widget['synopsis']
- if '_subtitleUrl' in widget:
- res['subtitles'] = {'de': [{
- 'ext': 'ttml',
- 'url': widget['_subtitleUrl'],
- }]}
- if '_quality' in widget:
- format_url = widget['_stream']['json'][0]
-
- if format_url.endswith('.f4m'):
- formats.extend(self._extract_f4m_formats(
- format_url + '?hdcore=3.11.0',
- video_id, f4m_id='hds', fatal=False))
- elif format_url.endswith('m3u8'):
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'format_id': 'http-' + widget['_quality'],
- 'url': format_url,
- 'preference': 10, # Plain HTTP, that's nice
- })
-
- self._sort_formats(formats)
- res['formats'] = formats
-
- return res
+ }
+ _previewImage
+ _subtitleUrl
+ _type
+ }
+ show {
+ title
+ }
+ synopsis
+ title
+ tracking {
+ atiCustomVars {
+ contentId
+ }
+ }
+ }
+}''' % (mobj.group('client'), video_id),
+ }).encode(), headers={
+ 'Content-Type': 'application/json'
+ })['data']['playerPage']
+ title = player_page['title']
+ content_id = str_or_none(try_get(
+ player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
+ media_collection = player_page.get('mediaCollection') or {}
+ if not media_collection and content_id:
+ media_collection = self._download_json(
+ 'https://www.ardmediathek.de/play/media/' + content_id,
+ content_id, fatal=False) or {}
+ info = self._parse_media_info(
+ media_collection, content_id or video_id,
+ player_page.get('blockedByFsk'))
+ age_limit = None
+ description = player_page.get('synopsis')
+ maturity_content_rating = player_page.get('maturityContentRating')
+ if maturity_content_rating:
+ age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
+ if not age_limit and description:
+ age_limit = int_or_none(self._search_regex(
+ r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
+ info.update({
+ 'age_limit': age_limit,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
+ 'series': try_get(player_page, lambda x: x['show']['title']),
+ })
+ return info
diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py
index 4495ddbb0..854f58767 100644
--- a/youtube_dl/extractor/arkena.py
+++ b/youtube_dl/extractor/arkena.py
@@ -103,7 +103,7 @@ class ArkenaIE(InfoExtractor):
f_url, video_id, mpd_id=kind, fatal=False))
elif kind == 'silverlight':
# TODO: process when ism is supported (see
- # https://github.com/rg3/youtube-dl/issues/8118)
+ # https://github.com/ytdl-org/youtube-dl/issues/8118)
continue
else:
tbr = float_or_none(f.get('Bitrate'), 1000)
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index ffc321821..2bd3bfe8a 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -4,17 +4,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_urlparse,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
- find_xpath_attr,
- get_element_by_attribute,
int_or_none,
- NO_DEFAULT,
qualities,
try_get,
unified_strdate,
@@ -25,59 +18,7 @@ from ..utils import (
# add tests.
-class ArteTvIE(InfoExtractor):
- _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
- IE_NAME = 'arte.tv'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- lang = mobj.group('lang')
- video_id = mobj.group('id')
-
- ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
- ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
- ref_xml_doc = self._download_xml(
- ref_xml_url, video_id, note='Downloading metadata')
- config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
- config_xml_url = config_node.attrib['ref']
- config = self._download_xml(
- config_xml_url, video_id, note='Downloading configuration')
-
- formats = [{
- 'format_id': q.attrib['quality'],
- # The playpath starts at 'mp4:', if we don't manually
- # split the url, rtmpdump will incorrectly parse them
- 'url': q.text.split('mp4:', 1)[0],
- 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1],
- 'ext': 'flv',
- 'quality': 2 if q.attrib['quality'] == 'hd' else 1,
- } for q in config.findall('./urls/url')]
- self._sort_formats(formats)
-
- title = config.find('.//name').text
- thumbnail = config.find('.//firstThumbnailUrl').text
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
-
-
class ArteTVBaseIE(InfoExtractor):
- @classmethod
- def _extract_url_info(cls, url):
- mobj = re.match(cls._VALID_URL, url)
- lang = mobj.group('lang')
- query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
- if 'vid' in query:
- video_id = query['vid'][0]
- else:
- # This is not a real id, it can be for example AJT for the news
- # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal
- video_id = mobj.group('id')
- return video_id, lang
-
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer']
@@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor):
'upload_date': unified_strdate(upload_date_str),
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
}
- qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
+ qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
LANGS = {
'fr': 'F',
'de': 'A',
'en': 'E[ANG]',
'es': 'E[ESP]',
+ 'it': 'E[ITA]',
+ 'pl': 'E[POL]',
}
langcode = LANGS.get(lang, lang)
@@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor):
l = re.escape(langcode)
# Language preference from most to least priority
- # Reference: section 5.6.3 of
- # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf
+ # Reference: section 6.8 of
+ # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
PREFERENCES = (
# original version in requested language, without subtitles
r'VO{0}$'.format(l),
@@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor):
class ArteTVPlus7IE(ArteTVBaseIE):
IE_NAME = 'arte.tv:+7'
- _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?P<lang>fr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D',
- 'only_matching': True,
- }, {
- 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22',
- 'only_matching': True,
- }, {
- 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url)
-
- def _real_extract(self, url):
- video_id, lang = self._extract_url_info(url)
- webpage = self._download_webpage(url, video_id)
- return self._extract_from_webpage(webpage, video_id, lang)
-
- def _extract_from_webpage(self, webpage, video_id, lang):
- patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']')
- ids = (video_id, '')
- # some pages contain multiple videos (like
- # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D),
- # so we first try to look for json URLs that contain the video id from
- # the 'vid' parameter.
- patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates]
- json_url = self._html_search_regex(
- patterns, webpage, 'json vp url', default=None)
- if not json_url:
- def find_iframe_url(webpage, default=NO_DEFAULT):
- return self._html_search_regex(
- r'<iframe[^>]+src=(["\'])(?P<url>.+\bjson_url=.+?)\1',
- webpage, 'iframe url', group='url', default=default)
-
- iframe_url = find_iframe_url(webpage, None)
- if not iframe_url:
- embed_url = self._html_search_regex(
- r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None)
- if embed_url:
- player = self._download_json(
- embed_url, video_id, 'Downloading player page')
- iframe_url = find_iframe_url(player['html'])
- # en and es URLs produce react-based pages with different layout (e.g.
- # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world)
- if not iframe_url:
- program = self._search_regex(
- r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n',
- webpage, 'program', default=None)
- if program:
- embed_html = self._parse_json(program, video_id)
- if embed_html:
- iframe_url = find_iframe_url(embed_html['embed_html'])
- if iframe_url:
- json_url = compat_parse_qs(
- compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
- if json_url:
- title = self._search_regex(
- r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
- webpage, 'title', default=None, group='title')
- return self._extract_from_json_url(json_url, video_id, lang, title=title)
- # Different kind of embed URL (e.g.
- # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
- entries = [
- self.url_result(url)
- for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)]
- return self.playlist_result(entries)
-
-
-# It also uses the arte_vp_url url from the webpage to extract the information
-class ArteTVCreativeIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:creative'
- _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1',
- 'info_dict': {
- 'id': '057405-001-A',
- 'ext': 'mp4',
- 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)',
- 'upload_date': '20150716',
- },
- }, {
- 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion',
- 'playlist_count': 11,
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde',
- 'only_matching': True,
- }]
-
-
-class ArteTVInfoIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:info'
- _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere',
- 'info_dict': {
- 'id': '067528-000-A',
- 'ext': 'mp4',
- 'title': 'Service civique, un cache misère ?',
- 'upload_date': '20160403',
- },
- }]
-
-
-class ArteTVFutureIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:future'
- _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
_TESTS = [{
- 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses',
+ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
'info_dict': {
- 'id': '050940-028-A',
+ 'id': '088501-000-A',
'ext': 'mp4',
- 'title': 'Les écrevisses aussi peuvent être anxieuses',
- 'upload_date': '20140902',
+ 'title': 'Mexico: Stealing Petrol to Survive',
+ 'upload_date': '20190628',
},
- }, {
- 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable',
- 'only_matching': True,
}]
-
-class ArteTVDDCIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:ddc'
- _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)'
-
- _TESTS = []
-
def _real_extract(self, url):
- video_id, lang = self._extract_url_info(url)
- if lang == 'folge':
- lang = 'de'
- elif lang == 'emission':
- lang = 'fr'
- webpage = self._download_webpage(url, video_id)
- scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage)
- script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url')
- javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator')
- json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url')
- return self._extract_from_json_url(json_url, video_id, lang)
-
-
-class ArteTVConcertIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:concert'
- _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde',
- 'md5': '9ea035b7bd69696b67aa2ccaaa218161',
- 'info_dict': {
- 'id': '186',
- 'ext': 'mp4',
- 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"',
- 'upload_date': '20140128',
- 'description': 'md5:486eb08f991552ade77439fe6d82c305',
- },
- }]
-
-
-class ArteTVCinemaIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:cinema'
- _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)'
-
- _TESTS = [{
- 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck',
- 'md5': 'a5b9dd5575a11d93daf0e3f404f45438',
- 'info_dict': {
- 'id': '062494-000-A',
- 'ext': 'mp4',
- 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck',
- 'upload_date': '20150807',
- },
- }]
-
-
-class ArteTVMagazineIE(ArteTVPlus7IE):
- IE_NAME = 'arte.tv:magazine'
- _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..."
- 'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium',
- 'md5': '2a9369bcccf847d1c741e51416299f25',
- 'info_dict': {
- 'id': '065965-000-A',
- 'ext': 'mp4',
- 'title': 'Trepalium - Extrait Ep.01',
- 'upload_date': '20160121',
- },
- }, {
- # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium"
- 'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium',
- 'md5': 'fedc64fc7a946110fe311634e79782ca',
- 'info_dict': {
- 'id': '054813-004_PLUS7-F',
- 'ext': 'mp4',
- 'title': 'Trepalium (4/6)',
- 'description': 'md5:10057003c34d54e95350be4f9b05cb40',
- 'upload_date': '20160218',
- },
- }, {
- 'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis',
- 'only_matching': True,
- }]
+ lang, video_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_from_json_url(
+ 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
+ video_id, lang)
class ArteTVEmbedIE(ArteTVPlus7IE):
IE_NAME = 'arte.tv:embed'
_VALID_URL = r'''(?x)
- http://www\.arte\.tv
- /(?:playerv2/embed|arte_vp/index)\.php\?json_url=
+ https://www\.arte\.tv
+ /player/v3/index\.php\?json_url=
(?P<json_url>
- http://arte\.tv/papi/tvguide/videos/stream/player/
- (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]*
+ https?://api\.arte\.tv/api/player/v1/config/
+ (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
)
'''
_TESTS = []
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- lang = mobj.group('lang')
- json_url = mobj.group('json_url')
+ json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
return self._extract_from_json_url(json_url, video_id, lang)
-class TheOperaPlatformIE(ArteTVPlus7IE):
- IE_NAME = 'theoperaplatform'
- _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello',
- 'md5': '970655901fa2e82e04c00b955e9afe7b',
- 'info_dict': {
- 'id': '060338-009-A',
- 'ext': 'mp4',
- 'title': 'Verdi - OTELLO',
- 'upload_date': '20160927',
- },
- }]
-
-
class ArteTVPlaylistIE(ArteTVBaseIE):
IE_NAME = 'arte.tv:playlist'
- _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)'
+ _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
_TESTS = [{
- 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV',
+ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
'info_dict': {
- 'id': 'PL-013263',
- 'title': 'Areva & Uramin',
- 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf',
+ 'id': 'RC-016954',
+ 'title': 'Earn a Living',
+ 'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
},
'playlist_mincount': 6,
- }, {
- 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV',
- 'only_matching': True,
}]
def _real_extract(self, url):
- playlist_id, lang = self._extract_url_info(url)
+ lang, playlist_id = re.match(self._VALID_URL, url).groups()
collection = self._download_json(
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
% (lang, playlist_id), playlist_id)
diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py
index 6d71c5ad5..0348e680c 100644
--- a/youtube_dl/extractor/asiancrush.py
+++ b/youtube_dl/extractor/asiancrush.py
@@ -5,14 +5,12 @@ import re
from .common import InfoExtractor
from .kaltura import KalturaIE
-from ..utils import (
- extract_attributes,
- remove_end,
-)
+from ..utils import extract_attributes
class AsianCrushIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P<id>\d+)v\b'
+ _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
+ _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
_TESTS = [{
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
@@ -20,7 +18,7 @@ class AsianCrushIE(InfoExtractor):
'id': '1_y4tmjm5r',
'ext': 'mp4',
'title': 'Women Who Flirt',
- 'description': 'md5:3db14e9186197857e7063522cb89a805',
+ 'description': 'md5:7e986615808bcfb11756eb503a751487',
'timestamp': 1496936429,
'upload_date': '20170608',
'uploader_id': 'craig@crifkin.com',
@@ -28,10 +26,27 @@ class AsianCrushIE(InfoExtractor):
}, {
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
@@ -51,7 +66,7 @@ class AsianCrushIE(InfoExtractor):
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
player = self._download_webpage(
- 'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
+ 'https://api.%s/embeddedVideoPlayer' % host, video_id,
query={'id': entry_id})
kaltura_id = self._search_regex(
@@ -63,15 +78,23 @@ class AsianCrushIE(InfoExtractor):
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
default='513551')
- return self.url_result(
- 'kaltura:%s:%s' % (partner_id, kaltura_id),
- ie=KalturaIE.ie_key(), video_id=kaltura_id,
- video_title=title)
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
+ webpage, 'description', fatal=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
+ 'ie_key': KalturaIE.ie_key(),
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ }
class AsianCrushPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P<id>\d+)s\b'
- _TEST = {
+ _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
+ _TESTS = [{
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
'info_dict': {
'id': '12481',
@@ -79,7 +102,16 @@ class AsianCrushPlaylistIE(InfoExtractor):
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
},
'playlist_count': 20,
- }
+ }, {
+ 'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -96,15 +128,15 @@ class AsianCrushPlaylistIE(InfoExtractor):
entries.append(self.url_result(
mobj.group('url'), ie=AsianCrushIE.ie_key()))
- title = remove_end(
- self._html_search_regex(
- r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
- 'title', default=None) or self._og_search_title(
- webpage, default=None) or self._html_search_meta(
- 'twitter:title', webpage, 'title',
- default=None) or self._search_regex(
- r'<title>([^<]+)</title>', webpage, 'title', fatal=False),
- ' | AsianCrush')
+ title = self._html_search_regex(
+ r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
+ 'title', default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title',
+ default=None) or self._search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+ if title:
+ title = re.sub(r'\s*\|\s*.+?$', '', title)
description = self._og_search_description(
webpage, default=None) or self._html_search_meta(
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index ae1c09427..c2cec9845 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -1,202 +1,118 @@
+# coding: utf-8
from __future__ import unicode_literals
-import time
-import hmac
-import hashlib
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
- float_or_none,
int_or_none,
- sanitized_Request,
urlencode_postdata,
- xpath_text,
)
class AtresPlayerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
+ _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
_NETRC_MACHINE = 'atresplayer'
_TESTS = [
{
- 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
- 'md5': 'efd56753cda1bb64df52a3074f62e38a',
+ 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
'info_dict': {
- 'id': 'capitulo-10-especial-solidario-nochebuena',
+ 'id': '5d4aa2c57ed1a88fc715a615',
'ext': 'mp4',
- 'title': 'Especial Solidario de Nochebuena',
- 'description': 'md5:e2d52ff12214fa937107d21064075bf1',
- 'duration': 5527.6,
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Capítulo 7: Asuntos pendientes',
+ 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
+ 'duration': 3413,
+ },
+ 'params': {
+ 'format': 'bestvideo',
},
'skip': 'This video is only available for registered users'
},
{
- 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
- 'md5': '6e52cbb513c405e403dbacb7aacf8747',
- 'info_dict': {
- 'id': 'capitulo-112-david-bustamante',
- 'ext': 'flv',
- 'title': 'David Bustamante',
- 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
- 'duration': 1439.0,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
+ 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
+ 'only_matching': True,
},
{
- 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
+ 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
'only_matching': True,
},
]
-
- _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
- _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
- _TIMESTAMP_SHIFT = 30000
-
- _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
- _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
- _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
- _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
-
- _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
-
- _ERRORS = {
- 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
- 'DELETED': 'This video has expired and is no longer available for online streaming.',
- 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
- # 'PREMIUM': 'PREMIUM',
- }
+ _API_BASE = 'https://api.atresplayer.com/'
def _real_initialize(self):
self._login()
+ def _handle_error(self, e, code):
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
+ error = self._parse_json(e.cause.read(), None)
+ if error.get('error') == 'required_registered':
+ self.raise_login_required()
+ raise ExtractorError(error['error_description'], expected=True)
+ raise
+
def _login(self):
username, password = self._get_login_info()
if username is None:
return
- login_form = {
- 'j_username': username,
- 'j_password': password,
- }
+ self._request_webpage(
+ self._API_BASE + 'login', None, 'Downloading login page')
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form))
- request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- response = self._download_webpage(
- request, None, 'Logging in')
+ try:
+ target_url = self._download_json(
+ 'https://account.atresmedia.com/api/login', None,
+ 'Logging in', headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ }))['targetUrl']
+ except ExtractorError as e:
+ self._handle_error(e, 400)
- error = self._html_search_regex(
- r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
- response, 'error', default=None)
- if error:
- raise ExtractorError(
- 'Unable to login: %s' % error, expected=True)
+ self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
- episode_id = self._search_regex(
- r'episode="([^"]+)"', webpage, 'episode id')
+ try:
+ episode = self._download_json(
+ self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
+ except ExtractorError as e:
+ self._handle_error(e, 403)
- request = sanitized_Request(
- self._PLAYER_URL_TEMPLATE % episode_id,
- headers={'User-Agent': self._USER_AGENT})
- player = self._download_json(request, episode_id, 'Downloading player JSON')
-
- episode_type = player.get('typeOfEpisode')
- error_message = self._ERRORS.get(episode_type)
- if error_message:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+ title = episode['titulo']
formats = []
- video_url = player.get('urlVideo')
- if video_url:
- format_info = {
- 'url': video_url,
- 'format_id': 'http',
- }
- mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
- if mobj:
- format_info.update({
- 'width': int_or_none(mobj.group('width')),
- 'height': int_or_none(mobj.group('height')),
- 'tbr': int_or_none(mobj.group('bitrate')),
- })
- formats.append(format_info)
-
- timestamp = int_or_none(self._download_webpage(
- self._TIME_API_URL,
- video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
- timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
- token = hmac.new(
- self._MAGIC.encode('ascii'),
- (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
- ).hexdigest()
-
- request = sanitized_Request(
- self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
- headers={'User-Agent': self._USER_AGENT})
-
- fmt_json = self._download_json(
- request, video_id, 'Downloading windows video JSON')
-
- result = fmt_json.get('resultDes')
- if result.lower() != 'ok':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, result), expected=True)
-
- for format_id, video_url in fmt_json['resultObject'].items():
- if format_id == 'token' or not video_url.startswith('http'):
- continue
- if 'geodeswowsmpra3player' in video_url:
- # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
- # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
- # this videos are protected by DRM, the f4m downloader doesn't support them
+ for source in episode.get('sources', []):
+ src = source.get('src')
+ if not src:
continue
- video_url_hd = video_url.replace('free_es', 'es')
- formats.extend(self._extract_f4m_formats(
- video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
- fatal=False))
- formats.extend(self._extract_mpd_formats(
- video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
- fatal=False))
+ src_type = source.get('type')
+ if src_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
- path_data = player.get('pathData')
-
- episode = self._download_xml(
- self._EPISODE_URL_TEMPLATE % path_data, video_id,
- 'Downloading episode XML')
-
- duration = float_or_none(xpath_text(
- episode, './media/asset/info/technical/contentDuration', 'duration'))
-
- art = episode.find('./media/asset/info/art')
- title = xpath_text(art, './name', 'title')
- description = xpath_text(art, './description', 'description')
- thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
-
- subtitles = {}
- subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
- if subtitle_url:
- subtitles['es'] = [{
- 'ext': 'srt',
- 'url': subtitle_url,
- }]
+ heartbeat = episode.get('heartbeat') or {}
+ omniture = episode.get('omniture') or {}
+ get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
return {
+ 'display_id': display_id,
'id': video_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
+ 'description': episode.get('descripcion'),
+ 'thumbnail': episode.get('imgPoster'),
+ 'duration': int_or_none(episode.get('duration')),
'formats': formats,
- 'subtitles': subtitles,
+ 'channel': get_meta('channel'),
+ 'season': get_meta('season'),
+ 'episode_number': int_or_none(get_meta('episodeNumber')),
}
diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py
index 1584d53fc..95e572d70 100644
--- a/youtube_dl/extractor/atvat.py
+++ b/youtube_dl/extractor/atvat.py
@@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(unescapeHTML(self._search_regex(
- r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
- webpage, 'player data')), display_id)['config']['initial_video']
+ [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
+ r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
+ webpage, 'player data', group='json')),
+ display_id)['config']['initial_video']
video_id = video_data['id']
video_title = video_data['title']
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py
index 393f381c6..c51837b40 100644
--- a/youtube_dl/extractor/audioboom.py
+++ b/youtube_dl/extractor/audioboom.py
@@ -2,22 +2,25 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import float_or_none
+from ..utils import (
+ clean_html,
+ float_or_none,
+)
class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
- 'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
+ 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
+ 'md5': '7b00192e593ff227e6a315486979a42d',
'info_dict': {
- 'id': '4279833',
+ 'id': '7398103',
'ext': 'mp3',
- 'title': '3/09/2016 Czaban Hour 3',
- 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
- 'duration': 2245.72,
- 'uploader': 'SB Nation A.M.',
- 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
+ 'title': 'Asim Chaudhry',
+ 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
+ 'duration': 4000.99,
+ 'uploader': 'Sue Perkins: An hour or so with...',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
}
}, {
'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
@@ -32,8 +35,8 @@ class AudioBoomIE(InfoExtractor):
clip = None
clip_store = self._parse_json(
- self._search_regex(
- r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
+ self._html_search_regex(
+ r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
webpage, 'clip store', default='{}', group='json'),
video_id, fatal=False)
if clip_store:
@@ -47,14 +50,15 @@ class AudioBoomIE(InfoExtractor):
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
'audio', webpage, 'audio url')
- title = from_clip('title') or self._og_search_title(webpage)
- description = from_clip('description') or self._og_search_description(webpage)
+ title = from_clip('title') or self._html_search_meta(
+ ['og:title', 'og:audio:title', 'audio_title'], webpage)
+ description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
duration = float_or_none(from_clip('duration') or self._html_search_meta(
'weibo:audio:duration', webpage))
- uploader = from_clip('author') or self._og_search_property(
- 'audio:artist', webpage, 'uploader', fatal=False)
+ uploader = from_clip('author') or self._html_search_meta(
+ ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
uploader_url = from_clip('author_url') or self._html_search_meta(
'audioboo:channel', webpage, 'uploader url')
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py
index 62049b921..cc7771354 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dl/extractor/audiomack.py
@@ -62,7 +62,7 @@ class AudiomackIE(InfoExtractor):
# Audiomack wraps a lot of soundcloud tracks in their branded wrapper
# if so, pass the work off to the soundcloud extractor
if SoundcloudIE.suitable(api_response['url']):
- return {'_type': 'url', 'url': api_response['url'], 'ie_key': 'Soundcloud'}
+ return self.url_result(api_response['url'], SoundcloudIE.ie_key())
return {
'id': compat_str(api_response.get('id', album_url_tag)),
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
index 68f26e2ca..b1e20def5 100644
--- a/youtube_dl/extractor/azmedien.py
+++ b/youtube_dl/extractor/azmedien.py
@@ -1,213 +1,66 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from .kaltura import KalturaIE
-from ..utils import (
- get_element_by_class,
- get_element_by_id,
- strip_or_none,
- urljoin,
-)
-class AZMedienBaseIE(InfoExtractor):
- def _kaltura_video(self, partner_id, entry_id):
- return self.url_result(
- 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
- video_id=entry_id)
-
-
-class AZMedienIE(AZMedienBaseIE):
+class AZMedienIE(InfoExtractor):
IE_DESC = 'AZ Medien videos'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
- (?:
+ (?P<host>
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
- [0-9]+-show-[^/\#]+
- (?:
- /[0-9]+-episode-[^/\#]+
- (?:
- /[0-9]+-segment-(?:[^/\#]+\#)?|
- \#
- )|
- \#
+ [^/]+/
+ (?P<id>
+ [^/]+-(?P<article_id>\d+)
)
- (?P<id>[^\#]+)
+ (?:
+ \#video=
+ (?P<kaltura_id>
+ [_0-9a-z]+
+ )
+ )?
'''
_TESTS = [{
- # URL with 'segment'
- 'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
+ 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
'info_dict': {
- 'id': '1_2444peh4',
+ 'id': '1_anruz3wy',
'ext': 'mp4',
- 'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
- 'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
- 'uploader_id': 'TeleZ?ri',
- 'upload_date': '20161218',
- 'timestamp': 1482084490,
+ 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+ 'uploader_id': 'TVOnline',
+ 'upload_date': '20180930',
+ 'timestamp': 1538328802,
},
'params': {
'skip_download': True,
},
}, {
- # URL with 'segment' and fragment:
- 'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
- 'only_matching': True
- }, {
- # URL with 'episode' and fragment:
- 'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
- 'only_matching': True
- }, {
- # URL with 'show' and fragment:
- 'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
+ 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'only_matching': True
}]
+ _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+ _PARTNER_ID = '1719221'
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- partner_id = self._search_regex(
- r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
- webpage, 'kaltura partner id')
- entry_id = self._html_search_regex(
- r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
- % re.escape(video_id), webpage, 'kaltura entry id', group='id')
+ host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
- return self._kaltura_video(partner_id, entry_id)
+ if not entry_id:
+ entry_id = self._download_json(
+ self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
+ 'variables': json.dumps({
+ 'contextId': 'NewsArticle:' + article_id,
+ }),
+ })['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
-
-class AZMedienPlaylistIE(AZMedienBaseIE):
- IE_DESC = 'AZ Medien playlists'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?
- (?:
- telezueri\.ch|
- telebaern\.tv|
- telem1\.ch
- )/
- (?P<id>[0-9]+-
- (?:
- show|
- topic|
- themen
- )-[^/\#]+
- (?:
- /[0-9]+-episode-[^/\#]+
- )?
- )$
- '''
-
- _TESTS = [{
- # URL with 'episode'
- 'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
- 'info_dict': {
- 'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
- 'title': 'News - Donnerstag, 15. Dezember 2016',
- },
- 'playlist_count': 9,
- }, {
- # URL with 'themen'
- 'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
- 'info_dict': {
- 'id': '258-themen-tele-m1-classics',
- 'title': 'Tele M1 Classics',
- },
- 'playlist_mincount': 15,
- }, {
- # URL with 'topic', contains nested playlists
- 'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
- 'only_matching': True,
- }, {
- # URL with 'show' only
- 'url': 'http://www.telezueri.ch/86-show-talktaeglich',
- 'only_matching': True
- }]
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
- webpage = self._download_webpage(url, show_id)
-
- entries = []
-
- partner_id = self._search_regex(
- r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
- webpage, 'kaltura partner id', default=None)
-
- if partner_id:
- entries = [
- self._kaltura_video(partner_id, m.group('id'))
- for m in re.finditer(
- r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
-
- if not entries:
- entries = [
- self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
- for m in re.finditer(
- r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
-
- if not entries:
- entries = [
- # May contain nested playlists (e.g. [1]) thus no explicit
- # ie_key
- # 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
- self.url_result(urljoin(url, m.group('url')))
- for m in re.finditer(
- r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
-
- title = self._search_regex(
- r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
- webpage, 'title',
- default=strip_or_none(get_element_by_id(
- 'video-title', webpage)), group='title')
-
- return self.playlist_result(entries, show_id, title)
-
-
-class AZMedienShowPlaylistIE(AZMedienBaseIE):
- IE_DESC = 'AZ Medien show playlists'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?
- (?:
- telezueri\.ch|
- telebaern\.tv|
- telem1\.ch
- )/
- (?:
- all-episodes|
- alle-episoden
- )/
- (?P<id>[^/?#&]+)
- '''
-
- _TEST = {
- 'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
- 'info_dict': {
- 'id': 'astrotalk',
- 'title': 'TeleZüri: AstroTalk - alle episoden',
- 'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
- },
- 'playlist_mincount': 13,
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- episodes = get_element_by_class('search-mobile-box', webpage)
- entries = [self.url_result(
- urljoin(url, m.group('url'))) for m in re.finditer(
- r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
- title = self._og_search_title(webpage, fatal=False)
- description = self._og_search_description(webpage)
- return self.playlist_result(entries, playlist_id, title, description)
+ return self.url_result(
+ 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
+ ie=KalturaIE.ie_key(), video_id=entry_id)
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
deleted file mode 100644
index 34f1b3d83..000000000
--- a/youtube_dl/extractor/bambuser.py
+++ /dev/null
@@ -1,142 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import itertools
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class BambuserIE(InfoExtractor):
- IE_NAME = 'bambuser'
- _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
- _API_KEY = '005f64509e19a868399060af746a00aa'
- _LOGIN_URL = 'https://bambuser.com/user'
- _NETRC_MACHINE = 'bambuser'
-
- _TEST = {
- 'url': 'http://bambuser.com/v/4050584',
- # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
- # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
- 'info_dict': {
- 'id': '4050584',
- 'ext': 'flv',
- 'title': 'Education engineering days - lightning talks',
- 'duration': 3741,
- 'uploader': 'pixelversity',
- 'uploader_id': '344706',
- 'timestamp': 1382976692,
- 'upload_date': '20131028',
- 'view_count': int,
- },
- 'params': {
- # It doesn't respect the 'Range' header, it would download the whole video
- # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
- 'skip_download': True,
- },
- }
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'form_id': 'user_login',
- 'op': 'Log in',
- 'name': username,
- 'pass': password,
- }
-
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form))
- request.add_header('Referer', self._LOGIN_URL)
- response = self._download_webpage(
- request, None, 'Logging in')
-
- login_error = self._html_search_regex(
- r'(?s)<div class="messages error">(.+?)</div>',
- response, 'login error', default=None)
- if login_error:
- raise ExtractorError(
- 'Unable to login: %s' % login_error, expected=True)
-
- def _real_initialize(self):
- self._login()
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- info = self._download_json(
- 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
- % (self._API_KEY, video_id), video_id)
-
- error = info.get('error')
- if error:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error), expected=True)
-
- result = info['result']
-
- return {
- 'id': video_id,
- 'title': result['title'],
- 'url': result['url'],
- 'thumbnail': result.get('preview'),
- 'duration': int_or_none(result.get('length')),
- 'uploader': result.get('username'),
- 'uploader_id': compat_str(result.get('owner', {}).get('uid')),
- 'timestamp': int_or_none(result.get('created')),
- 'fps': float_or_none(result.get('framerate')),
- 'view_count': int_or_none(result.get('views_total')),
- 'comment_count': int_or_none(result.get('comment_count')),
- }
-
-
-class BambuserChannelIE(InfoExtractor):
- IE_NAME = 'bambuser:channel'
- _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
- # The maximum number we can get with each request
- _STEP = 50
- _TEST = {
- 'url': 'http://bambuser.com/channel/pixelversity',
- 'info_dict': {
- 'title': 'pixelversity',
- },
- 'playlist_mincount': 60,
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user = mobj.group('user')
- urls = []
- last_id = ''
- for i in itertools.count(1):
- req_url = (
- 'http://bambuser.com/xhr-api/index.php?username={user}'
- '&sort=created&access_mode=0%2C1%2C2&limit={count}'
- '&method=broadcast&format=json&vid_older_than={last}'
- ).format(user=user, count=self._STEP, last=last_id)
- req = sanitized_Request(req_url)
- # Without setting this header, we wouldn't get any result
- req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
- data = self._download_json(
- req, user, 'Downloading page %d' % i)
- results = data['result']
- if not results:
- break
- last_id = results[-1]['vid']
- urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
-
- return {
- '_type': 'playlist',
- 'title': user,
- 'entries': urls,
- }
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index abcfa301d..002c39c39 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -1,8 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import itertools
+import re
from .common import InfoExtractor
from ..utils import (
@@ -17,10 +17,12 @@ from ..utils import (
parse_iso8601,
try_get,
unescapeHTML,
+ url_or_none,
urlencode_postdata,
urljoin,
)
from ..compat import (
+ compat_etree_Element,
compat_HTTPError,
compat_urlparse,
)
@@ -38,6 +40,7 @@ class BBCCoUkIE(InfoExtractor):
iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
music/(?:clips|audiovideo/popular)[/#]|
radio/player/|
+ sounds/play/|
events/[^/]+/play/[^/]+/
)
(?P<id>%s)(?!/(?:episodes|broadcasts|clips))
@@ -68,7 +71,7 @@ class BBCCoUkIE(InfoExtractor):
'info_dict': {
'id': 'b039d07m',
'ext': 'flv',
- 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4',
+ 'title': 'Kaleidoscope, Leonard Cohen',
'description': 'The Canadian poet and songwriter reflects on his musical career.',
},
'params': {
@@ -206,7 +209,7 @@ class BBCCoUkIE(InfoExtractor):
},
'skip': 'Now it\'s really geo-restricted',
}, {
- # compact player (https://github.com/rg3/youtube-dl/issues/8147)
+ # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
'info_dict': {
'id': 'p028bfkj',
@@ -219,6 +222,20 @@ class BBCCoUkIE(InfoExtractor):
'skip_download': True,
},
}, {
+ 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'm0007jz9',
+ 'ext': 'mp4',
+ 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
+ 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
+ 'duration': 9840,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
'only_matching': True,
}, {
@@ -310,7 +327,13 @@ class BBCCoUkIE(InfoExtractor):
def _get_subtitles(self, media, programme_id):
subtitles = {}
for connection in self._extract_connections(media):
- captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
+ cc_url = url_or_none(connection.get('href'))
+ if not cc_url:
+ continue
+ captions = self._download_xml(
+ cc_url, programme_id, 'Downloading captions', fatal=False)
+ if not isinstance(captions, compat_etree_Element):
+ continue
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
subtitles[lang] = [
{
@@ -505,7 +528,7 @@ class BBCCoUkIE(InfoExtractor):
def get_programme_id(item):
def get_from_attributes(item):
- for p in('identifier', 'group'):
+ for p in ('identifier', 'group'):
value = item.get(p)
if value and re.match(r'^[pb][\da-z]{7}$', value):
return value
@@ -601,7 +624,7 @@ class BBCIE(BBCCoUkIE):
'url': 'http://www.bbc.com/news/world-europe-32668511',
'info_dict': {
'id': 'world-europe-32668511',
- 'title': 'Russia stages massive WW2 parade despite Western boycott',
+ 'title': 'Russia stages massive WW2 parade',
'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
},
'playlist_count': 2,
@@ -795,6 +818,15 @@ class BBCIE(BBCCoUkIE):
'uploader': 'Radio 3',
'uploader_id': 'bbc_radio_three',
},
+ }, {
+ 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
+ 'info_dict': {
+ 'id': 'p06w9tws',
+ 'ext': 'mp4',
+ 'title': 'md5:2fabf12a726603193a2879a055f72514',
+ 'description': 'Learn English words and phrases from this story',
+ },
+ 'add_ie': [BBCCoUkIE.ie_key()],
}]
@classmethod
@@ -945,6 +977,15 @@ class BBCIE(BBCCoUkIE):
if entries:
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+ # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
+ group_id = self._search_regex(
+ r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
+ webpage, 'group id', default=None)
+ if playlist_id:
+ return self.url_result(
+ 'https://www.bbc.co.uk/programmes/%s' % group_id,
+ ie=BBCCoUkIE.ie_key())
+
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
programme_id = self._search_regex(
[r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py
index 2eaec1ab4..86abdae00 100644
--- a/youtube_dl/extractor/beampro.py
+++ b/youtube_dl/extractor/beampro.py
@@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE):
class BeamProVodIE(BeamProBaseIE):
IE_NAME = 'Mixer:vod'
- _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
+ _TESTS = [{
'url': 'https://mixer.com/willow8714?vod=2259830',
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
'info_dict': {
@@ -119,7 +119,13 @@ class BeamProVodIE(BeamProBaseIE):
'params': {
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
+ 'only_matching': True,
+ }]
@staticmethod
def _extract_format(vod, vod_type):
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
index bf22a41b7..5788d13ba 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dl/extractor/beeg.py
@@ -2,20 +2,19 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
- compat_chr,
- compat_ord,
- compat_urllib_parse_unquote,
+ compat_str,
+ compat_urlparse,
)
from ..utils import (
int_or_none,
- parse_iso8601,
- urljoin,
+ unified_timestamp,
)
class BeegIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
+ _TESTS = [{
+ # api/v6 v1
'url': 'http://beeg.com/5416503',
'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
'info_dict': {
@@ -29,76 +28,53 @@ class BeegIE(InfoExtractor):
'tags': list,
'age_limit': 18,
}
- }
+ }, {
+ # api/v6 v2
+ 'url': 'https://beeg.com/1941093077?t=911-1391',
+ 'only_matching': True,
+ }, {
+ # api/v6 v2 w/o t
+ 'url': 'https://beeg.com/1277207756',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://beeg.porn/video/5416503',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://beeg.porn/5416503',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- cpl_url = self._search_regex(
- r'<script[^>]+src=(["\'])(?P<url>(?:/static|(?:https?:)?//static\.beeg\.com)/cpl/\d+\.js.*?)\1',
- webpage, 'cpl', default=None, group='url')
-
- cpl_url = urljoin(url, cpl_url)
-
- beeg_version, beeg_salt = [None] * 2
-
- if cpl_url:
- cpl = self._download_webpage(
- self._proto_relative_url(cpl_url), video_id,
- 'Downloading cpl JS', fatal=False)
- if cpl:
- beeg_version = int_or_none(self._search_regex(
- r'beeg_version\s*=\s*([^\b]+)', cpl,
- 'beeg version', default=None)) or self._search_regex(
- r'/(\d+)\.js', cpl_url, 'beeg version', default=None)
- beeg_salt = self._search_regex(
- r'beeg_salt\s*=\s*(["\'])(?P<beeg_salt>.+?)\1', cpl, 'beeg salt',
- default=None, group='beeg_salt')
-
- beeg_version = beeg_version or '2185'
- beeg_salt = beeg_salt or 'pmweAkq8lAYKdfWcFCUj0yoVgoPlinamH5UE1CB3H'
+ beeg_version = self._search_regex(
+ r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
+ default='1546225636701')
+
+ if len(video_id) >= 10:
+ query = {
+ 'v': 2,
+ }
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ t = qs.get('t', [''])[0].split('-')
+ if len(t) > 1:
+ query.update({
+ 's': t[0],
+ 'e': t[1],
+ })
+ else:
+ query = {'v': 1}
for api_path in ('', 'api.'):
video = self._download_json(
'https://%sbeeg.com/api/v6/%s/video/%s'
% (api_path, beeg_version, video_id), video_id,
- fatal=api_path == 'api.')
+ fatal=api_path == 'api.', query=query)
if video:
break
- def split(o, e):
- def cut(s, x):
- n.append(s[:x])
- return s[x:]
- n = []
- r = len(o) % e
- if r > 0:
- o = cut(o, r)
- while len(o) > e:
- o = cut(o, e)
- n.append(o)
- return n
-
- def decrypt_key(key):
- # Reverse engineered from http://static.beeg.com/cpl/1738.js
- a = beeg_salt
- e = compat_urllib_parse_unquote(key)
- o = ''.join([
- compat_chr(compat_ord(e[n]) - compat_ord(a[n % len(a)]) % 21)
- for n in range(len(e))])
- return ''.join(split(o, 3)[::-1])
-
- def decrypt_url(encrypted_url):
- encrypted_url = self._proto_relative_url(
- encrypted_url.replace('{DATA_MARKERS}', ''), 'https:')
- key = self._search_regex(
- r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None)
- if not key:
- return encrypted_url
- return encrypted_url.replace(key, decrypt_key(key))
-
formats = []
for format_id, video_url in video.items():
if not video_url:
@@ -108,18 +84,20 @@ class BeegIE(InfoExtractor):
if not height:
continue
formats.append({
- 'url': decrypt_url(video_url),
+ 'url': self._proto_relative_url(
+ video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
'format_id': format_id,
'height': int(height),
})
self._sort_formats(formats)
title = video['title']
- video_id = video.get('id') or video_id
+ video_id = compat_str(video.get('id') or video_id)
display_id = video.get('code')
description = video.get('desc')
+ series = video.get('ps_name')
- timestamp = parse_iso8601(video.get('date'), ' ')
+ timestamp = unified_timestamp(video.get('date'))
duration = int_or_none(video.get('duration'))
tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
@@ -129,6 +107,7 @@ class BeegIE(InfoExtractor):
'display_id': display_id,
'title': title,
'description': description,
+ 'series': series,
'timestamp': timestamp,
'duration': duration,
'tags': tags,
diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py
index f36a2452d..9f9de96c6 100644
--- a/youtube_dl/extractor/bellmedia.py
+++ b/youtube_dl/extractor/bellmedia.py
@@ -22,10 +22,11 @@ class BellMediaIE(InfoExtractor):
bravo|
mtv|
space|
- etalk
+ etalk|
+ marilyn
)\.ca|
- much\.com
- )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
+ (?:much|cp24)\.com
+ )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
'md5': '36d3ef559cfe8af8efe15922cd3ce950',
@@ -61,6 +62,9 @@ class BellMediaIE(InfoExtractor):
}, {
'url': 'http://www.etalk.ca/video?videoid=663455',
'only_matching': True,
+ }, {
+ 'url': 'https://www.cp24.com/video?clipId=1982548',
+ 'only_matching': True,
}]
_DOMAINS = {
'thecomedynetwork': 'comedy',
@@ -70,6 +74,7 @@ class BellMediaIE(InfoExtractor):
'animalplanet': 'aniplan',
'etalk': 'ctv',
'bnnbloomberg': 'bnn',
+ 'marilyn': 'ctv_marilyn',
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py
new file mode 100644
index 000000000..60c8944b5
--- /dev/null
+++ b/youtube_dl/extractor/bfi.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class BFIPlayerIE(InfoExtractor):
+ IE_NAME = 'bfi:player'
+ _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P<id>[\w-]+)-online'
+ _TEST = {
+ 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online',
+ 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8',
+ 'info_dict': {
+ 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63',
+ 'ext': 'mp4',
+ 'title': 'Computer Doctor',
+ 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b',
+ },
+ 'skip': 'BFI Player films cannot be played outside of the UK',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ entries = []
+ for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage):
+ player_attr = extract_attributes(player_el)
+ ooyala_id = player_attr.get('data-video-id')
+ if not ooyala_id:
+ continue
+ entries.append(self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala',
+ ooyala_id, player_attr.get('data-label')))
+ return self.playlist_result(entries)
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index 4d6b051fe..4dc597e16 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -15,6 +15,7 @@ from ..utils import (
float_or_none,
parse_iso8601,
smuggle_url,
+ str_or_none,
strip_jsonp,
unified_timestamp,
unsmuggle_url,
@@ -23,7 +24,18 @@ from ..utils import (
class BiliBiliIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|bangumi)\.)?
+ bilibili\.(?:tv|com)/
+ (?:
+ (?:
+ video/[aA][vV]|
+ anime/(?P<anime_id>\d+)/play\#
+ )(?P<id_bv>\d+)|
+ video/[bB][vV](?P<id>[^/?#&]+)
+ )
+ '''
_TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/',
@@ -91,10 +103,14 @@ class BiliBiliIE(InfoExtractor):
'skip_download': True, # Test metadata only
},
}]
+ }, {
+ # new BV video id format
+ 'url': 'https://www.bilibili.com/video/BV1JE411F741',
+ 'only_matching': True,
}]
- _APP_KEY = '84956560bc028eb7'
- _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'
+ _APP_KEY = 'iVGUTjsxvpLeuDCf'
+ _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
def _report_error(self, result):
if 'message' in result:
@@ -108,7 +124,7 @@ class BiliBiliIE(InfoExtractor):
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = mobj.group('id') or mobj.group('id_bv')
anime_id = mobj.group('anime_id')
webpage = self._download_webpage(url, video_id)
@@ -306,3 +322,129 @@ class BiliBiliBangumiIE(InfoExtractor):
return self.playlist_result(
entries, bangumi_id,
season_info.get('bangumi_title'), season_info.get('evaluate'))
+
+
+class BilibiliAudioBaseIE(InfoExtractor):
+ def _call_api(self, path, sid, query=None):
+ if not query:
+ query = {'sid': sid}
+ return self._download_json(
+ 'https://www.bilibili.com/audio/music-service-c/web/' + path,
+ sid, query=query)['data']
+
+
+class BilibiliAudioIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/au1003142',
+ 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
+ 'info_dict': {
+ 'id': '1003142',
+ 'ext': 'm4a',
+ 'title': '【tsukimi】YELLOW / 神山羊',
+ 'artist': 'tsukimi',
+ 'comment_count': int,
+ 'description': 'YELLOW的mp3版!',
+ 'duration': 183,
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1564836614,
+ 'upload_date': '20190803',
+ 'uploader': 'tsukimi-つきみぐー',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ au_id = self._match_id(url)
+
+ play_data = self._call_api('url', au_id)
+ formats = [{
+ 'url': play_data['cdns'][0],
+ 'filesize': int_or_none(play_data.get('size')),
+ }]
+
+ song = self._call_api('song/info', au_id)
+ title = song['title']
+ statistic = song.get('statistic') or {}
+
+ subtitles = None
+ lyric = song.get('lyric')
+ if lyric:
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }]
+ }
+
+ return {
+ 'id': au_id,
+ 'title': title,
+ 'formats': formats,
+ 'artist': song.get('author'),
+ 'comment_count': int_or_none(statistic.get('comment')),
+ 'description': song.get('intro'),
+ 'duration': int_or_none(song.get('duration')),
+ 'subtitles': subtitles,
+ 'thumbnail': song.get('cover'),
+ 'timestamp': int_or_none(song.get('passtime')),
+ 'uploader': song.get('uname'),
+ 'view_count': int_or_none(statistic.get('play')),
+ }
+
+
+class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/am10624',
+ 'info_dict': {
+ 'id': '10624',
+ 'title': '每日新曲推荐(每日11:00更新)',
+ 'description': '每天11:00更新,为你推送最新音乐',
+ },
+ 'playlist_count': 19,
+ }
+
+ def _real_extract(self, url):
+ am_id = self._match_id(url)
+
+ songs = self._call_api(
+ 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
+
+ entries = []
+ for song in songs:
+ sid = str_or_none(song.get('id'))
+ if not sid:
+ continue
+ entries.append(self.url_result(
+ 'https://www.bilibili.com/audio/au' + sid,
+ BilibiliAudioIE.ie_key(), sid))
+
+ if entries:
+ album_data = self._call_api('menu/info', am_id) or {}
+ album_title = album_data.get('title')
+ if album_title:
+ for entry in entries:
+ entry['album'] = album_title
+ return self.playlist_result(
+ entries, am_id, album_title, album_data.get('intro'))
+
+ return self.playlist_result(entries, am_id)
+
+
+class BiliBiliPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.bilibili.tv/video/av%s/' % video_id,
+ ie=BiliBiliIE.ie_key(), video_id=video_id)
diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py
index b92031c8a..dc86c57c5 100644
--- a/youtube_dl/extractor/biobiochiletv.py
+++ b/youtube_dl/extractor/biobiochiletv.py
@@ -6,7 +6,6 @@ from ..utils import (
ExtractorError,
remove_end,
)
-from .rudo import RudoIE
class BioBioChileTVIE(InfoExtractor):
@@ -41,11 +40,15 @@ class BioBioChileTVIE(InfoExtractor):
}, {
'url': 'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml',
'info_dict': {
- 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos',
+ 'id': 'b4xd0LK3SK',
'ext': 'mp4',
- 'uploader': '(none)',
- 'upload_date': '20160708',
- 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos',
+ # TODO: fix url_transparent information overriding
+ # 'uploader': 'Juan Pablo Echenique',
+ 'title': 'Comentario Oscar Cáceres',
+ },
+ 'params': {
+ # empty m3u8 manifest
+ 'skip_download': True,
},
}, {
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
@@ -60,7 +63,9 @@ class BioBioChileTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- rudo_url = RudoIE._extract_url(webpage)
+ rudo_url = self._search_regex(
+ r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
+ webpage, 'embed URL', None, group='url')
if not rudo_url:
raise ExtractorError('No videos found')
@@ -68,7 +73,7 @@ class BioBioChileTVIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
- r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
+ r'<a[^>]+href=["\'](?:https?://(?:busca|www)\.biobiochile\.cl)?/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>',
webpage, 'uploader', fatal=False)
return {
diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py
index beaebfd2a..af21e3ee5 100644
--- a/youtube_dl/extractor/biqle.py
+++ b/youtube_dl/extractor/biqle.py
@@ -2,39 +2,96 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from .vk import VKIE
+from ..utils import (
+ HEADRequest,
+ int_or_none,
+)
class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
- 'url': 'http://www.biqle.ru/watch/847655_160197695',
- 'md5': 'ad5f746a874ccded7b8f211aeea96637',
+ # Youtube embed
+ 'url': 'https://biqle.ru/watch/-115995369_456239081',
+ 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
'info_dict': {
- 'id': '160197695',
+ 'id': '8v4f-avW-VI',
'ext': 'mp4',
- 'title': 'Foo Fighters - The Pretender (Live at Wembley Stadium)',
- 'uploader': 'Andrey Rogozin',
- 'upload_date': '20110605',
- }
+ 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
+ 'description': 'Passe-Partout',
+ 'uploader_id': 'mrsimpsonstef3',
+ 'uploader': 'Phanolito',
+ 'upload_date': '20120822',
+ },
}, {
- 'url': 'https://biqle.org/watch/-44781847_168547604',
+ 'url': 'http://biqle.org/watch/-44781847_168547604',
'md5': '7f24e72af1db0edf7c1aaba513174f97',
'info_dict': {
- 'id': '168547604',
+ 'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
+ 'timestamp': 1396633454,
'uploader': 'Dmitry Kotov',
+ 'upload_date': '20140404',
+ 'uploader_id': '47850140',
},
- 'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
embed_url = self._proto_relative_url(self._search_regex(
- r'<iframe.+?src="((?:http:)?//daxab\.com/[^"]+)".*?></iframe>', webpage, 'embed url'))
+ r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
+ webpage, 'embed url'))
+ if VKIE.suitable(embed_url):
+ return self.url_result(embed_url, VKIE.ie_key(), video_id)
+
+ self._request_webpage(
+ HEADRequest(embed_url), video_id, headers={'Referer': url})
+ video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
+ item = self._download_json(
+ 'https://api.vk.com/method/video.get', video_id,
+ headers={'User-Agent': 'okhttp/3.4.1'}, query={
+ 'access_token': access_token,
+ 'sig': sig,
+ 'v': 5.44,
+ 'videos': video_id,
+ })['response']['items'][0]
+ title = item['title']
+
+ formats = []
+ for f_id, f_url in item.get('files', {}).items():
+ if f_id == 'external':
+ return self.url_result(f_url)
+ ext, height = f_id.split('_')
+ formats.append({
+ 'format_id': height + 'p',
+ 'url': f_url,
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for k, v in item.items():
+ if k.startswith('photo_') and v:
+ width = k.replace('photo_', '')
+ thumbnails.append({
+ 'id': width,
+ 'url': v,
+ 'width': int_or_none(width),
+ })
return {
- '_type': 'url_transparent',
- 'url': embed_url,
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'comment_count': int_or_none(item.get('comments')),
+ 'description': item.get('description'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnails': thumbnails,
+ 'timestamp': int_or_none(item.get('date')),
+ 'uploader': item.get('owner_id'),
+ 'view_count': int_or_none(item.get('views')),
}
diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py
index 446a1ab19..0c773e66e 100644
--- a/youtube_dl/extractor/bitchute.py
+++ b/youtube_dl/extractor/bitchute.py
@@ -5,7 +5,11 @@ import itertools
import re
from .common import InfoExtractor
-from ..utils import urlencode_postdata
+from ..utils import (
+ orderedSet,
+ unified_strdate,
+ urlencode_postdata,
+)
class BitChuteIE(InfoExtractor):
@@ -20,6 +24,7 @@ class BitChuteIE(InfoExtractor):
'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Victoria X Rave',
+ 'upload_date': '20170813',
},
}, {
'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
@@ -37,16 +42,27 @@ class BitChuteIE(InfoExtractor):
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
})
- title = self._search_regex(
+ title = self._html_search_regex(
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'description', webpage, 'title',
default=None) or self._og_search_description(webpage)
+ format_urls = []
+ for mobj in re.finditer(
+ r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+ format_urls.append(mobj.group('url'))
+ format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
formats = [
- {'url': mobj.group('url')}
- for mobj in re.finditer(
- r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage)]
+ {'url': format_url}
+ for format_url in orderedSet(format_urls)]
+
+ if not formats:
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
description = self._html_search_regex(
@@ -56,8 +72,13 @@ class BitChuteIE(InfoExtractor):
webpage, default=None) or self._html_search_meta(
'twitter:image:src', webpage, 'thumbnail')
uploader = self._html_search_regex(
- r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage,
- 'uploader', fatal=False)
+ (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
+ r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
+ webpage, 'uploader', fatal=False)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
+ webpage, 'upload date', fatal=False))
return {
'id': video_id,
@@ -65,6 +86,7 @@ class BitChuteIE(InfoExtractor):
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
+ 'upload_date': upload_date,
'formats': formats,
}
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py
index e829974ff..dc60224d0 100644
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dl/extractor/bleacherreport.py
@@ -71,7 +71,7 @@ class BleacherReportIE(InfoExtractor):
video = article_data.get('video')
if video:
video_type = video['type']
- if video_type == 'cms.bleacherreport.com':
+ if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'):
info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id']
elif video_type == 'ooyala.com':
info['url'] = 'ooyala:%s' % video['id']
@@ -87,9 +87,9 @@ class BleacherReportIE(InfoExtractor):
class BleacherReportCMSIE(AMPIE):
- _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})'
+ _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
_TESTS = [{
- 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
+ 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
@@ -101,6 +101,6 @@ class BleacherReportCMSIE(AMPIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id)
+ info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id)
info['id'] = video_id
return info
diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py
index 3b8eabe8f..db5e12b21 100644
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dl/extractor/blinkx.py
@@ -32,8 +32,8 @@ class BlinkxIE(InfoExtractor):
video_id = self._match_id(url)
display_id = video_id[:8]
- api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' +
- 'video=%s' % video_id)
+ api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
+ + 'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0]
duration = None
diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py
index 86a7f4d7d..6017e8344 100644
--- a/youtube_dl/extractor/bokecc.py
+++ b/youtube_dl/extractor/bokecc.py
@@ -11,8 +11,8 @@ from ..utils import ExtractorError
class BokeCCBaseIE(InfoExtractor):
def _extract_bokecc_formats(self, webpage, video_id, format_id=None):
player_params_str = self._html_search_regex(
- r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)',
- webpage, 'player params')
+ r'<(?:script|embed)[^>]+src=(?P<q>["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P<query>.+?)(?P=q)',
+ webpage, 'player params', group='query')
player_params = compat_parse_qs(player_params_str)
@@ -36,9 +36,9 @@ class BokeCCIE(BokeCCBaseIE):
_VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{
- 'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',
+ 'url': 'http://union.bokecc.com/playvideo.bo?vid=E0ABAE9D4F509B189C33DC5901307461&uid=FE644790DE9D154A',
'info_dict': {
- 'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30',
+ 'id': 'FE644790DE9D154A_E0ABAE9D4F509B189C33DC5901307461',
'ext': 'flv',
'title': 'BokeCC Video',
},
diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py
index a25d500e4..b9715df00 100644
--- a/youtube_dl/extractor/bravotv.py
+++ b/youtube_dl/extractor/bravotv.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .adobepass import AdobePassIE
from ..utils import (
smuggle_url,
@@ -12,16 +14,16 @@ from ..utils import (
class BravoTVIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{
- 'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
- 'md5': '9086d0b7ef0ea2aabc4781d75f4e5863',
+ 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
+ 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
'info_dict': {
- 'id': 'zHyk1_HU_mPy',
+ 'id': 'epL0pmK1kQlT',
'ext': 'mp4',
- 'title': 'LCK Ep 12: Fishy Finale',
- 'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.',
+ 'title': 'The Top Chef Season 16 Winner Is...',
+ 'description': 'Find out who takes the title of Top Chef!',
'uploader': 'NBCU-BRAV',
- 'upload_date': '20160302',
- 'timestamp': 1456945320,
+ 'upload_date': '20190314',
+ 'timestamp': 1552591860,
}
}, {
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
@@ -32,30 +34,38 @@ class BravoTVIE(AdobePassIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
+ r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
display_id)
info = {}
query = {
'mbr': 'true',
}
account_pid, release_pid = [None] * 2
- tve = settings.get('sharedTVE')
+ tve = settings.get('ls_tve')
if tve:
query['manifest'] = 'm3u'
- account_pid = 'HNK2IC'
- release_pid = tve['release_pid']
+ mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
+ if mobj:
+ account_pid, tp_path = mobj.groups()
+ release_pid = tp_path.strip('/').split('/')[-1]
+ else:
+ account_pid = 'HNK2IC'
+ tp_path = release_pid = tve['release_pid']
if tve.get('entitlement') == 'auth':
- adobe_pass = settings.get('adobePass', {})
+ adobe_pass = settings.get('tve_adobe_auth', {})
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'bravo'),
tve['title'], release_pid, tve.get('rating'))
query['auth'] = self._extract_mvpd_auth(
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
else:
- shared_playlist = settings['shared_playlist']
+ shared_playlist = settings['ls_playlist']
account_pid = shared_playlist['account_pid']
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
- release_pid = metadata['release_pid']
+ tp_path = release_pid = metadata.get('release_pid')
+ if not release_pid:
+ release_pid = metadata['guid']
+ tp_path = 'media/guid/2140479951/' + release_pid
info.update({
'title': metadata['title'],
'description': metadata.get('description'),
@@ -67,7 +77,7 @@ class BravoTVIE(AdobePassIE):
'_type': 'url_transparent',
'id': release_pid,
'url': smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid),
+ 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path),
query), {'force_smil_url': True}),
'ie_key': 'ThePlatform',
})
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 14f9a14ed..2aa9f4782 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -1,42 +1,44 @@
# coding: utf-8
from __future__ import unicode_literals
+import base64
import re
-import json
+import struct
-from .common import InfoExtractor
from .adobepass import AdobePassIE
+from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
+ compat_HTTPError,
compat_parse_qs,
- compat_str,
compat_urllib_parse_urlparse,
compat_urlparse,
compat_xml_parse_error,
- compat_HTTPError,
)
from ..utils import (
- determine_ext,
- ExtractorError,
+ clean_html,
extract_attributes,
+ ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
float_or_none,
- js_to_json,
int_or_none,
+ js_to_json,
+ mimetype2ext,
parse_iso8601,
+ smuggle_url,
+ str_or_none,
unescapeHTML,
unsmuggle_url,
+ UnsupportedError,
update_url_query,
- clean_html,
- mimetype2ext,
+ url_or_none,
)
class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
- _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
_TESTS = [
{
@@ -53,7 +55,8 @@ class BrightcoveLegacyIE(InfoExtractor):
'timestamp': 1368213670,
'upload_date': '20130510',
'uploader_id': '1589608506001',
- }
+ },
+ 'skip': 'The player has been deactivated by the content owner',
},
{
# From http://medianetwork.oracle.com/video/player/1785452137001
@@ -68,6 +71,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'upload_date': '20120814',
'uploader_id': '1460825906',
},
+ 'skip': 'video not playable',
},
{
# From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
@@ -77,7 +81,7 @@ class BrightcoveLegacyIE(InfoExtractor):
'ext': 'mp4',
'title': 'This Bracelet Acts as a Personal Thermostat',
'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
- 'uploader': 'Mashable',
+ # 'uploader': 'Mashable',
'timestamp': 1382041798,
'upload_date': '20131017',
'uploader_id': '1130468786001',
@@ -122,15 +126,17 @@ class BrightcoveLegacyIE(InfoExtractor):
'id': '3550319591001',
},
'playlist_mincount': 7,
+ 'skip': 'Unsupported URL',
},
{
- # playlist with 'playlistTab' (https://github.com/rg3/youtube-dl/issues/9965)
+ # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
'info_dict': {
'id': '1522758701001',
'title': 'Lesson 08',
},
'playlist_mincount': 10,
+ 'skip': 'Unsupported URL',
},
{
# playerID inferred from bcpid
@@ -139,12 +145,6 @@ class BrightcoveLegacyIE(InfoExtractor):
'only_matching': True, # Tested in GenericIE
}
]
- FLV_VCODECS = {
- 1: 'SORENSON',
- 2: 'ON2',
- 3: 'H264',
- 4: 'VP8',
- }
@classmethod
def _build_brighcove_url(cls, object_str):
@@ -153,10 +153,10 @@ class BrightcoveLegacyIE(InfoExtractor):
<object class="BrightcoveExperience">{params}</object>
"""
- # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
+ # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
lambda m: m.group(1) + '/>', object_str)
- # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
+ # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
object_str = object_str.replace('<--', '<!--')
# remove namespace to simplify extraction
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
@@ -236,7 +236,8 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod
def _make_brightcove_url(cls, params):
- return update_url_query(cls._FEDERATED_URL, params)
+ return update_url_query(
+ 'http://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod
def _extract_brightcove_url(cls, webpage):
@@ -295,163 +296,50 @@ class BrightcoveLegacyIE(InfoExtractor):
videoPlayer = query.get('@videoPlayer')
if videoPlayer:
# We set the original url as the default 'Referer' header
- referer = smuggled_data.get('Referer', url)
+ referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
+ video_id = videoPlayer[0]
if 'playerID' not in query:
mobj = re.search(r'/bcpid(\d+)', url)
if mobj is not None:
query['playerID'] = [mobj.group(1)]
- return self._get_video_info(
- videoPlayer[0], query, referer=referer)
- elif 'playerKey' in query:
- player_key = query['playerKey']
- return self._get_playlist_info(player_key[0])
- else:
- raise ExtractorError(
- 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
- expected=True)
-
- def _get_video_info(self, video_id, query, referer=None):
- headers = {}
- linkBase = query.get('linkBaseURL')
- if linkBase is not None:
- referer = linkBase[0]
- if referer is not None:
- headers['Referer'] = referer
- webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
-
- error_msg = self._html_search_regex(
- r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
- 'error message', default=None)
- if error_msg is not None:
- raise ExtractorError(
- 'brightcove said: %s' % error_msg, expected=True)
-
- self.report_extraction(video_id)
- info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json')
- info = json.loads(info)['data']
- video_info = info['programmedContent']['videoPlayer']['mediaDTO']
- video_info['_youtubedl_adServerURL'] = info.get('adServerURL')
-
- return self._extract_video_info(video_info)
-
- def _get_playlist_info(self, player_key):
- info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key
- playlist_info = self._download_webpage(
- info_url, player_key, 'Downloading playlist information')
-
- json_data = json.loads(playlist_info)
- if 'videoList' in json_data:
- playlist_info = json_data['videoList']
- playlist_dto = playlist_info['mediaCollectionDTO']
- elif 'playlistTabs' in json_data:
- playlist_info = json_data['playlistTabs']
- playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0]
- else:
- raise ExtractorError('Empty playlist')
-
- videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']]
-
- return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'],
- playlist_title=playlist_dto['displayName'])
-
- def _extract_video_info(self, video_info):
- video_id = compat_str(video_info['id'])
- publisher_id = video_info.get('publisherId')
- info = {
- 'id': video_id,
- 'title': video_info['displayName'].strip(),
- 'description': video_info.get('shortDescription'),
- 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
- 'uploader': video_info.get('publisherName'),
- 'uploader_id': compat_str(publisher_id) if publisher_id else None,
- 'duration': float_or_none(video_info.get('length'), 1000),
- 'timestamp': int_or_none(video_info.get('creationDate'), 1000),
- }
-
- renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', [])
- if renditions:
- formats = []
- for rend in renditions:
- url = rend['defaultURL']
- if not url:
- continue
- ext = None
- if rend['remote']:
- url_comp = compat_urllib_parse_urlparse(url)
- if url_comp.path.endswith('.m3u8'):
- formats.extend(
- self._extract_m3u8_formats(
- url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- continue
- elif 'akamaihd.net' in url_comp.netloc:
- # This type of renditions are served through
- # akamaihd.net, but they don't use f4m manifests
- url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB'
- ext = 'flv'
- if ext is None:
- ext = determine_ext(url)
- tbr = int_or_none(rend.get('encodingRate'), 1000)
- a_format = {
- 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''),
- 'url': url,
- 'ext': ext,
- 'filesize': int_or_none(rend.get('size')) or None,
- 'tbr': tbr,
- }
- if rend.get('audioOnly'):
- a_format.update({
- 'vcodec': 'none',
- })
- else:
- a_format.update({
- 'height': int_or_none(rend.get('frameHeight')),
- 'width': int_or_none(rend.get('frameWidth')),
- 'vcodec': rend.get('videoCodec'),
- })
-
- # m3u8 manifests with remote == false are media playlists
- # Not calling _extract_m3u8_formats here to save network traffic
- if ext == 'm3u8':
- a_format.update({
- 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''),
- 'ext': 'mp4',
- 'protocol': 'm3u8_native',
- })
-
- formats.append(a_format)
- self._sort_formats(formats)
- info['formats'] = formats
- elif video_info.get('FLVFullLengthURL') is not None:
- info.update({
- 'url': video_info['FLVFullLengthURL'],
- 'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')),
- 'filesize': int_or_none(video_info.get('FLVFullSize')),
- })
-
- if self._downloader.params.get('include_ads', False):
- adServerURL = video_info.get('_youtubedl_adServerURL')
- if adServerURL:
- ad_info = {
- '_type': 'url',
- 'url': adServerURL,
- }
- if 'url' in info:
- return {
- '_type': 'playlist',
- 'title': info['title'],
- 'entries': [ad_info, info],
- }
+ publisher_id = query.get('publisherId')
+ if publisher_id and publisher_id[0].isdigit():
+ publisher_id = publisher_id[0]
+ if not publisher_id:
+ player_key = query.get('playerKey')
+ if player_key and ',' in player_key[0]:
+ player_key = player_key[0]
else:
- return ad_info
-
- if 'url' not in info and not info.get('formats'):
- raise ExtractorError('Unable to extract video url for %s' % video_id)
- return info
+ player_id = query.get('playerID')
+ if player_id and player_id[0].isdigit():
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ player_page = self._download_webpage(
+ 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+ video_id, headers=headers, fatal=False)
+ if player_page:
+ player_key = self._search_regex(
+ r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
+ player_page, 'player key', fatal=False)
+ if player_key:
+ enc_pub_id = player_key.split(',')[1].replace('~', '=')
+ publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
+ if publisher_id:
+ brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+ if referer:
+ brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
+ return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+ # TODO: figure out if it's possible to extract playlistId from playerKey
+ # elif 'playerKey' in query:
+ # player_key = query['playerKey']
+ # return self._get_playlist_info(player_key[0])
+ raise UnsupportedError(url)
class BrightcoveNewIE(AdobePassIE):
IE_NAME = 'brightcove:new'
- _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
_TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
@@ -485,6 +373,21 @@ class BrightcoveNewIE(AdobePassIE):
'skip_download': True,
}
}, {
+ # playlist stream
+ 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
+ 'info_dict': {
+ 'id': '5718313430001',
+ 'title': 'No Audio Playlist',
+ },
+ 'playlist_count': 7,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
+ 'only_matching': True,
+ }, {
# ref: prefixed video id
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
'only_matching': True,
@@ -523,7 +426,7 @@ class BrightcoveNewIE(AdobePassIE):
# [2] looks like:
for video, script_tag, account_id, player_id, embed in re.findall(
r'''(?isx)
- (<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
+ (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
(?:.*?
(<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/
@@ -652,10 +555,16 @@ class BrightcoveNewIE(AdobePassIE):
subtitles = {}
for text_track in json_data.get('text_tracks', []):
- if text_track.get('src'):
- subtitles.setdefault(text_track.get('srclang'), []).append({
- 'url': text_track['src'],
- })
+ if text_track.get('kind') != 'captions':
+ continue
+ text_track_url = url_or_none(text_track.get('src'))
+ if not text_track_url:
+ continue
+ lang = (str_or_none(text_track.get('srclang'))
+ or str_or_none(text_track.get('label')) or 'en').lower()
+ subtitles.setdefault(lang, []).append({
+ 'url': text_track_url,
+ })
is_live = False
duration = float_or_none(json_data.get('duration'), 1000)
@@ -683,47 +592,65 @@ class BrightcoveNewIE(AdobePassIE):
'ip_blocks': smuggled_data.get('geo_ip_blocks'),
})
- account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
+ account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
+
+ policy_key_id = '%s_%s' % (account_id, player_id)
+ policy_key = self._downloader.cache.load('brightcove', policy_key_id)
+ policy_key_extracted = False
+ store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
- webpage = self._download_webpage(
- 'http://players.brightcove.net/%s/%s_%s/index.min.js'
- % (account_id, player_id, embed), video_id)
+ def extract_policy_key():
+ webpage = self._download_webpage(
+ 'http://players.brightcove.net/%s/%s_%s/index.min.js'
+ % (account_id, player_id, embed), video_id)
- policy_key = None
+ policy_key = None
- catalog = self._search_regex(
- r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
- if catalog:
- catalog = self._parse_json(
- js_to_json(catalog), video_id, fatal=False)
+ catalog = self._search_regex(
+ r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
- policy_key = catalog.get('policyKey')
+ catalog = self._parse_json(
+ js_to_json(catalog), video_id, fatal=False)
+ if catalog:
+ policy_key = catalog.get('policyKey')
- if not policy_key:
- policy_key = self._search_regex(
- r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
- webpage, 'policy key', group='pk')
+ if not policy_key:
+ policy_key = self._search_regex(
+ r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+ webpage, 'policy key', group='pk')
- api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
- headers = {
- 'Accept': 'application/json;pk=%s' % policy_key,
- }
+ store_pk(policy_key)
+ return policy_key
+
+ api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
+ headers = {}
referrer = smuggled_data.get('referrer')
if referrer:
headers.update({
'Referer': referrer,
'Origin': re.search(r'https?://[^/]+', referrer).group(0),
})
- try:
- json_data = self._download_json(api_url, video_id, headers=headers)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
- message = json_data.get('message') or json_data['error_code']
- if json_data.get('error_subcode') == 'CLIENT_GEO':
- self.raise_geo_restricted(msg=message)
- raise ExtractorError(message, expected=True)
- raise
+
+ for _ in range(2):
+ if not policy_key:
+ policy_key = extract_policy_key()
+ policy_key_extracted = True
+ headers['Accept'] = 'application/json;pk=%s' % policy_key
+ try:
+ json_data = self._download_json(api_url, video_id, headers=headers)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+ json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+ message = json_data.get('message') or json_data['error_code']
+ if json_data.get('error_subcode') == 'CLIENT_GEO':
+ self.raise_geo_restricted(msg=message)
+ elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
+ policy_key = None
+ store_pk(None)
+ continue
+ raise ExtractorError(message, expected=True)
+ raise
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
@@ -739,5 +666,12 @@ class BrightcoveNewIE(AdobePassIE):
'tveToken': tve_token,
})
+ if content_type == 'playlist':
+ return self.playlist_result(
+ [self._parse_brightcove_metadata(vid, vid.get('id'), headers)
+ for vid in json_data.get('videos', []) if vid.get('id')],
+ json_data.get('id'), json_data.get('name'),
+ json_data.get('description'))
+
return self._parse_brightcove_metadata(
json_data, video_id, headers=headers)
diff --git a/youtube_dl/extractor/businessinsider.py b/youtube_dl/extractor/businessinsider.py
index dfcf9bc6b..73a57b1e4 100644
--- a/youtube_dl/extractor/businessinsider.py
+++ b/youtube_dl/extractor/businessinsider.py
@@ -9,21 +9,26 @@ class BusinessInsiderIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
- 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
+ 'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
'info_dict': {
- 'id': 'hZRllCfw',
+ 'id': 'cjGDb0X9',
'ext': 'mp4',
- 'title': "Here's how much radiation you're exposed to in everyday life",
- 'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
- 'upload_date': '20170709',
- 'timestamp': 1499606400,
- },
- 'params': {
- 'skip_download': True,
+ 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant",
+ 'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
+ 'upload_date': '20160611',
+ 'timestamp': 1465675620,
},
}, {
'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
- 'only_matching': True,
+ 'md5': '43f438dbc6da0b89f5ac42f68529d84a',
+ 'info_dict': {
+ 'id': '5zJwd4FK',
+ 'ext': 'mp4',
+ 'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
+ 'description': 'md5:2af8975825d38a4fed24717bbe51db49',
+ 'upload_date': '20170705',
+ 'timestamp': 1499270528,
+ },
}, {
'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
'only_matching': True,
@@ -35,7 +40,8 @@ class BusinessInsiderIE(InfoExtractor):
jwplatform_id = self._search_regex(
(r'data-media-id=["\']([a-zA-Z0-9]{8})',
r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
- r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
+ r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
+ r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
webpage, 'jwplatform id')
return self.url_result(
'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py
index 4bf4efe1f..0b11bf11f 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dl/extractor/byutv.py
@@ -3,11 +3,18 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ merge_dicts,
+ parse_duration,
+ url_or_none,
+)
class BYUtvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
+ # ooyalaVOD
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'info_dict': {
'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH',
@@ -23,6 +30,20 @@ class BYUtvIE(InfoExtractor):
},
'add_ie': ['Ooyala'],
}, {
+ # dvr
+ 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2',
+ 'info_dict': {
+ 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451',
+ 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2',
+ 'ext': 'mp4',
+ 'title': 'Pacific vs. BYU (4/12/19)',
+ 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3',
+ 'duration': 11645,
+ },
+ 'params': {
+ 'skip_download': True
+ },
+ }, {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d',
'only_matching': True,
}, {
@@ -35,24 +56,62 @@ class BYUtvIE(InfoExtractor):
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
- ep = self._download_json(
- 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id,
- query={
+ video = self._download_json(
+ 'https://api.byutv.org/api3/catalog/getvideosforcontent',
+ display_id, query={
'contentid': video_id,
'channel': 'byutv',
'x-byutv-context': 'web$US',
}, headers={
'x-byutv-context': 'web$US',
'x-byutv-platformkey': 'xsaaw9c7y5',
- })['ooyalaVOD']
+ })
+
+ ep = video.get('ooyalaVOD')
+ if ep:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % ep['providerId'],
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ }
+
+ info = {}
+ formats = []
+ for format_id, ep in video.items():
+ if not isinstance(ep, dict):
+ continue
+ video_url = url_or_none(ep.get('videoUrl'))
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ merge_dicts(info, {
+ 'title': ep.get('title'),
+ 'description': ep.get('description'),
+ 'thumbnail': ep.get('imageThumbnail'),
+ 'duration': parse_duration(ep.get('length')),
+ })
+ self._sort_formats(formats)
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % ep['providerId'],
+ return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
- 'title': ep.get('title'),
- 'description': ep.get('description'),
- 'thumbnail': ep.get('imageThumbnail'),
- }
+ 'title': display_id,
+ 'formats': formats,
+ })
diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py
index 79350817f..1eb81b75e 100644
--- a/youtube_dl/extractor/cammodels.py
+++ b/youtube_dl/extractor/cammodels.py
@@ -14,6 +14,7 @@ class CamModelsIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.cammodels.com/cam/AutumnKnight/',
'only_matching': True,
+ 'age_limit': 18
}]
def _real_extract(self, url):
@@ -93,4 +94,5 @@ class CamModelsIE(InfoExtractor):
'title': self._live_title(user_id),
'is_live': True,
'formats': formats,
+ 'age_limit': 18
}
diff --git a/youtube_dl/extractor/camtube.py b/youtube_dl/extractor/camtube.py
index c7d40f849..b3be3bdcf 100644
--- a/youtube_dl/extractor/camtube.py
+++ b/youtube_dl/extractor/camtube.py
@@ -20,6 +20,7 @@ class CamTubeIE(InfoExtractor):
'duration': 1274,
'timestamp': 1528018608,
'upload_date': '20180603',
+ 'age_limit': 18
},
'params': {
'skip_download': True,
@@ -66,4 +67,5 @@ class CamTubeIE(InfoExtractor):
'like_count': like_count,
'creator': creator,
'formats': formats,
+ 'age_limit': 18
}
diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py
index afbc5ea26..bbc5205fd 100644
--- a/youtube_dl/extractor/camwithher.py
+++ b/youtube_dl/extractor/camwithher.py
@@ -25,6 +25,7 @@ class CamWithHerIE(InfoExtractor):
'comment_count': int,
'uploader': 'MileenaK',
'upload_date': '20160322',
+ 'age_limit': 18,
},
'params': {
'skip_download': True,
@@ -84,4 +85,5 @@ class CamWithHerIE(InfoExtractor):
'comment_count': comment_count,
'uploader': uploader,
'upload_date': upload_date,
+ 'age_limit': 18
}
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
index 174fd9e2b..8667a0d04 100644
--- a/youtube_dl/extractor/canvas.py
+++ b/youtube_dl/extractor/canvas.py
@@ -13,48 +13,76 @@ from ..utils import (
int_or_none,
merge_dicts,
parse_iso8601,
+ str_or_none,
+ url_or_none,
)
class CanvasIE(InfoExtractor):
- _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
- 'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+ 'md5': '68993eda72ef62386a15ea2cf3c93107',
'info_dict': {
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Nachtwacht: De Greystook',
- 'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
+ 'description': 'Nachtwacht: De Greystook',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1468.03,
+ 'duration': 1468.04,
},
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
}, {
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
'only_matching': True,
}]
+ _HLS_ENTRY_PROTOCOLS_MAP = {
+ 'HLS': 'm3u8_native',
+ 'HLS_AES': 'm3u8',
+ }
+ _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site_id'), mobj.group('id')
+ # Old API endpoint, serves more formats but may fail for some videos
data = self._download_json(
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
- % (site_id, video_id), video_id)
+ % (site_id, video_id), video_id, 'Downloading asset JSON',
+ 'Unable to download asset JSON', fatal=False)
+
+ # New API endpoint
+ if not data:
+ token = self._download_json(
+ '%s/tokens' % self._REST_API_BASE, video_id,
+ 'Downloading token', data=b'',
+ headers={'Content-Type': 'application/json'})['vrtPlayerToken']
+ data = self._download_json(
+ '%s/videos/%s' % (self._REST_API_BASE, video_id),
+ video_id, 'Downloading video JSON', fatal=False, query={
+ 'vrtPlayerToken': token,
+ 'client': '%s@PROD' % site_id,
+ }, expected_status=400)
+ message = data.get('message')
+ if message and not data.get('title'):
+ if data.get('code') == 'AUTHENTICATION_REQUIRED':
+ self.raise_login_required(message)
+ raise ExtractorError(message, expected=True)
title = data['title']
description = data.get('description')
formats = []
for target in data['targetUrls']:
- format_url, format_type = target.get('url'), target.get('type')
+ format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type:
continue
- if format_type == 'HLS':
+ format_type = format_type.upper()
+ if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
m3u8_id=format_type, fatal=False))
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
@@ -130,20 +158,20 @@ class CanvasEenIE(InfoExtractor):
},
'skip': 'Pagina niet gevonden',
}, {
- 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
+ 'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
'info_dict': {
- 'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
- 'display_id': 'herbekijk-sorry-voor-alles',
+ 'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
+ 'display_id': 'emma-pakt-thilly-aan',
'ext': 'mp4',
- 'title': 'Herbekijk Sorry voor alles',
- 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
+ 'title': 'Emma pakt Thilly aan',
+ 'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 3788.06,
+ 'duration': 118.24,
},
'params': {
'skip_download': True,
},
- 'skip': 'Episode no longer available',
+ 'expected_warnings': ['is not a supported codec'],
}, {
'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
'only_matching': True,
@@ -179,19 +207,44 @@ class VrtNUIE(GigyaBaseIE):
IE_DESC = 'VrtNU.be'
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TESTS = [{
+ # Available via old API endpoint
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
'info_dict': {
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'De zwarte weduwe',
- 'description': 'md5:d90c21dced7db869a85db89a623998d4',
+ 'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
'duration': 1457.04,
'thumbnail': r're:^https?://.*\.jpg$',
- 'season': '1',
+ 'season': 'Season 1',
'season_number': 1,
'episode_number': 1,
},
- 'skip': 'This video is only available for registered users'
+ 'skip': 'This video is only available for registered users',
+ 'params': {
+ 'username': '<snip>',
+ 'password': '<snip>',
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # Only available via new API endpoint
+ 'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
+ 'info_dict': {
+ 'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 5',
+ 'description': 'Wie valt door de mand tijdens een missie?',
+ 'duration': 2967.06,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode_number': 5,
+ },
+ 'skip': 'This video is only available for registered users',
+ 'params': {
+ 'username': '<snip>',
+ 'password': '<snip>',
+ },
+ 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
}]
_NETRC_MACHINE = 'vrtnu'
_APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py
index 9ba909a91..b57b86af7 100644
--- a/youtube_dl/extractor/carambatv.py
+++ b/youtube_dl/extractor/carambatv.py
@@ -82,6 +82,12 @@ class CarambaTVPageIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
videomore_url = VideomoreIE._extract_url(webpage)
+ if not videomore_url:
+ videomore_id = self._search_regex(
+ r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id',
+ default=None)
+ if videomore_id:
+ videomore_url = 'videomore:%s' % videomore_id
if videomore_url:
title = self._og_search_title(webpage)
return {
diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py
index 6aeebd7b3..48b33617f 100644
--- a/youtube_dl/extractor/cartoonnetwork.py
+++ b/youtube_dl/extractor/cartoonnetwork.py
@@ -1,20 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .turner import TurnerBaseIE
+from ..utils import int_or_none
class CartoonNetworkIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
_TEST = {
- 'url': 'http://www.cartoonnetwork.com/video/teen-titans-go/starfire-the-cat-lady-clip.html',
+ 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
'info_dict': {
- 'id': '8a250ab04ed07e6c014ef3f1e2f9016c',
+ 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
'ext': 'mp4',
- 'title': 'Starfire the Cat Lady',
- 'description': 'Robin decides to become a cat so that Starfire will finally love him.',
+ 'title': 'How to Draw Upgrade',
+ 'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
},
'params': {
# m3u8 download
@@ -25,18 +24,39 @@ class CartoonNetworkIE(TurnerBaseIE):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- id_type, video_id = re.search(r"_cnglobal\.cvp(Video|Title)Id\s*=\s*'([^']+)';", webpage).groups()
- query = ('id' if id_type == 'Video' else 'titleId') + '=' + video_id
- return self._extract_cvp_info(
- 'http://www.cartoonnetwork.com/video-seo-svc/episodeservices/getCvpPlaylist?networkName=CN2&' + query, video_id, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/toon/big',
- 'tokenizer_src': 'https://token.vgtf.net/token/token_mobile',
- },
- }, {
+
+ def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
+ metadata_re = ''
+ if content_re:
+ metadata_re = r'|video_metadata\.content_' + content_re
+ return self._search_regex(
+ r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
+ webpage, name, fatal=fatal)
+
+ media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
+ title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
+
+ info = self._extract_ngtv_info(
+ media_id, {'networkId': 'cartoonnetwork'}, {
'url': url,
'site_name': 'CartoonNetwork',
- 'auth_required': self._search_regex(
- r'_cnglobal\.cvpFullOrPreviewAuth\s*=\s*(true|false);',
- webpage, 'auth required', default='false') == 'true',
+ 'auth_required': find_field('authType', 'auth type') != 'unauth',
})
+
+ series = find_field(
+ 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage),
+ 'series': series,
+ 'episode': title,
+ })
+
+ for field in ('season', 'episode'):
+ field_name = field + 'Number'
+ info[field + '_number'] = int_or_none(find_field(
+ field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
+
+ return info
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
index 43f95c739..fd5ec6033 100644
--- a/youtube_dl/extractor/cbc.py
+++ b/youtube_dl/extractor/cbc.py
@@ -1,8 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
import json
import re
+from xml.sax.saxutils import escape
from .common import InfoExtractor
from ..compat import (
@@ -216,6 +218,29 @@ class CBCWatchBaseIE(InfoExtractor):
'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
}
_GEO_COUNTRIES = ['CA']
+ _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
+ _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
+ _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+ _NETRC_MACHINE = 'cbcwatch'
+
+ def _signature(self, email, password):
+ data = json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode()
+ headers = {'content-type': 'application/json'}
+ query = {'apikey': self._API_KEY}
+ resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
+ access_token = resp['access_token']
+
+ # token
+ query = {
+ 'access_token': access_token,
+ 'apikey': self._API_KEY,
+ 'jwtapp': 'jwt',
+ }
+ resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
+ return resp['signature']
def _call_api(self, path, video_id):
url = path if path.startswith('http') else self._API_BASE_URL + path
@@ -239,7 +264,8 @@ class CBCWatchBaseIE(InfoExtractor):
def _real_initialize(self):
if self._valid_device_token():
return
- device = self._downloader.cache.load('cbcwatch', 'device') or {}
+ device = self._downloader.cache.load(
+ 'cbcwatch', self._cache_device_key()) or {}
self._device_id, self._device_token = device.get('id'), device.get('token')
if self._valid_device_token():
return
@@ -248,16 +274,30 @@ class CBCWatchBaseIE(InfoExtractor):
def _valid_device_token(self):
return self._device_id and self._device_token
+ def _cache_device_key(self):
+ email, _ = self._get_login_info()
+ return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
+
def _register_device(self):
- self._device_id = self._device_token = None
result = self._download_xml(
self._API_BASE_URL + 'device/register',
None, 'Acquiring device token',
data=b'<device><type>web</type></device>')
self._device_id = xpath_text(result, 'deviceId', fatal=True)
- self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+ email, password = self._get_login_info()
+ if email and password:
+ signature = self._signature(email, password)
+ data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
+ escape(signature), escape(self._device_id)).encode()
+ url = self._API_BASE_URL + 'device/login'
+ result = self._download_xml(
+ url, None, data=data,
+ headers={'content-type': 'application/xml'})
+ self._device_token = xpath_text(result, 'token', fatal=True)
+ else:
+ self._device_token = xpath_text(result, 'deviceToken', fatal=True)
self._downloader.cache.store(
- 'cbcwatch', 'device', {
+ 'cbcwatch', self._cache_device_key(), {
'id': self._device_id,
'token': self._device_token,
})
@@ -360,7 +400,7 @@ class CBCWatchVideoIE(CBCWatchBaseIE):
class CBCWatchIE(CBCWatchBaseIE):
IE_NAME = 'cbc.ca:watch'
- _VALID_URL = r'https?://watch\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+ _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
_TESTS = [{
# geo-restricted to Canada, bypassable
'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
@@ -386,6 +426,9 @@ class CBCWatchIE(CBCWatchBaseIE):
'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
},
'playlist_mincount': 30,
+ }, {
+ 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py
index 1799d63ea..4a19a73d2 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@@ -13,13 +13,17 @@ from ..utils import (
class CBSBaseIE(ThePlatformFeedIE):
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
- closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
- return {
- 'en': [{
- 'ext': 'ttml',
- 'url': closed_caption_e.attrib['value'],
- }]
- } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else []
+ subtitles = {}
+ for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]:
+ cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k)
+ if cc_e is not None:
+ cc_url = cc_e.get('value')
+ if cc_url:
+ subtitles.setdefault(subtitles_lang, []).append({
+ 'ext': ext,
+ 'url': cc_url,
+ })
+ return subtitles
class CBSIE(CBSBaseIE):
@@ -65,7 +69,7 @@ class CBSIE(CBSBaseIE):
last_e = None
for item in items_data.findall('.//item'):
asset_type = xpath_text(item, 'assetType')
- if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'):
+ if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
continue
asset_types.append(asset_type)
query = {
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index 51df15fac..345debcf0 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -1,40 +1,62 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+import zlib
+
from .common import InfoExtractor
from .cbs import CBSIE
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
parse_duration,
)
+class CBSNewsEmbedIE(CBSIE):
+ IE_NAME = 'cbsnews:embed'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)'
+ _TESTS = [{
+ 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITu
M4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ item = self._parse_json(zlib.decompress(compat_b64decode(
+ compat_urllib_parse_unquote(self._match_id(url))),
+ -zlib.MAX_WBITS), None)['video']['items'][0]
+ return self._extract_video_info(item['mpxRefId'], 'cbsnews')
+
+
class CBSNewsIE(CBSIE):
IE_NAME = 'cbsnews'
IE_DESC = 'CBS News'
- _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
+ _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)'
_TESTS = [
{
# 60 minutes
'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/',
'info_dict': {
- 'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_',
- 'ext': 'mp4',
- 'title': 'Artificial Intelligence',
- 'description': 'md5:8818145f9974431e0fb58a1b8d69613c',
+ 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4',
+ 'ext': 'flv',
+ 'title': 'Artificial Intelligence, real-life applications',
+ 'description': 'md5:a7aaf27f1b4777244de8b0b442289304',
'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1606,
+ 'duration': 317,
'uploader': 'CBSI-NEW',
- 'timestamp': 1498431900,
- 'upload_date': '20170625',
+ 'timestamp': 1476046464,
+ 'upload_date': '20161009',
},
'params': {
- # m3u8 download
+ # rtmp download
'skip_download': True,
},
},
{
- 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
+ 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/',
'info_dict': {
'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y',
'ext': 'mp4',
@@ -60,37 +82,29 @@ class CBSNewsIE(CBSIE):
# 48 hours
'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
'info_dict': {
- 'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
- 'ext': 'mp4',
'title': 'Cold as Ice',
- 'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
- 'upload_date': '20170604',
- 'timestamp': 1496538000,
- 'uploader': 'CBSI-NEW',
- },
- 'params': {
- 'skip_download': True,
+ 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?',
},
+ 'playlist_mincount': 7,
},
]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
- video_info = self._parse_json(self._html_search_regex(
- r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
- webpage, 'video JSON info', default='{}'), video_id, fatal=False)
+ webpage = self._download_webpage(url, display_id)
- if video_info:
- item = video_info['item'] if 'item' in video_info else video_info
- else:
- state = self._parse_json(self._search_regex(
- r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
- 'playlist JSON info', group='json'), video_id)['state']
- item = state['playlist'][state['pid']]
+ entries = []
+ for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage):
+ entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key()))
+ if entries:
+ return self.playlist_result(
+ entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage),
+ playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage))
+ item = self._parse_json(self._html_search_regex(
+ r'CBSNEWS\.defaultPayload\s*=\s*({.+})',
+ webpage, 'video JSON info'), display_id)['items'][0]
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index 734702144..36e6dff72 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -1,9 +1,12 @@
+# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_iso8601,
+ try_get,
+ url_or_none,
)
@@ -18,11 +21,13 @@ class CCCIE(InfoExtractor):
'id': '1839',
'ext': 'mp4',
'title': 'Introduction to Processor Design',
+ 'creator': 'byterazor',
'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20131228',
'timestamp': 1388188800,
'duration': 3710,
+ 'tags': list,
}
}, {
'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download',
@@ -68,6 +73,7 @@ class CCCIE(InfoExtractor):
'id': event_id,
'display_id': display_id,
'title': event_data['title'],
+ 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])),
'description': event_data.get('description'),
'thumbnail': event_data.get('thumb_url'),
'timestamp': parse_iso8601(event_data.get('date')),
@@ -75,3 +81,31 @@ class CCCIE(InfoExtractor):
'tags': event_data.get('tags'),
'formats': formats,
}
+
+
+class CCCPlaylistIE(InfoExtractor):
+ IE_NAME = 'media.ccc.de:lists'
+ _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://media.ccc.de/c/30c3',
+ 'info_dict': {
+ 'title': '30C3',
+ 'id': '30c3',
+ },
+ 'playlist_count': 135,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url).lower()
+
+ conf = self._download_json(
+ 'https://media.ccc.de/public/conferences/' + playlist_id,
+ playlist_id)
+
+ entries = []
+ for e in conf['events']:
+ event_url = url_or_none(e.get('frontend_link'))
+ if event_url:
+ entries.append(self.url_result(event_url, ie=CCCIE.ie_key()))
+
+ return self.playlist_result(entries, playlist_id, conf.get('title'))
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index 46380430f..7cb4efb74 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -147,6 +147,8 @@ class CeskaTelevizeIE(InfoExtractor):
is_live = item.get('type') == 'LIVE'
formats = []
for format_id, stream_url in item.get('streamUrls', {}).items():
+ if 'drmOnly=true' in stream_url:
+ continue
if 'playerType=flash' in stream_url:
stream_formats = self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', 'm3u8_native',
@@ -155,7 +157,7 @@ class CeskaTelevizeIE(InfoExtractor):
stream_formats = self._extract_mpd_formats(
stream_url, playlist_id,
mpd_id='dash-%s' % format_id, fatal=False)
- # See https://github.com/rg3/youtube-dl/issues/12119#issuecomment-280037031
+ # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
if format_id == 'audioDescription':
for f in stream_formats:
f['source_preference'] = -10
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
index 81108e704..09cacf6d3 100644
--- a/youtube_dl/extractor/channel9.py
+++ b/youtube_dl/extractor/channel9.py
@@ -32,7 +32,7 @@ class Channel9IE(InfoExtractor):
'upload_date': '20130828',
'session_code': 'KOS002',
'session_room': 'Arena 1A',
- 'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
+ 'session_speakers': 'count:5',
},
}, {
'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
@@ -65,14 +65,14 @@ class Channel9IE(InfoExtractor):
'skip_download': True,
},
}, {
- 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
+ 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
'info_dict': {
- 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
- 'title': 'Channel 9',
+ 'id': 'Events/DEVintersection/DEVintersection-2016',
+ 'title': 'DEVintersection 2016 Orlando Sessions',
},
- 'playlist_mincount': 100,
+ 'playlist_mincount': 14,
}, {
- 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
+ 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
'only_matching': True,
}, {
'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
@@ -112,11 +112,11 @@ class Channel9IE(InfoExtractor):
episode_data), content_path)
content_id = episode_data['contentId']
is_session = '/Sessions(' in episode_data['api']
- content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
+ content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
if is_session:
- content_url += '?$expand=Speakers'
+ content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
else:
- content_url += '?$expand=Authors'
+ content_url += 'Authors,Body&$expand=Authors'
content_data = self._download_json(content_url, content_id)
title = content_data['Title']
@@ -210,7 +210,7 @@ class Channel9IE(InfoExtractor):
'id': content_id,
'title': title,
'description': clean_html(content_data.get('Description') or content_data.get('Body')),
- 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
+ 'thumbnail': content_data.get('VideoPlayerPreviewImage'),
'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
'timestamp': parse_iso8601(content_data.get('PublishedDate')),
'avg_rating': int_or_none(content_data.get('Rating')),
diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py
index e2b828d8a..a459dcb8d 100644
--- a/youtube_dl/extractor/chaturbate.py
+++ b/youtube_dl/extractor/chaturbate.py
@@ -3,11 +3,15 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ lowercase_escape,
+ url_or_none,
+)
class ChaturbateIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': {
@@ -22,6 +26,9 @@ class ChaturbateIE(InfoExtractor):
},
'skip': 'Room is offline',
}, {
+ 'url': 'https://chaturbate.com/fullvideo/?b=caylin',
+ 'only_matching': True,
+ }, {
'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True,
}]
@@ -32,14 +39,34 @@ class ChaturbateIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(
- url, video_id, headers=self.geo_verification_headers())
+ 'https://chaturbate.com/%s/' % video_id, video_id,
+ headers=self.geo_verification_headers())
- m3u8_urls = []
+ found_m3u8_urls = []
+
+ data = self._parse_json(
+ self._search_regex(
+ r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'data', default='{}', group='value'),
+ video_id, transform_source=lowercase_escape, fatal=False)
+ if data:
+ m3u8_url = url_or_none(data.get('hls_source'))
+ if m3u8_url:
+ found_m3u8_urls.append(m3u8_url)
- for m in re.finditer(
- r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
- m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
- 'url').replace('_fast', '')
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(lowercase_escape(m.group('url')))
+
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(m.group('url'))
+
+ m3u8_urls = []
+ for found_m3u8_url in found_m3u8_urls:
+ m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
if m3u8_url not in m3u8_urls:
m3u8_urls.append(m3u8_url)
@@ -59,7 +86,12 @@ class ChaturbateIE(InfoExtractor):
formats = []
for m3u8_url in m3u8_urls:
- m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
+ for known_id in ('fast', 'slow'):
+ if '_%s' % known_id in m3u8_url:
+ m3u8_id = known_id
+ break
+ else:
+ m3u8_id = None
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4',
# ffmpeg skips segments for fast m3u8
diff --git a/youtube_dl/extractor/cinemax.py b/youtube_dl/extractor/cinemax.py
new file mode 100644
index 000000000..7f89d33de
--- /dev/null
+++ b/youtube_dl/extractor/cinemax.py
@@ -0,0 +1,29 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .hbo import HBOBaseIE
+
+
+class CinemaxIE(HBOBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903',
+ 'md5': '82e0734bba8aa7ef526c9dd00cf35a05',
+ 'info_dict': {
+ 'id': '20126903',
+ 'ext': 'mp4',
+ 'title': 'S1 Ep 1: Recap',
+ },
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }, {
+ 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ path, video_id = re.match(self._VALID_URL, url).groups()
+ info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)
+ info['id'] = video_id
+ return info
diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py
new file mode 100644
index 000000000..da404e4dc
--- /dev/null
+++ b/youtube_dl/extractor/ciscolive.py
@@ -0,0 +1,151 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ clean_html,
+ float_or_none,
+ int_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class CiscoLiveBaseIE(InfoExtractor):
+ # These appear to be constant across all Cisco Live presentations
+ # and are not tied to any user session or event
+ RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
+ RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
+ RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
+
+ HEADERS = {
+ 'Origin': 'https://ciscolive.cisco.com',
+ 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
+ 'rfWidgetId': RAINFOCUS_WIDGET_ID,
+ }
+
+ def _call_api(self, ep, rf_id, query, referrer, note=None):
+ headers = self.HEADERS.copy()
+ headers['Referer'] = referrer
+ return self._download_json(
+ self.RAINFOCUS_API_URL % ep, rf_id, note=note,
+ data=urlencode_postdata(query), headers=headers)
+
+ def _parse_rf_item(self, rf_item):
+ event_name = rf_item.get('eventName')
+ title = rf_item['title']
+ description = clean_html(rf_item.get('abstract'))
+ presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
+ bc_id = rf_item['videos'][0]['url']
+ bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
+ duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
+ location = try_get(rf_item, lambda x: x['times'][0]['room'])
+
+ if duration:
+ duration = duration * 60
+
+ return {
+ '_type': 'url_transparent',
+ 'url': bc_url,
+ 'ie_key': 'BrightcoveNew',
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'creator': presenter_name,
+ 'location': location,
+ 'series': event_name,
+ }
+
+
+class CiscoLiveSessionIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P<id>[^/?&]+)'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
+ 'md5': 'c98acf395ed9c9f766941c70f5352e22',
+ 'info_dict': {
+ 'id': '5803694304001',
+ 'ext': 'mp4',
+ 'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
+ 'description': 'md5:ec4a436019e09a918dec17714803f7cc',
+ 'timestamp': 1530305395,
+ 'upload_date': '20180629',
+ 'uploader_id': '5647924234001',
+ 'location': '16B Mezz.',
+ },
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ rf_id = self._match_id(url)
+ rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
+ return self._parse_rf_item(rf_result['items'][0])
+
+
+class CiscoLiveSearchIE(CiscoLiveBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)'
+ _TESTS = [{
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
+ 'info_dict': {
+ 'title': 'Search query',
+ },
+ 'playlist_count': 5,
+ }, {
+ 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
+
+ @staticmethod
+ def _check_bc_id_exists(rf_item):
+ return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
+
+ def _entries(self, query, url):
+ query['size'] = 50
+ query['from'] = 0
+ for page_num in itertools.count(1):
+ results = self._call_api(
+ 'search', None, query, url,
+ 'Downloading search JSON page %d' % page_num)
+ sl = try_get(results, lambda x: x['sectionList'][0], dict)
+ if sl:
+ results = sl
+ items = results.get('items')
+ if not items or not isinstance(items, list):
+ break
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ if not self._check_bc_id_exists(item):
+ continue
+ yield self._parse_rf_item(item)
+ size = int_or_none(results.get('size'))
+ if size is not None:
+ query['size'] = size
+ total = int_or_none(results.get('total'))
+ if total is not None and query['from'] + query['size'] > total:
+ break
+ query['from'] += query['size']
+
+ def _real_extract(self, url):
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ query['type'] = 'session'
+ return self.playlist_result(
+ self._entries(query, url), playlist_title='Search query')
diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py
index ab651d1c8..f2ca7a337 100644
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dl/extractor/cliphunter.py
@@ -1,19 +1,10 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-_translation_table = {
- 'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
- 'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
- 'y': 'l', 'z': 'i',
- '$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
-}
-
-
-def _decode(s):
- return ''.join(_translation_table.get(c, c) for c in s)
+from ..utils import (
+ int_or_none,
+ url_or_none,
+)
class CliphunterIE(InfoExtractor):
@@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
formats = []
for format_id, f in gexo_files.items():
- video_url = f.get('url')
+ video_url = url_or_none(f.get('url'))
if not video_url:
continue
fmt = f.get('fmt')
height = f.get('h')
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
formats.append({
- 'url': _decode(video_url),
+ 'url': video_url,
'format_id': format_id,
'width': int_or_none(f.get('w')),
'height': int_or_none(height),
diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py
index e6d92cca2..2fdcfbb3a 100644
--- a/youtube_dl/extractor/cloudflarestream.py
+++ b/youtube_dl/extractor/cloudflarestream.py
@@ -1,20 +1,24 @@
# coding: utf-8
from __future__ import unicode_literals
+import base64
import re
from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor):
+ _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
+ _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+ _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = r'''(?x)
https?://
(?:
- (?:watch\.)?cloudflarestream\.com/|
- embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=
+ (?:watch\.)?%s/|
+ %s
)
- (?P<id>[\da-f]+)
- '''
+ (?P<id>%s)
+ ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
_TESTS = [{
'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
'info_dict': {
@@ -31,6 +35,9 @@ class CloudflareStreamIE(InfoExtractor):
}, {
'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
'only_matching': True,
+ }, {
+ 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
+ 'only_matching': True,
}]
@staticmethod
@@ -38,23 +45,28 @@ class CloudflareStreamIE(InfoExtractor):
return [
mobj.group('url')
for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
+ domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
+ base_url = 'https://%s/%s/' % (domain, video_id)
+ if '.' in video_id:
+ video_id = self._parse_json(base64.urlsafe_b64decode(
+ video_id.split('.')[1]), video_id)['sub']
+ manifest_base_url = base_url + 'manifest/video.'
formats = self._extract_m3u8_formats(
- 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id,
- video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False)
+ manifest_base_url + 'm3u8', video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
formats.extend(self._extract_mpd_formats(
- 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id,
- video_id, mpd_id='dash', fatal=False))
+ manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
return {
'id': video_id,
'title': video_id,
+ 'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
'formats': formats,
}
diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py
index d354d9f95..6889b0f40 100644
--- a/youtube_dl/extractor/cnbc.py
+++ b/youtube_dl/extractor/cnbc.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+
from .common import InfoExtractor
from ..utils import smuggle_url
@@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
{'force_smil_url': True}),
'id': video_id,
}
+
+
+class CNBCVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
+ _TEST = {
+ 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
+ 'info_dict': {
+ 'id': '7000031301',
+ 'ext': 'mp4',
+ 'title': "Trump: I don't necessarily agree with raising rates",
+ 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
+ 'timestamp': 1531958400,
+ 'upload_date': '20180719',
+ 'uploader': 'NBCU-CNBC',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
+ 'video id')
+ return self.url_result(
+ 'http://video.cnbc.com/gallery/?video=%s' % video_id,
+ CNBCIE.ie_key())
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 5fc311f53..774b71055 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -119,11 +119,7 @@ class CNNBlogsIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
- return {
- '_type': 'url',
- 'url': cnn_url,
- 'ie_key': CNNIE.ie_key(),
- }
+ return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
@@ -145,8 +141,4 @@ class CNNArticleIE(InfoExtractor):
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
- return {
- '_type': 'url',
- 'url': 'http://cnn.com/video/?/video/' + cnn_url,
- 'ie_key': CNNIE.ie_key(),
- }
+ return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
deleted file mode 100644
index 588aad0d9..000000000
--- a/youtube_dl/extractor/comcarcoff.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_iso8601,
-)
-
-
-class ComCarCoffIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
- _TESTS = [{
- 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
- 'info_dict': {
- 'id': '2494164',
- 'ext': 'mp4',
- 'upload_date': '20141127',
- 'timestamp': 1417107600,
- 'duration': 1232,
- 'title': 'Happy Thanksgiving Miranda',
- 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
- },
- 'params': {
- 'skip_download': 'requires ffmpeg',
- }
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- if not display_id:
- display_id = 'comediansincarsgettingcoffee.com'
- webpage = self._download_webpage(url, display_id)
-
- full_data = self._parse_json(
- self._search_regex(
- r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
- display_id)['videoData']
-
- display_id = full_data['activeVideo']['video']
- video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
-
- video_id = compat_str(video_data['mediaId'])
- title = video_data['title']
- formats = self._extract_m3u8_formats(
- video_data['mediaUrl'], video_id, 'mp4')
- self._sort_formats(formats)
-
- thumbnails = [{
- 'url': video_data['images']['thumb'],
- }, {
- 'url': video_data['images']['poster'],
- }]
-
- timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
- video_data.get('pubDate'))
- duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
- video_data.get('duration'))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'timestamp': timestamp,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'formats': formats,
- 'season_number': int_or_none(video_data.get('season')),
- 'episode_number': int_or_none(video_data.get('episode')),
- 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
- }
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2dbf81e6e..a61753b17 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -15,8 +15,9 @@ import time
import math
from ..compat import (
- compat_cookiejar,
+ compat_cookiejar_Cookie,
compat_cookies,
+ compat_etree_Element,
compat_etree_fromstring,
compat_getpass,
compat_integer_types,
@@ -43,6 +44,7 @@ from ..utils import (
compiled_regex_type,
determine_ext,
determine_protocol,
+ dict_get,
error_to_compat_str,
ExtractorError,
extract_attributes,
@@ -55,13 +57,17 @@ from ..utils import (
JSON_LD_RE,
mimetype2ext,
orderedSet,
+ parse_bitrate,
parse_codecs,
parse_duration,
parse_iso8601,
parse_m3u8_attributes,
+ parse_resolution,
RegexNotFoundError,
sanitized_Request,
sanitize_filename,
+ str_or_none,
+ strip_or_none,
unescapeHTML,
unified_strdate,
unified_timestamp,
@@ -69,6 +75,7 @@ from ..utils import (
update_url_query,
urljoin,
url_basename,
+ url_or_none,
xpath_element,
xpath_text,
xpath_with_ns,
@@ -101,10 +108,26 @@ class InfoExtractor(object):
from worst to best quality.
Potential fields:
- * url Mandatory. The URL of the video file
+ * url The mandatory URL representing the media:
+ for plain file media - HTTP URL of this file,
+ for RTMP - RTMP URL,
+ for HLS - URL of the M3U8 media playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH
+ - HTTP URL to plain file media (in case of
+ unfragmented media)
+ - URL of the MPD manifest or base URL
+ representing the media if MPD manifest
+ is parsed from a string (in case of
+ fragmented media)
+ for MSS - URL of the ISM manifest.
* manifest_url
The URL of the manifest file in case of
- fragmented media (DASH, hls, hds)
+ fragmented media:
+ for HLS - URL of the M3U8 master playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH - URL of the MPD manifest,
+ for MSS - URL of the ISM manifest.
* ext Will be calculated from URL if missing
* format A human-readable description of the format
("mp4 container with h264/opus").
@@ -197,7 +220,7 @@ class InfoExtractor(object):
* "preference" (optional, int) - quality of the image
* "width" (optional, int)
* "height" (optional, int)
- * "resolution" (optional, string "{width}x{height"},
+ * "resolution" (optional, string "{width}x{height}",
deprecated)
* "filesize" (optional, int)
thumbnail: Full URL to a video thumbnail image.
@@ -520,11 +543,11 @@ class InfoExtractor(object):
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None) and
- self._GEO_BYPASS and
- self._downloader.params.get('geo_bypass', True) and
- not self._x_forwarded_for_ip and
- countries):
+ if (not self._downloader.params.get('geo_bypass_country', None)
+ and self._GEO_BYPASS
+ and self._downloader.params.get('geo_bypass', True)
+ and not self._x_forwarded_for_ip
+ and countries):
country_code = random.choice(countries)
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._x_forwarded_for_ip:
@@ -605,6 +628,11 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
+ # Retain reference to error to prevent file object from
+ # being closed before it can be read. Works around the
+ # effects of <https://bugs.python.org/issue15002>
+ # introduced in Python 3.4.1.
+ err.fp._error = err
return err.fp
if errnote is False:
@@ -655,8 +683,8 @@ class InfoExtractor(object):
def __check_blocked(self, content):
first_block = content[:512]
- if ('<title>Access to this site is blocked</title>' in content and
- 'Websense' in first_block):
+ if ('<title>Access to this site is blocked</title>' in content
+ and 'Websense' in first_block):
msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
blocked_iframe = self._html_search_regex(
r'<iframe src="([^"]+)"', content,
@@ -674,8 +702,8 @@ class InfoExtractor(object):
if block_msg:
msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
raise ExtractorError(msg, expected=True)
- if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content and
- 'blocklist.rkn.gov.ru' in content):
+ if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
+ and 'blocklist.rkn.gov.ru' in content):
raise ExtractorError(
'Access to this webpage has been blocked by decision of the Russian government. '
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
@@ -782,7 +810,7 @@ class InfoExtractor(object):
fatal=True, encoding=None, data=None, headers={}, query={},
expected_status=None):
"""
- Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
+ Return a tuple (xml as an compat_etree_Element, URL handle).
See _download_webpage docstring for arguments specification.
"""
@@ -803,7 +831,7 @@ class InfoExtractor(object):
transform_source=None, fatal=True, encoding=None,
data=None, headers={}, query={}, expected_status=None):
"""
- Return the xml as an xml.etree.ElementTree.Element.
+ Return the xml as an compat_etree_Element.
See _download_webpage docstring for arguments specification.
"""
@@ -1052,7 +1080,7 @@ class InfoExtractor(object):
@staticmethod
def _og_regexes(prop):
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
- property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
+ property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
% {'prop': re.escape(prop)})
template = r'<meta[^>]+?%s[^>]+?%s'
return [
@@ -1154,16 +1182,33 @@ class InfoExtractor(object):
'twitter card player')
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
- json_ld = self._search_regex(
- JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
+ json_ld_list = list(re.finditer(JSON_LD_RE, html))
default = kwargs.get('default', NO_DEFAULT)
- if not json_ld:
- return default if default is not NO_DEFAULT else {}
# JSON-LD may be malformed and thus `fatal` should be respected.
# At the same time `default` may be passed that assumes `fatal=False`
# for _search_regex. Let's simulate the same behavior here as well.
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
- return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
+ json_ld = []
+ for mobj in json_ld_list:
+ json_ld_item = self._parse_json(
+ mobj.group('json_ld'), video_id, fatal=fatal)
+ if not json_ld_item:
+ continue
+ if isinstance(json_ld_item, dict):
+ json_ld.append(json_ld_item)
+ elif isinstance(json_ld_item, (list, tuple)):
+ json_ld.extend(json_ld_item)
+ if json_ld:
+ json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
+ if json_ld:
+ return json_ld
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract JSON-LD')
+ else:
+ self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
if isinstance(json_ld, compat_str):
@@ -1213,10 +1258,10 @@ class InfoExtractor(object):
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
info.update({
- 'url': e.get('contentUrl'),
+ 'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+ 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'filesize': float_or_none(e.get('contentSize')),
@@ -1228,22 +1273,35 @@ class InfoExtractor(object):
extract_interaction_statistic(e)
for e in json_ld:
- if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema.org/?$', e.get('@context')):
+ if '@context' in e:
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
- return info
+ continue
if item_type in ('TVEpisode', 'Episode'):
+ episode_name = unescapeHTML(e.get('name'))
info.update({
- 'episode': unescapeHTML(e.get('name')),
+ 'episode': episode_name,
'episode_number': int_or_none(e.get('episodeNumber')),
'description': unescapeHTML(e.get('description')),
})
+ if not info.get('title') and episode_name:
+ info['title'] = episode_name
part_of_season = e.get('partOfSeason')
if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
- info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
+ info.update({
+ 'season': unescapeHTML(part_of_season.get('name')),
+ 'season_number': int_or_none(part_of_season.get('seasonNumber')),
+ })
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
+ elif item_type == 'Movie':
+ info.update({
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('dateCreated')),
+ })
elif item_type in ('Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
@@ -1252,11 +1310,17 @@ class InfoExtractor(object):
})
elif item_type == 'VideoObject':
extract_video_object(e)
- continue
+ if expected_type is None:
+ continue
+ else:
+ break
video = e.get('video')
if isinstance(video, dict) and video.get('@type') == 'VideoObject':
extract_video_object(video)
- break
+ if expected_type is None:
+ continue
+ else:
+ break
return dict((k, v) for k, v in info.items() if v is not None)
@staticmethod
@@ -1383,12 +1447,10 @@ class InfoExtractor(object):
try:
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
return True
- except ExtractorError as e:
- if isinstance(e.cause, compat_urllib_error.URLError):
- self.to_screen(
- '%s: %s URL is invalid, skipping' % (video_id, item))
- return False
- raise
+ except ExtractorError:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, item))
+ return False
def http_scheme(self):
""" Either "http:" or "https:", depending on the user's preferences """
@@ -1416,14 +1478,14 @@ class InfoExtractor(object):
def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
- fatal=True, m3u8_id=None):
+ fatal=True, m3u8_id=None, data=None, headers={}, query={}):
manifest = self._download_xml(
manifest_url, video_id, 'Downloading f4m manifest',
'Unable to download f4m manifest',
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
- # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244)
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
transform_source=transform_source,
- fatal=fatal)
+ fatal=fatal, data=data, headers=headers, query=query)
if manifest is False:
return []
@@ -1435,6 +1497,9 @@ class InfoExtractor(object):
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True, m3u8_id=None):
+ if not isinstance(manifest, compat_etree_Element) and not fatal:
+ return []
+
# currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
if akamai_pv is not None and ';' in akamai_pv.text:
@@ -1449,7 +1514,7 @@ class InfoExtractor(object):
manifest_version = '2.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
# Remove unsupported DRM protected media from final formats
- # rendition (see https://github.com/rg3/youtube-dl/issues/8573).
+ # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
@@ -1544,12 +1609,13 @@ class InfoExtractor(object):
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None,
- fatal=True, live=False):
+ fatal=True, live=False, data=None, headers={},
+ query={}):
res = self._download_webpage_handle(
m3u8_url, video_id,
note=note or 'Downloading m3u8 information',
errnote=errnote or 'Failed to download m3u8 information',
- fatal=fatal)
+ fatal=fatal, data=data, headers=headers, query=query)
if res is False:
return []
@@ -1579,7 +1645,8 @@ class InfoExtractor(object):
# References:
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
- # 2. https://github.com/rg3/youtube-dl/issues/12211
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
+ # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
# We should try extracting formats only from master playlists [1, 4.3.4],
# i.e. playlists that describe available qualities. On the other hand
@@ -1651,17 +1718,22 @@ class InfoExtractor(object):
rendition = stream_group[0]
return rendition.get('NAME') or stream_group_id
+ # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
+ # chance to detect video only formats when EXT-X-STREAM-INF tags
+ # precede EXT-X-MEDIA tags in HLS manifest such as [3].
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-MEDIA:'):
+ extract_media(line)
+
for line in m3u8_doc.splitlines():
if line.startswith('#EXT-X-STREAM-INF:'):
last_stream_inf = parse_m3u8_attributes(line)
- elif line.startswith('#EXT-X-MEDIA:'):
- extract_media(line)
elif line.startswith('#') or not line.strip():
continue
else:
tbr = float_or_none(
- last_stream_inf.get('AVERAGE-BANDWIDTH') or
- last_stream_inf.get('BANDWIDTH'), scale=1000)
+ last_stream_inf.get('AVERAGE-BANDWIDTH')
+ or last_stream_inf.get('BANDWIDTH'), scale=1000)
format_id = []
if m3u8_id:
format_id.append(m3u8_id)
@@ -1717,6 +1789,19 @@ class InfoExtractor(object):
# the same GROUP-ID
f['acodec'] = 'none'
formats.append(f)
+
+ # for DailyMotion
+ progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
+ if progressive_uri:
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': progressive_uri,
+ })
+ formats.append(http_f)
+
last_stream_inf = {}
return formats
@@ -1961,15 +2046,17 @@ class InfoExtractor(object):
})
return entries
- def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
+ def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
res = self._download_xml_handle(
mpd_url, video_id,
note=note or 'Downloading MPD manifest',
errnote=errnote or 'Failed to download MPD manifest',
- fatal=fatal)
+ fatal=fatal, data=data, headers=headers, query=query)
if res is False:
return []
mpd_doc, urlh = res
+ if mpd_doc is None:
+ return []
mpd_base_url = base_url(urlh.geturl())
return self._parse_mpd_formats(
@@ -2095,7 +2182,6 @@ class InfoExtractor(object):
bandwidth = int_or_none(representation_attrib.get('bandwidth'))
f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
- 'url': base_url,
'manifest_url': mpd_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')),
@@ -2116,7 +2202,7 @@ class InfoExtractor(object):
# First of, % characters outside $...$ templates
# must be escaped by doubling for proper processing
# by % operator string formatting used further (see
- # https://github.com/rg3/youtube-dl/issues/16867).
+ # https://github.com/ytdl-org/youtube-dl/issues/16867).
t = ''
in_template = False
for c in tmpl:
@@ -2135,7 +2221,7 @@ class InfoExtractor(object):
# @initialization is a regular template like @media one
# so it should be handled just the same way (see
- # https://github.com/rg3/youtube-dl/issues/11605)
+ # https://github.com/ytdl-org/youtube-dl/issues/11605)
if 'initialization' in representation_ms_info:
initialization_template = prepare_template(
'initialization',
@@ -2221,7 +2307,7 @@ class InfoExtractor(object):
elif 'segment_urls' in representation_ms_info:
# Segment URLs with no SegmentTimeline
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
- # https://github.com/rg3/youtube-dl/pull/14844
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
fragments = []
segment_duration = float_or_none(
representation_ms_info['segment_duration'],
@@ -2234,10 +2320,14 @@ class InfoExtractor(object):
fragment['duration'] = segment_duration
fragments.append(fragment)
representation_ms_info['fragments'] = fragments
- # NB: MPD manifest may contain direct URLs to unfragmented media.
- # No fragments key is present in this case.
+ # If there is a fragments key available then we correctly recognized fragmented media.
+ # Otherwise we will assume unfragmented media with direct access. Technically, such
+ # assumption is not necessarily correct since we may simply have no support for
+ # some forms of fragmented media renditions yet, but for now we'll use this fallback.
if 'fragments' in representation_ms_info:
f.update({
+ # NB: mpd_url may be empty when MPD manifest is parsed from a string
+ 'url': mpd_url or base_url,
'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
@@ -2248,11 +2338,15 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
+ else:
+ # Assuming direct URL to unfragmented media.
+ f['url'] = base_url
+
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
# is not necessarily unique within a Period thus formats with
# the same `format_id` are quite possible. There are numerous examples
- # of such manifests (see https://github.com/rg3/youtube-dl/issues/15111,
- # https://github.com/rg3/youtube-dl/issues/13919)
+ # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
+ # https://github.com/ytdl-org/youtube-dl/issues/13919)
full_info = formats_dict.get(representation_id, {}).copy()
full_info.update(f)
formats.append(full_info)
@@ -2260,15 +2354,17 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
- def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
+ def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
res = self._download_xml_handle(
ism_url, video_id,
note=note or 'Downloading ISM manifest',
errnote=errnote or 'Failed to download ISM manifest',
- fatal=fatal)
+ fatal=fatal, data=data, headers=headers, query=query)
if res is False:
return []
ism_doc, urlh = res
+ if ism_doc is None:
+ return []
return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
@@ -2413,7 +2509,7 @@ class InfoExtractor(object):
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
- # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+ # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
# http://www.porntrex.com/maps/videositemap.xml).
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
for media_tag, media_type, media_content in media_tags:
@@ -2422,25 +2518,50 @@ class InfoExtractor(object):
'subtitles': {},
}
media_attributes = extract_attributes(media_tag)
- src = media_attributes.get('src')
+ src = strip_or_none(media_attributes.get('src'))
if src:
_, formats = _media_formats(src, media_type)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content):
- source_attributes = extract_attributes(source_tag)
- src = source_attributes.get('src')
+ s_attr = extract_attributes(source_tag)
+ # data-video-src and data-src are non standard but seen
+ # several times in the wild
+ src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
if not src:
continue
- f = parse_content_type(source_attributes.get('type'))
+ f = parse_content_type(s_attr.get('type'))
is_plain_url, formats = _media_formats(src, media_type, f)
if is_plain_url:
- # res attribute is not standard but seen several times
- # in the wild
+ # width, height, res, label and title attributes are
+ # all not standard but seen several times in the wild
+ labels = [
+ s_attr.get(lbl)
+ for lbl in ('label', 'title')
+ if str_or_none(s_attr.get(lbl))
+ ]
+ width = int_or_none(s_attr.get('width'))
+ height = (int_or_none(s_attr.get('height'))
+ or int_or_none(s_attr.get('res')))
+ if not width or not height:
+ for lbl in labels:
+ resolution = parse_resolution(lbl)
+ if not resolution:
+ continue
+ width = width or resolution.get('width')
+ height = height or resolution.get('height')
+ for lbl in labels:
+ tbr = parse_bitrate(lbl)
+ if tbr:
+ break
+ else:
+ tbr = None
f.update({
- 'height': int_or_none(source_attributes.get('res')),
- 'format_id': source_attributes.get('label'),
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'format_id': s_attr.get('label') or s_attr.get('title'),
})
f.update(formats[0])
media_info['formats'].append(f)
@@ -2450,7 +2571,7 @@ class InfoExtractor(object):
track_attributes = extract_attributes(track_tag)
kind = track_attributes.get('kind')
if not kind or kind in ('subtitles', 'captions'):
- src = track_attributes.get('src')
+ src = strip_or_none(track_attributes.get('src'))
if not src:
continue
lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
@@ -2607,8 +2728,8 @@ class InfoExtractor(object):
entry = {
'id': this_video_id,
'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
- 'description': video_data.get('description'),
- 'thumbnail': self._proto_relative_url(video_data.get('image')),
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
'timestamp': int_or_none(video_data.get('pubdate')),
'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
'subtitles': subtitles,
@@ -2635,12 +2756,9 @@ class InfoExtractor(object):
for source in jwplayer_sources_data:
if not isinstance(source, dict):
continue
- source_url = self._proto_relative_url(source.get('file'))
- if not source_url:
- continue
- if base_url:
- source_url = compat_urlparse.urljoin(base_url, source_url)
- if source_url in urls:
+ source_url = urljoin(
+ base_url, self._proto_relative_url(source.get('file')))
+ if not source_url or source_url in urls:
continue
urls.append(source_url)
source_type = source.get('type') or ''
@@ -2725,7 +2843,7 @@ class InfoExtractor(object):
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs):
- cookie = compat_cookiejar.Cookie(
+ cookie = compat_cookiejar_Cookie(
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
@@ -2737,6 +2855,33 @@ class InfoExtractor(object):
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+ def _apply_first_set_cookie_header(self, url_handle, cookie):
+ """
+ Apply first Set-Cookie header instead of the last. Experimental.
+
+ Some sites (e.g. [1-3]) may serve two cookies under the same name
+ in Set-Cookie header and expect the first (old) one to be set rather
+ than second (new). However, as of RFC6265 the newer one cookie
+ should be set into cookie store what actually happens.
+ We will workaround this issue by resetting the cookie to
+ the first one manually.
+ 1. https://new.vk.com/
+ 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+ 3. https://learning.oreilly.com/
+ """
+ for header, cookies in url_handle.headers.items():
+ if header.lower() != 'set-cookie':
+ continue
+ if sys.version_info[0] >= 3:
+ cookies = cookies.encode('iso-8859-1')
+ cookies = cookies.decode('utf-8')
+ cookie_value = re.search(
+ r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+ if cookie_value:
+ value, domain = cookie_value.groups()
+ self._set_cookie(domain, cookie, value)
+ break
+
def get_testcases(self, include_onlymatching=False):
t = getattr(self, '_TEST', None)
if t:
@@ -2767,8 +2912,8 @@ class InfoExtractor(object):
return not any_restricted
def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False) or
- self._downloader.params.get('listsubtitles')):
+ if (self._downloader.params.get('writesubtitles', False)
+ or self._downloader.params.get('listsubtitles')):
return self._get_subtitles(*args, **kwargs)
return {}
@@ -2793,8 +2938,8 @@ class InfoExtractor(object):
return ret
def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False) or
- self._downloader.params.get('listsubtitles')):
+ if (self._downloader.params.get('writeautomaticsub', False)
+ or self._downloader.params.get('listsubtitles')):
return self._get_automatic_captions(*args, **kwargs)
return {}
@@ -2802,9 +2947,9 @@ class InfoExtractor(object):
raise NotImplementedError('This method must be implemented by subclasses')
def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False) and
- (self._get_login_info()[0] is not None or
- self._downloader.params.get('cookiefile') is not None)):
+ if (self._downloader.params.get('mark_watched', False)
+ and (self._get_login_info()[0] is not None
+ or self._downloader.params.get('cookiefile') is not None)):
self._mark_watched(*args, **kwargs)
def _mark_watched(self, *args, **kwargs):
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
index 79f7a9cd1..7e12499b1 100644
--- a/youtube_dl/extractor/commonmistakes.py
+++ b/youtube_dl/extractor/commonmistakes.py
@@ -32,19 +32,19 @@ class CommonMistakesIE(InfoExtractor):
class UnicodeBOMIE(InfoExtractor):
- IE_DESC = False
- _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
-
- # Disable test for python 3.2 since BOM is broken in re in this version
- # (see https://github.com/rg3/youtube-dl/issues/9751)
- _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
- 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- real_url = self._match_id(url)
- self.report_warning(
- 'Your URL starts with a Byte Order Mark (BOM). '
- 'Removing the BOM and looking for "%s" ...' % real_url)
- return self.url_result(real_url)
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ # Disable test for python 3.2 since BOM is broken in re in this version
+ # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
+ _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
diff --git a/youtube_dl/extractor/contv.py b/youtube_dl/extractor/contv.py
new file mode 100644
index 000000000..84b462d40
--- /dev/null
+++ b/youtube_dl/extractor/contv.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class CONtvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
+ 'info_dict': {
+ 'id': 'CEG10022949',
+ 'ext': 'mp4',
+ 'title': 'Days Of Thrills & Laughter',
+ 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
+ 'upload_date': '20180703',
+ 'timestamp': 1530634789.61,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
+ 'info_dict': {
+ 'id': 'CLIP-show_fotld_bts',
+ 'title': 'Fight of the Living Dead: Behind the Scenes Bites',
+ },
+ 'playlist_mincount': 7,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ details = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
+ video_id, query={'device': 'web'})
+
+ if details.get('type') == 'episodic':
+ seasons = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
+ video_id)
+ entries = []
+ for season in seasons:
+ for episode in season.get('episodes', []):
+ episode_id = episode.get('id')
+ if not episode_id:
+ continue
+ entries.append(self.url_result(
+ 'https://www.contv.com/details-movie/' + episode_id,
+ CONtvIE.ie_key(), episode_id))
+ return self.playlist_result(entries, video_id, details.get('title'))
+
+ m_details = details['details']
+ title = details['title']
+
+ formats = []
+
+ media_hls_url = m_details.get('media_hls_url')
+ if media_hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_hls_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+
+ media_mp4_url = m_details.get('media_mp4_url')
+ if media_mp4_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': media_mp4_url,
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ captions = m_details.get('captions') or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({
+ 'url': caption_url
+ })
+
+ thumbnails = []
+ for image in m_details.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ description = None
+ for p in ('large_', 'medium_', 'small_', ''):
+ d = m_details.get(p + 'description')
+ if d:
+ description = d
+ break
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'timestamp': float_or_none(details.get('metax_added_on'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(m_details.get('duration'), 1000),
+ 'view_count': int_or_none(details.get('num_watched')),
+ 'like_count': int_or_none(details.get('num_fav')),
+ 'categories': details.get('category'),
+ 'tags': details.get('tags'),
+ 'season_number': int_or_none(details.get('season')),
+ 'episode_number': int_or_none(details.get('episode')),
+ 'release_year': int_or_none(details.get('pub_year')),
+ }
diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py
index 807a29eea..e11aadf14 100644
--- a/youtube_dl/extractor/corus.py
+++ b/youtube_dl/extractor/corus.py
@@ -4,7 +4,12 @@ from __future__ import unicode_literals
import re
from .theplatform import ThePlatformFeedIE
-from ..utils import int_or_none
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+)
class CorusIE(ThePlatformFeedIE):
@@ -12,24 +17,49 @@ class CorusIE(ThePlatformFeedIE):
https?://
(?:www\.)?
(?P<domain>
- (?:globaltv|etcanada)\.com|
- (?:hgtv|foodnetwork|slice|history|showcase)\.ca
+ (?:
+ globaltv|
+ etcanada|
+ seriesplus|
+ wnetwork|
+ ytv
+ )\.com|
+ (?:
+ hgtv|
+ foodnetwork|
+ slice|
+ history|
+ showcase|
+ bigbrothercanada|
+ abcspark|
+ disney(?:channel|lachaine)
+ )\.ca
+ )
+ /(?:[^/]+/)*
+ (?:
+ video\.html\?.*?\bv=|
+ videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
+ )
+ (?P<id>
+ [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
+ (?:[A-Z]{4})?\d{12,20}
)
- /(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
- (?P<id>\d+)
'''
_TESTS = [{
'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
- 'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
'info_dict': {
'id': '870923331648',
'ext': 'mp4',
'title': 'Movie Night Popcorn with Bryan',
'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
- 'uploader': 'SHWM-NEW',
'upload_date': '20170206',
'timestamp': 1486392197,
},
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON'],
}, {
'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
'only_matching': True,
@@ -42,54 +72,89 @@ class CorusIE(ThePlatformFeedIE):
}, {
'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
'only_matching': True,
+ }, {
+ 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
+ 'only_matching': True
}]
-
- _TP_FEEDS = {
- 'globaltv': {
- 'feed_id': 'ChQqrem0lNUp',
- 'account_id': 2269680845,
- },
- 'etcanada': {
- 'feed_id': 'ChQqrem0lNUp',
- 'account_id': 2269680845,
- },
- 'hgtv': {
- 'feed_id': 'L0BMHXi2no43',
- 'account_id': 2414428465,
- },
- 'foodnetwork': {
- 'feed_id': 'ukK8o58zbRmJ',
- 'account_id': 2414429569,
- },
- 'slice': {
- 'feed_id': '5tUJLgV2YNJ5',
- 'account_id': 2414427935,
- },
- 'history': {
- 'feed_id': 'tQFx_TyyEq4J',
- 'account_id': 2369613659,
- },
- 'showcase': {
- 'feed_id': '9H6qyshBZU3E',
- 'account_id': 2414426607,
- },
+ _GEO_BYPASS = False
+ _SITE_MAP = {
+ 'globaltv': 'series',
+ 'etcanada': 'series',
+ 'foodnetwork': 'food',
+ 'bigbrothercanada': 'series',
+ 'disneychannel': 'disneyen',
+ 'disneylachaine': 'disneyfr',
}
def _real_extract(self, url):
domain, video_id = re.match(self._VALID_URL, url).groups()
- feed_info = self._TP_FEEDS[domain.split('.')[0]]
- return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
- 'episode_number': int_or_none(e.get('pl1$episode')),
- 'season_number': int_or_none(e.get('pl1$season')),
- 'series': e.get('pl1$show'),
- }, {
- 'HLS': {
- 'manifest': 'm3u',
- },
- 'DesktopHLS Default': {
- 'manifest': 'm3u',
- },
- 'MP4 MBR': {
- 'manifest': 'm3u',
- },
- }, feed_info['account_id'])
+ site = domain.split('.')[0]
+ path = self._SITE_MAP.get(site, site)
+ if path != 'series':
+ path = 'migration/' + path
+ video = self._download_json(
+ 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
+ video_id, query={'byId': video_id},
+ headers={'Accept': 'application/json'})[0]
+ title = video['title']
+
+ formats = []
+ for source in video.get('sources', []):
+ smil_url = source.get('file')
+ if not smil_url:
+ continue
+ source_type = source.get('type')
+ note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
+ resp = self._download_webpage(
+ smil_url, video_id, note, fatal=False,
+ headers=self.geo_verification_headers())
+ if not resp:
+ continue
+ error = self._parse_json(resp, video_id, fatal=False)
+ if error:
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['CA'])
+ raise ExtractorError(error['description'])
+ smil = self._parse_xml(resp, video_id, fatal=False)
+ if smil is None:
+ continue
+ namespace = self._parse_smil_namespace(smil)
+ formats.extend(self._parse_smil_formats(
+ smil, smil_url, video_id, namespace))
+ if not formats and video.get('drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for track in video.get('tracks', []):
+ track_url = track.get('file')
+ if not track_url:
+ continue
+ lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
+ subtitles.setdefault(lang, []).append({'url': track_url})
+
+ metadata = video.get('metadata') or {}
+ get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('availableDate'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(metadata.get('duration')),
+ 'series': dict_get(video, ('show', 'pl1$show')),
+ 'season_number': get_number('season'),
+ 'episode_number': get_number('episode'),
+ }
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dl/extractor/crackle.py
index 8dd9d6687..49bf3a4f9 100644
--- a/youtube_dl/extractor/crackle.py
+++ b/youtube_dl/extractor/crackle.py
@@ -1,7 +1,10 @@
# coding: utf-8
from __future__ import unicode_literals, division
+import hashlib
+import hmac
import re
+import time
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -48,6 +51,21 @@ class CrackleIE(InfoExtractor):
'only_matching': True,
}]
+ _MEDIA_FILE_SLOTS = {
+ '360p.mp4': {
+ 'width': 640,
+ 'height': 360,
+ },
+ '480p.mp4': {
+ 'width': 768,
+ 'height': 432,
+ },
+ '480p_1mbps.mp4': {
+ 'width': 852,
+ 'height': 480,
+ },
+ }
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -59,13 +77,16 @@ class CrackleIE(InfoExtractor):
for country in countries:
try:
+ # Authorization generation algorithm is reverse engineered from:
+ # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
+ media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
+ timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
+ h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
media = self._download_json(
- 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
- % (video_id, country), video_id,
- 'Downloading media JSON as %s' % country,
- 'Unable to download media JSON', query={
- 'disableProtocols': 'true',
- 'format': 'json'
+ media_detail_url, video_id, 'Downloading media JSON as %s' % country,
+ 'Unable to download media JSON', headers={
+ 'Accept': 'application/json',
+ 'Authorization': '|'.join([h, timestamp, '117', '1']),
})
except ExtractorError as e:
# 401 means geo restriction, trying next country
@@ -95,6 +116,20 @@ class CrackleIE(InfoExtractor):
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
+ elif format_url.endswith('.ism/Manifest'):
+ formats.extend(self._extract_ism_formats(
+ format_url, video_id, ism_id='mss', fatal=False))
+ else:
+ mfs_path = e.get('Type')
+ mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
+ if not mfs_info:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'format_id': 'http-' + mfs_path.split('.')[0],
+ 'width': mfs_info['width'],
+ 'height': mfs_info['height'],
+ })
self._sort_formats(formats)
description = media.get('Description')
diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py
deleted file mode 100644
index f7815b905..000000000
--- a/youtube_dl/extractor/criterion.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class CriterionIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
- _TEST = {
- 'url': 'http://www.criterion.com/films/184-le-samourai',
- 'md5': 'bc51beba55685509883a9a7830919ec3',
- 'info_dict': {
- 'id': '184',
- 'ext': 'mp4',
- 'title': 'Le Samouraï',
- 'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- final_url = self._search_regex(
- r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
- title = self._og_search_title(webpage)
- description = self._html_search_meta('description', webpage)
- thumbnail = self._search_regex(
- r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
- webpage, 'thumbnail url')
-
- return {
- 'id': video_id,
- 'url': final_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- }
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index af786d096..bc2d1fa8b 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -11,7 +11,9 @@ from .common import InfoExtractor
from .vrv import VRVIE
from ..compat import (
compat_b64decode,
+ compat_etree_Element,
compat_etree_fromstring,
+ compat_str,
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
@@ -24,9 +26,9 @@ from ..utils import (
intlist_to_bytes,
int_or_none,
lowercase_escape,
+ merge_dicts,
remove_end,
sanitized_Request,
- unified_strdate,
urlencode_postdata,
xpath_text,
)
@@ -45,7 +47,7 @@ class CrunchyrollBaseIE(InfoExtractor):
data['req'] = 'RpcApi' + method
data = compat_urllib_parse_urlencode(data).encode('utf-8')
return self._download_xml(
- 'http://www.crunchyroll.com/xml/',
+ 'https://www.crunchyroll.com/xml/',
video_id, note, fatal=False, data=data, headers={
'Content-Type': 'application/x-www-form-urlencoded',
})
@@ -55,22 +57,11 @@ class CrunchyrollBaseIE(InfoExtractor):
if username is None:
return
- self._download_webpage(
- 'https://www.crunchyroll.com/?a=formhandler',
- None, 'Logging in', 'Wrong login info',
- data=urlencode_postdata({
- 'formname': 'RpcApiUser_Login',
- 'next_url': 'https://www.crunchyroll.com/acct/membership',
- 'name': username,
- 'password': password,
- }))
-
- '''
login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
def is_logged(webpage):
- return '<title>Redirecting' in webpage
+ return 'href="/logout"' in webpage
# Already logged in
if is_logged(login_page):
@@ -109,24 +100,10 @@ class CrunchyrollBaseIE(InfoExtractor):
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
- '''
def _real_initialize(self):
self._login()
- def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
- else sanitized_Request(url_or_request))
- # Accept-Language must be set explicitly to accept any language to avoid issues
- # similar to https://github.com/rg3/youtube-dl/issues/6797.
- # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
- # should be imposed or not (from what I can see it just takes the first language
- # ignoring the priority and requires it to correspond the IP). By the way this causes
- # Crunchyroll to not work in georestriction cases in some browsers that don't place
- # the locale lang first in header. However allowing any language seems to workaround the issue.
- request.add_header('Accept-Language', '*')
- return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-
@staticmethod
def _add_skip_wall(url):
parsed_url = compat_urlparse.urlparse(url)
@@ -135,7 +112,7 @@ class CrunchyrollBaseIE(InfoExtractor):
# > This content may be inappropriate for some people.
# > Are you sure you want to continue?
# since it's not disabled by default in crunchyroll account's settings.
- # See https://github.com/rg3/youtube-dl/issues/7202.
+ # See https://github.com/ytdl-org/youtube-dl/issues/7202.
qs['skip_wall'] = ['1']
return compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
@@ -143,7 +120,7 @@ class CrunchyrollBaseIE(InfoExtractor):
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
IE_NAME = 'crunchyroll'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@@ -160,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
# rtmp
'skip_download': True,
},
+ 'skip': 'Video gone',
}, {
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
'info_dict': {
@@ -181,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'info_dict': {
'id': '702409',
'ext': 'mp4',
- 'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
- 'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
+ 'title': compat_str,
+ 'description': compat_str,
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'TV TOKYO',
- 'upload_date': '20160508',
+ 'uploader': 'Re:Zero Partners',
+ 'timestamp': 1462098900,
+ 'upload_date': '20160501',
},
'params': {
# m3u8 download
@@ -196,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'info_dict': {
'id': '727589',
'ext': 'mp4',
- 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
- 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
+ 'title': compat_str,
+ 'description': compat_str,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Kadokawa Pictures Inc.',
- 'upload_date': '20170118',
- 'series': "KONOSUBA -God's blessing on this wonderful world!",
+ 'timestamp': 1484130900,
+ 'upload_date': '20170111',
+ 'series': compat_str,
'season': "KONOSUBA -God's blessing on this wonderful world! 2",
'season_number': 2,
'episode': 'Give Me Deliverance From This Judicial Injustice!',
@@ -224,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'info_dict': {
'id': '535080',
'ext': 'mp4',
- 'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
- 'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
+ 'title': compat_str,
+ 'description': compat_str,
'uploader': 'Marvelous AQL Inc.',
- 'upload_date': '20091021',
+ 'timestamp': 1255512600,
+ 'upload_date': '20091014',
},
'params': {
# Just test metadata extraction
@@ -248,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
# just test metadata extraction
'skip_download': True,
},
+ 'skip': 'Video gone',
}, {
# A video with a vastly different season name compared to the series name
'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
'info_dict': {
'id': '590532',
'ext': 'mp4',
- 'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
- 'description': 'Mahiro and Nyaruko talk about official certification.',
+ 'title': compat_str,
+ 'description': compat_str,
'uploader': 'TV TOKYO',
+ 'timestamp': 1330956000,
'upload_date': '20120305',
'series': 'Nyarko-san: Another Crawling Chaos',
'season': 'Haiyoru! Nyaruani (ONA)',
@@ -268,6 +251,9 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
}, {
'url': 'http://www.crunchyroll.com/media-723735',
'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+ 'only_matching': True,
}]
_FORMAT_IDS = {
@@ -277,6 +263,19 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
'1080': ('80', '108'),
}
+ def _download_webpage(self, url_or_request, *args, **kwargs):
+ request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ else sanitized_Request(url_or_request))
+ # Accept-Language must be set explicitly to accept any language to avoid issues
+ # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
+ # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
+ # should be imposed or not (from what I can see it just takes the first language
+ # ignoring the priority and requires it to correspond the IP). By the way this causes
+ # Crunchyroll to not work in georestriction cases in some browsers that don't place
+ # the locale lang first in header. However allowing any language seems to workaround the issue.
+ request.add_header('Accept-Language', '*')
+ return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
+
def _decrypt_subtitles(self, data, iv, id):
data = bytes_to_intlist(compat_b64decode(data))
iv = bytes_to_intlist(compat_b64decode(iv))
@@ -398,7 +397,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id,
})
- if sub_doc is None:
+ if not isinstance(sub_doc, compat_etree_Element):
continue
sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
@@ -450,23 +449,21 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
webpage, 'language', default=None, group='lang')
video_title = self._html_search_regex(
- r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
- webpage, 'video_title')
+ (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
+ r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
+ webpage, 'video_title', default=None)
+ if not video_title:
+ video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
video_title = re.sub(r' {2,}', ' ', video_title)
video_description = (self._parse_json(self._html_search_regex(
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
if video_description:
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
- video_upload_date = self._html_search_regex(
- [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
- webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
- if video_upload_date:
- video_upload_date = unified_strdate(video_upload_date)
video_uploader = self._html_search_regex(
# try looking for both an uploader that's a link and one that's not
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
- webpage, 'video_uploader', fatal=False)
+ webpage, 'video_uploader', default=False)
formats = []
for stream in media.get('streams', []):
@@ -515,7 +512,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_quality': stream_quality,
'current_page': url,
})
- if streamdata is not None:
+ if isinstance(streamdata, compat_etree_Element):
stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None:
stream_infos.append(stream_info)
@@ -526,7 +523,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'video_format': stream_format,
'video_encode_quality': stream_quality,
})
- if stream_info is not None:
+ if isinstance(stream_info, compat_etree_Element):
stream_infos.append(stream_info)
for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id')
@@ -598,23 +595,36 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
series = self._html_search_regex(
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False)
- season = xpath_text(metadata, 'series_title')
- episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
- episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
+ season = episode = episode_number = duration = thumbnail = None
+
+ if isinstance(metadata, compat_etree_Element):
+ season = xpath_text(metadata, 'series_title')
+ episode = xpath_text(metadata, 'episode_title')
+ episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+ duration = float_or_none(media_metadata.get('duration'), 1000)
+ thumbnail = xpath_text(metadata, 'episode_image_url')
+
+ if not episode:
+ episode = media_metadata.get('title')
+ if not episode_number:
+ episode_number = int_or_none(media_metadata.get('episode_number'))
+ if not thumbnail:
+ thumbnail = media_metadata.get('thumbnail', {}).get('url')
season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
webpage, 'season number', default=None))
- return {
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts({
'id': video_id,
'title': video_title,
'description': video_description,
- 'duration': float_or_none(media_metadata.get('duration'), 1000),
- 'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
+ 'duration': duration,
+ 'thumbnail': thumbnail,
'uploader': video_uploader,
- 'upload_date': video_upload_date,
'series': series,
'season': season,
'season_number': season_number,
@@ -622,7 +632,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'episode_number': episode_number,
'subtitles': subtitles,
'formats': formats,
- }
+ }, info)
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
@@ -657,9 +667,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
webpage = self._download_webpage(
self._add_skip_wall(url), show_id,
headers=self.geo_verification_headers())
- title = self._html_search_regex(
- r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
- webpage, 'title')
+ title = self._html_search_meta('name', webpage, default=None)
+
episode_paths = re.findall(
r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
webpage)
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dl/extractor/ctsnews.py
index d565335cf..679f1d92e 100644
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dl/extractor/ctsnews.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import unified_timestamp
+from .youtube import YoutubeIE
class CtsNewsIE(InfoExtractor):
@@ -14,8 +15,8 @@ class CtsNewsIE(InfoExtractor):
'info_dict': {
'id': '201501291578109',
'ext': 'mp4',
- 'title': '以色列.真主黨交火 3人死亡',
- 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人...',
+ 'title': '以色列.真主黨交火 3人死亡 - 華視新聞網',
+ 'description': '以色列和黎巴嫩真主黨,爆發五年最嚴重衝突,雙方砲轟交火,兩名以軍死亡,還有一名西班牙籍的聯合國維和人員也不幸罹難。大陸陝西、河南、安徽、江蘇和湖北五個省份出現大暴雪,嚴重影響陸空交通,不過九華山卻出現...',
'timestamp': 1422528540,
'upload_date': '20150129',
}
@@ -26,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
'info_dict': {
'id': '201309031304098',
'ext': 'mp4',
- 'title': '韓國31歲童顏男 貌如十多歲小孩',
+ 'title': '韓國31歲童顏男 貌如十多歲小孩 - 華視新聞網',
'description': '越有年紀的人,越希望看起來年輕一點,而南韓卻有一位31歲的男子,看起來像是11、12歲的小孩,身...',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1378205880,
@@ -62,8 +63,7 @@ class CtsNewsIE(InfoExtractor):
video_url = mp4_feed['source_url']
else:
self.to_screen('Not CTSPlayer video, trying Youtube...')
- youtube_url = self._search_regex(
- r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')
+ youtube_url = YoutubeIE._extract_url(page)
return self.url_result(youtube_url, ie='Youtube')
diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dl/extractor/curiositystream.py
index 35b1e7a34..e4a7fca6c 100644
--- a/youtube_dl/extractor/curiositystream.py
+++ b/youtube_dl/extractor/curiositystream.py
@@ -46,8 +46,24 @@ class CuriosityStreamBaseIE(InfoExtractor):
self._handle_errors(result)
self._auth_token = result['message']['auth_token']
- def _extract_media_info(self, media):
- video_id = compat_str(media['id'])
+
+class CuriosityStreamIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://app.curiositystream.com/video/2',
+ 'md5': '262bb2f257ff301115f1973540de8983',
+ 'info_dict': {
+ 'id': '2',
+ 'ext': 'mp4',
+ 'title': 'How Did You Develop The Internet?',
+ 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ media = self._call_api('media/' + video_id, video_id)
title = media['title']
formats = []
@@ -114,38 +130,21 @@ class CuriosityStreamBaseIE(InfoExtractor):
}
-class CuriosityStreamIE(CuriosityStreamBaseIE):
- IE_NAME = 'curiositystream'
- _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://app.curiositystream.com/video/2',
- 'md5': '262bb2f257ff301115f1973540de8983',
- 'info_dict': {
- 'id': '2',
- 'ext': 'mp4',
- 'title': 'How Did You Develop The Internet?',
- 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- media = self._call_api('media/' + video_id, video_id)
- return self._extract_media_info(media)
-
-
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collection'
- _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
+ _TESTS = [{
'url': 'https://app.curiositystream.com/collection/2',
'info_dict': {
'id': '2',
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
- 'playlist_mincount': 12,
- }
+ 'playlist_mincount': 17,
+ }, {
+ 'url': 'https://curiositystream.com/series/2',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
collection_id = self._match_id(url)
@@ -153,7 +152,10 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'collections/' + collection_id, collection_id)
entries = []
for media in collection.get('media', []):
- entries.append(self._extract_media_info(media))
+ media_id = compat_str(media.get('id'))
+ entries.append(self.url_result(
+ 'https://curiositystream.com/video/' + media_id,
+ CuriosityStreamIE.ie_key(), media_id))
return self.playlist_result(
entries, collection_id,
collection.get('title'), collection.get('description'))
diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dl/extractor/cwtv.py
index 224a1fb5d..73382431b 100644
--- a/youtube_dl/extractor/cwtv.py
+++ b/youtube_dl/extractor/cwtv.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
@@ -66,16 +67,19 @@ class CWTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- video_data = self._download_json(
+ data = self._download_json(
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
- video_id)['video']
+ video_id)
+ if data.get('result') != 'ok':
+ raise ExtractorError(data['msg'], expected=True)
+ video_data = data['video']
title = video_data['title']
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))
if episode and season:
- episode = episode.lstrip(season)
+ episode = episode[len(season):]
return {
'_type': 'url_transparent',
diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dl/extractor/dailymail.py
index af3978035..67b88fd56 100644
--- a/youtube_dl/extractor/dailymail.py
+++ b/youtube_dl/extractor/dailymail.py
@@ -45,10 +45,13 @@ class DailyMailIE(InfoExtractor):
sources_url = (try_get(
video_data,
(lambda x: x['plugins']['sources']['url'],
- lambda x: x['sources']['url']), compat_str) or
- 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
+ lambda x: x['sources']['url']), compat_str)
+ or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
video_sources = self._download_json(sources_url, video_id)
+ body = video_sources.get('body')
+ if body:
+ video_sources = body
formats = []
for rendition in video_sources['renditions']:
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 040f0bd02..b8529050c 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,61 +1,105 @@
# coding: utf-8
from __future__ import unicode_literals
-import base64
import functools
-import hashlib
-import itertools
import json
-import random
import re
-import string
from .common import InfoExtractor
-from ..compat import compat_struct_pack
+from ..compat import compat_HTTPError
from ..utils import (
- determine_ext,
- error_to_compat_str,
+ age_restricted,
+ clean_html,
ExtractorError,
int_or_none,
- mimetype2ext,
OnDemandPagedList,
- parse_iso8601,
- sanitized_Request,
- str_to_int,
+ try_get,
unescapeHTML,
urlencode_postdata,
)
class DailymotionBaseInfoExtractor(InfoExtractor):
- @staticmethod
- def _build_request(url):
- """Build a request with the family filter disabled"""
- request = sanitized_Request(url)
- request.add_header('Cookie', 'family_filter=off; ff=off')
- return request
+ _FAMILY_FILTER = None
+ _HEADERS = {
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://www.dailymotion.com',
+ }
+ _NETRC_MACHINE = 'dailymotion'
- def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
- request = self._build_request(url)
- return self._download_webpage_handle(request, *args, **kwargs)
+ def _get_dailymotion_cookies(self):
+ return self._get_cookies('https://www.dailymotion.com/')
- def _download_webpage_no_ff(self, url, *args, **kwargs):
- request = self._build_request(url)
- return self._download_webpage(request, *args, **kwargs)
+ @staticmethod
+ def _get_cookie_value(cookies, name):
+ cookie = cookies.get(name)
+ if cookie:
+ return cookie.value
+
+ def _set_dailymotion_cookie(self, name, value):
+ self._set_cookie('www.dailymotion.com', name, value)
+
+ def _real_initialize(self):
+ cookies = self._get_dailymotion_cookies()
+ ff = self._get_cookie_value(cookies, 'ff')
+ self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
+ self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
+
+ def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
+ if not self._HEADERS.get('Authorization'):
+ cookies = self._get_dailymotion_cookies()
+ token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
+ if not token:
+ data = {
+ 'client_id': 'f1a362d288c1b98099c7',
+ 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
+ }
+ username, password = self._get_login_info()
+ if username:
+ data.update({
+ 'grant_type': 'password',
+ 'password': password,
+ 'username': username,
+ })
+ else:
+ data['grant_type'] = 'client_credentials'
+ try:
+ token = self._download_json(
+ 'https://graphql.api.dailymotion.com/oauth/token',
+ None, 'Downloading Access Token',
+ data=urlencode_postdata(data))['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), xid)['error_description'], expected=True)
+ raise
+ self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
+ self._HEADERS['Authorization'] = 'Bearer ' + token
+
+ resp = self._download_json(
+ 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
+ 'query': '''{
+ %s(xid: "%s"%s) {
+ %s
+ }
+}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
+ }).encode(), headers=self._HEADERS)
+ obj = resp['data'][object_type]
+ if not obj:
+ raise ExtractorError(resp['errors'][0]['message'], expected=True)
+ return obj
class DailymotionIE(DailymotionBaseInfoExtractor):
- _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P<id>[^/?_]+)'
+ _VALID_URL = r'''(?ix)
+ https?://
+ (?:
+ (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
+ (?:www\.)?lequipe\.fr/video
+ )
+ /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
+ '''
IE_NAME = 'dailymotion'
-
- _FORMATS = [
- ('stream_h264_ld_url', 'ld'),
- ('stream_h264_url', 'standard'),
- ('stream_h264_hq_url', 'hq'),
- ('stream_h264_hd_url', 'hd'),
- ('stream_h264_hd1080_url', 'hd180'),
- ]
-
_TESTS = [{
'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
'md5': '074b95bdee76b9e3654137aee9c79dfe',
@@ -64,7 +108,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'ext': 'mp4',
'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
- 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
'duration': 187,
'timestamp': 1493651285,
'upload_date': '20170501',
@@ -130,259 +173,171 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, {
'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/x791mem',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
+ 'only_matching': True,
}]
+ _GEO_BYPASS = False
+ _COMMON_MEDIA_FIELDS = '''description
+ geoblockedCountries {
+ allowed
+ }
+ xid'''
@staticmethod
def _extract_urls(webpage):
+ urls = []
# Look for embedded Dailymotion player
- matches = re.findall(
- r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
- return list(map(lambda m: unescapeHTML(m[1]), matches))
+ # https://developer.dailymotion.com/player#player-parameters
+ for mobj in re.finditer(
+ r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
+ urls.append(unescapeHTML(mobj.group('url')))
+ for mobj in re.finditer(
+ r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
+ urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
+ return urls
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage_no_ff(
- 'https://www.dailymotion.com/video/%s' % video_id, video_id)
-
- age_limit = self._rta_search(webpage)
-
- description = self._og_search_description(
- webpage, default=None) or self._html_search_meta(
- 'description', webpage, 'description')
-
- view_count_str = self._search_regex(
- (r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
- r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
- webpage, 'view count', default=None)
- if view_count_str:
- view_count_str = re.sub(r'\s', '', view_count_str)
- view_count = str_to_int(view_count_str)
- comment_count = int_or_none(self._search_regex(
- r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
- webpage, 'comment count', default=None))
-
- player_v5 = self._search_regex(
- [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
- r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
- r'buildPlayer\(({.+?})\);',
- r'var\s+config\s*=\s*({.+?});',
- # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
- r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
- webpage, 'player v5', default=None)
- if player_v5:
- player = self._parse_json(player_v5, video_id)
- metadata = player['metadata']
-
- if metadata.get('error', {}).get('type') == 'password_protected':
- password = self._downloader.params.get('videopassword')
- if password:
- r = int(metadata['id'][1:], 36)
- us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
- t = ''.join(random.choice(string.ascii_letters) for i in range(10))
- n = us64e(compat_struct_pack('I', r))
- i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
- metadata = self._download_json(
- 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
-
- self._check_error(metadata)
-
- formats = []
- for quality, media_list in metadata['qualities'].items():
- for media in media_list:
- media_url = media.get('url')
- if not media_url:
- continue
- type_ = media.get('type')
- if type_ == 'application/vnd.lumberjack.manifest':
- continue
- ext = mimetype2ext(type_) or determine_ext(media_url)
- if ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- media_url, video_id, 'mp4', preference=-1,
- m3u8_id='hls', fatal=False)
- for f in m3u8_formats:
- f['url'] = f['url'].split('#')[0]
- formats.append(f)
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
- else:
- f = {
- 'url': media_url,
- 'format_id': 'http-%s' % quality,
- 'ext': ext,
- }
- m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
- if m:
- f.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- formats.append(f)
- self._sort_formats(formats)
-
- title = metadata['title']
- duration = int_or_none(metadata.get('duration'))
- timestamp = int_or_none(metadata.get('created_time'))
- thumbnail = metadata.get('poster_url')
- uploader = metadata.get('owner', {}).get('screenname')
- uploader_id = metadata.get('owner', {}).get('id')
-
- subtitles = {}
- subtitles_data = metadata.get('subtitles', {}).get('data', {})
- if subtitles_data and isinstance(subtitles_data, dict):
- for subtitle_lang, subtitle in subtitles_data.items():
- subtitles[subtitle_lang] = [{
- 'ext': determine_ext(subtitle_url),
- 'url': subtitle_url,
- } for subtitle_url in subtitle.get('urls', [])]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'age_limit': age_limit,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- # vevo embed
- vevo_id = self._search_regex(
- r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
- webpage, 'vevo embed', default=None)
- if vevo_id:
- return self.url_result('vevo:%s' % vevo_id, 'Vevo')
-
- # fallback old player
- embed_page = self._download_webpage_no_ff(
- 'https://www.dailymotion.com/embed/video/%s' % video_id,
- video_id, 'Downloading embed page')
-
- timestamp = parse_iso8601(self._html_search_meta(
- 'video:release_date', webpage, 'upload date'))
-
- info = self._parse_json(
- self._search_regex(
- r'var info = ({.*?}),$', embed_page,
- 'video info', flags=re.MULTILINE),
- video_id)
-
- self._check_error(info)
+ video_id, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if playlist_id:
+ if not self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+ return self.url_result(
+ 'http://www.dailymotion.com/playlist/' + playlist_id,
+ 'DailymotionPlaylist', playlist_id)
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ password = self._downloader.params.get('videopassword')
+ media = self._call_api(
+ 'media', video_id, '''... on Video {
+ %s
+ stats {
+ likes {
+ total
+ }
+ views {
+ total
+ }
+ }
+ }
+ ... on Live {
+ %s
+ audienceCount
+ isOnAir
+ }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
+ 'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
+ xid = media['xid']
+
+ metadata = self._download_json(
+ 'https://www.dailymotion.com/player/metadata/video/' + xid,
+ xid, 'Downloading metadata JSON',
+ query={'app': 'com.dailymotion.neon'})
+
+ error = metadata.get('error')
+ if error:
+ title = error.get('title') or error['raw_message']
+ # See https://developer.dailymotion.com/api#access-error
+ if error.get('code') == 'DM007':
+ allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
+ self.raise_geo_restricted(msg=title, countries=allowed_countries)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, title), expected=True)
+ title = metadata['title']
+ is_live = media.get('isOnAir')
formats = []
- for (key, format_id) in self._FORMATS:
- video_url = info.get(key)
- if video_url is not None:
- m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
- if m_size is not None:
- width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
+ for quality, media_list in metadata['qualities'].items():
+ for m in media_list:
+ media_url = m.get('url')
+ media_type = m.get('type')
+ if not media_url or media_type == 'application/vnd.lumberjack.manifest':
+ continue
+ if media_type == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
else:
- width, height = None, None
- formats.append({
- 'url': video_url,
- 'ext': 'mp4',
- 'format_id': format_id,
- 'width': width,
- 'height': height,
- })
+ f = {
+ 'url': media_url,
+ 'format_id': 'http-' + quality,
+ }
+ m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
+ if m:
+ width, height, fps = map(int_or_none, m.groups())
+ f.update({
+ 'fps': fps,
+ 'height': height,
+ 'width': width,
+ })
+ formats.append(f)
+ for f in formats:
+ f['url'] = f['url'].split('#')[0]
+ if not f.get('fps') and f['format_id'].endswith('@60'):
+ f['fps'] = 60
self._sort_formats(formats)
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, webpage)
-
- title = self._og_search_title(webpage, default=None)
- if title is None:
- title = self._html_search_regex(
- r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
- 'title')
+ subtitles = {}
+ subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
+ for subtitle_lang, subtitle in subtitles_data.items():
+ subtitles[subtitle_lang] = [{
+ 'url': subtitle_url,
+ } for subtitle_url in subtitle.get('urls', [])]
+
+ thumbnails = []
+ for height, poster_url in metadata.get('posters', {}).items():
+ thumbnails.append({
+ 'height': int_or_none(height),
+ 'id': height,
+ 'url': poster_url,
+ })
+
+ owner = metadata.get('owner') or {}
+ stats = media.get('stats') or {}
+ get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
return {
'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(media.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(metadata.get('duration')) or None,
+ 'timestamp': int_or_none(metadata.get('created_time')),
+ 'uploader': owner.get('screenname'),
+ 'uploader_id': owner.get('id') or metadata.get('screenname'),
+ 'age_limit': 18 if metadata.get('explicit') else 0,
+ 'tags': metadata.get('tags'),
+ 'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
+ 'like_count': get_count('like'),
'formats': formats,
- 'uploader': info['owner.screenname'],
- 'timestamp': timestamp,
- 'title': title,
- 'description': description,
- 'subtitles': video_subtitles,
- 'thumbnail': info['thumbnail_url'],
- 'age_limit': age_limit,
- 'view_count': view_count,
- 'duration': info['duration']
+ 'subtitles': subtitles,
+ 'is_live': is_live,
}
- def _check_error(self, info):
- error = info.get('error')
- if error:
- title = error.get('title') or error['message']
- # See https://developer.dailymotion.com/api#access-error
- if error.get('code') == 'DM007':
- self.raise_geo_restricted(msg=title)
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, title), expected=True)
-
- def _get_subtitles(self, video_id, webpage):
- try:
- sub_list = self._download_webpage(
- 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
- video_id, note=False)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
- return {}
- info = json.loads(sub_list)
- if (info['total'] > 0):
- sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
- return sub_lang_list
- self._downloader.report_warning('video doesn\'t have subtitles')
- return {}
-
-class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
- IE_NAME = 'dailymotion:playlist'
- _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
- _TESTS = [{
- 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
- 'info_dict': {
- 'title': 'SPORT',
- 'id': 'xv4bw',
- },
- 'playlist_mincount': 20,
- }]
+class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
_PAGE_SIZE = 100
- def _fetch_page(self, playlist_id, authorizaion, page):
+ def _fetch_page(self, playlist_id, page):
page += 1
- videos = self._download_json(
- 'https://graphql.api.dailymotion.com',
- playlist_id, 'Downloading page %d' % page,
- data=json.dumps({
- 'query': '''{
- collection(xid: "%s") {
- videos(first: %d, page: %d) {
- pageInfo {
- hasNextPage
- nextPage
- }
+ videos = self._call_api(
+ self._OBJECT_TYPE, playlist_id,
+ '''videos(allowExplicit: %s, first: %d, page: %d) {
edges {
node {
xid
url
}
}
- }
- }
-}''' % (playlist_id, self._PAGE_SIZE, page)
- }).encode(), headers={
- 'Authorization': authorizaion,
- 'Origin': 'https://www.dailymotion.com',
- })['data']['collection']['videos']
+ }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
+ 'Downloading page %d' % page)['videos']
for edge in videos['edges']:
node = edge['node']
yield self.url_result(
@@ -390,86 +345,49 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- api = self._parse_json(self._search_regex(
- r'__PLAYER_CONFIG__\s*=\s*({.+?});',
- webpage, 'player config'), playlist_id)['context']['api']
- auth = self._download_json(
- api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
- playlist_id, data=urlencode_postdata({
- 'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
- 'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
- 'grant_type': 'client_credentials',
- }))
- authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
entries = OnDemandPagedList(functools.partial(
- self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
+ self._fetch_page, playlist_id), self._PAGE_SIZE)
return self.playlist_result(
- entries, playlist_id,
- self._og_search_title(webpage))
+ entries, playlist_id)
-class DailymotionUserIE(DailymotionBaseInfoExtractor):
+class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:playlist'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+ 'info_dict': {
+ 'id': 'xv4bw',
+ },
+ 'playlist_mincount': 20,
+ }]
+ _OBJECT_TYPE = 'collection'
+
+
+class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
- _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
- _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
_TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': {
'id': 'nqtv',
- 'title': 'Rémi Gaillard',
},
- 'playlist_mincount': 100,
+ 'playlist_mincount': 152,
}, {
'url': 'http://www.dailymotion.com/user/UnderProject',
'info_dict': {
'id': 'UnderProject',
- 'title': 'UnderProject',
},
- 'playlist_mincount': 1800,
- 'expected_warnings': [
- 'Stopped at duplicated page',
- ],
+ 'playlist_mincount': 1000,
'skip': 'Takes too long time',
+ }, {
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 148,
+ 'params': {
+ 'age_limit': 0,
+ },
}]
-
- def _extract_entries(self, id):
- video_ids = set()
- processed_urls = set()
- for pagenum in itertools.count(1):
- page_url = self._PAGE_TEMPLATE % (id, pagenum)
- webpage, urlh = self._download_webpage_handle_no_ff(
- page_url, id, 'Downloading page %s' % pagenum)
- if urlh.geturl() in processed_urls:
- self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
- page_url, urlh.geturl()), id)
- break
-
- processed_urls.add(urlh.geturl())
-
- for video_id in re.findall(r'data-xid="(.+?)"', webpage):
- if video_id not in video_ids:
- yield self.url_result(
- 'http://www.dailymotion.com/video/%s' % video_id,
- DailymotionIE.ie_key(), video_id)
- video_ids.add(video_id)
-
- if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
- break
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user = mobj.group('user')
- webpage = self._download_webpage(
- 'https://www.dailymotion.com/user/%s' % user, user)
- full_user = unescapeHTML(self._html_search_regex(
- r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
- webpage, 'user'))
-
- return {
- '_type': 'playlist',
- 'id': user,
- 'title': full_user,
- 'entries': self._extract_entries(user),
- }
+ _OBJECT_TYPE = 'channel'
diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py
deleted file mode 100644
index dbc1aa5d4..000000000
--- a/youtube_dl/extractor/daisuki.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from __future__ import unicode_literals
-
-import base64
-import json
-import random
-import re
-
-from .common import InfoExtractor
-from ..aes import (
- aes_cbc_decrypt,
- aes_cbc_encrypt,
-)
-from ..compat import compat_b64decode
-from ..utils import (
- bytes_to_intlist,
- bytes_to_long,
- extract_attributes,
- ExtractorError,
- intlist_to_bytes,
- js_to_json,
- int_or_none,
- long_to_bytes,
- pkcs1pad,
-)
-
-
-class DaisukiMottoIE(InfoExtractor):
- _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
-
- _TEST = {
- 'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
- 'info_dict': {
- 'id': 'V2e',
- 'ext': 'mp4',
- 'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
- 'subtitles': {
- 'mul': [{
- 'ext': 'ttml',
- }],
- },
- },
- 'params': {
- 'skip_download': True, # AES-encrypted HLS stream
- },
- }
-
- # The public key in PEM format can be found in clientlibs_anime_watch.min.js
- _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- flashvars = self._parse_json(self._search_regex(
- r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
- video_id, transform_source=js_to_json)
-
- iv = [0] * 16
-
- data = {}
- for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
- data[key] = flashvars.get(key, '')
-
- encrypted_rtn = None
-
- # Some AES keys are rejected. Try it with different AES keys
- for idx in range(5):
- aes_key = [random.randint(0, 254) for _ in range(32)]
- padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))
-
- n, e = self._RSA_KEY
- encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
- init_data = self._download_json(
- 'http://motto.daisuki.net/fastAPI/bgn/init/',
- video_id, query={
- 's': flashvars.get('s', ''),
- 'c': flashvars.get('ss3_prm', ''),
- 'e': url,
- 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
- bytes_to_intlist(json.dumps(data)),
- aes_key, iv))).decode('ascii'),
- 'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
- }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
-
- if 'rtn' in init_data:
- encrypted_rtn = init_data['rtn']
- break
-
- self._sleep(5, video_id)
-
- if encrypted_rtn is None:
- raise ExtractorError('Failed to fetch init data')
-
- rtn = self._parse_json(
- intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
- compat_b64decode(encrypted_rtn)),
- aes_key, iv)).decode('utf-8').rstrip('\0'),
- video_id)
-
- title = rtn['title_str']
-
- formats = self._extract_m3u8_formats(
- rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
-
- subtitles = {}
- caption_url = rtn.get('caption_url')
- if caption_url:
- # mul: multiple languages
- subtitles['mul'] = [{
- 'url': caption_url,
- 'ext': 'ttml',
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DaisukiMottoPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
-
- _TEST = {
- 'url': 'http://motto.daisuki.net/information/',
- 'info_dict': {
- 'title': 'DRAGON BALL SUPER',
- },
- 'playlist_mincount': 117,
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = []
- for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
- attr = extract_attributes(li)
- ad_id = attr.get('data-ad_id')
- product_id = attr.get('data-product_id')
- if ad_id and product_id:
- episode_id = attr.get('data-chapter')
- entries.append({
- '_type': 'url_transparent',
- 'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
- 'episode_id': episode_id,
- 'episode_number': int_or_none(episode_id),
- 'ie_key': 'DaisukiMotto',
- })
-
- return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index 76f021892..137095577 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -2,25 +2,21 @@
from __future__ import unicode_literals
-import re
import itertools
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
compat_urlparse,
)
-from ..utils import (
- int_or_none,
- str_to_int,
- xpath_text,
- unescapeHTML,
-)
-class DaumIE(InfoExtractor):
+class DaumBaseIE(InfoExtractor):
+ _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
+
+
+class DaumIE(DaumBaseIE):
_VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
IE_NAME = 'daum.net'
@@ -36,6 +32,9 @@ class DaumIE(InfoExtractor):
'duration': 2117,
'view_count': int,
'comment_count': int,
+ 'uploader_id': 186139,
+ 'uploader': '콘간지',
+ 'timestamp': 1387310323,
},
}, {
'url': 'http://m.tvpot.daum.net/v/65139429',
@@ -44,11 +43,14 @@ class DaumIE(InfoExtractor):
'ext': 'mp4',
'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
'description': 'md5:79794514261164ff27e36a21ad229fc5',
- 'upload_date': '20150604',
+ 'upload_date': '20150118',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 154,
'view_count': int,
'comment_count': int,
+ 'uploader': 'MBC 예능',
+ 'uploader_id': 132251,
+ 'timestamp': 1421604228,
},
}, {
'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
@@ -59,12 +61,15 @@ class DaumIE(InfoExtractor):
'id': 'vwIpVpCQsT8$',
'ext': 'flv',
'title': '01-Korean War ( Trouble on the horizon )',
- 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
+ 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
'upload_date': '20080223',
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 249,
'view_count': int,
'comment_count': int,
+ 'uploader': '까칠한 墮落始祖 황비홍님의',
+ 'uploader_id': 560824,
+ 'timestamp': 1203770745,
},
}, {
# Requires dte_type=WEB (#9972)
@@ -73,60 +78,24 @@ class DaumIE(InfoExtractor):
'info_dict': {
'id': 's3794Uf1NZeZ1qMpGpeqeRU',
'ext': 'mp4',
- 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
- 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
- 'upload_date': '20160611',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'upload_date': '20170129',
+ 'uploader': '쇼! 음악중심',
+ 'uploader_id': 2653210,
+ 'timestamp': 1485684628,
},
}]
def _real_extract(self, url):
video_id = compat_urllib_parse_unquote(self._match_id(url))
- movie_data = self._download_json(
- 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
- video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
-
- # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
- if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
- return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
-
- info = self._download_xml(
- 'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
- 'Downloading video info', query={'vid': video_id})
-
- formats = []
- for format_el in movie_data['output_list']['output_list']:
- profile = format_el['profile']
- format_query = compat_urllib_parse_urlencode({
- 'vid': video_id,
- 'profile': profile,
- })
- url_doc = self._download_xml(
- 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
- video_id, note='Downloading video data for %s format' % profile)
- format_url = url_doc.find('result/url').text
- formats.append({
- 'url': format_url,
- 'format_id': profile,
- 'width': int_or_none(format_el.get('width')),
- 'height': int_or_none(format_el.get('height')),
- 'filesize': int_or_none(format_el.get('filesize')),
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': info.find('TITLE').text,
- 'formats': formats,
- 'thumbnail': xpath_text(info, 'THUMB_URL'),
- 'description': xpath_text(info, 'CONTENTS'),
- 'duration': int_or_none(xpath_text(info, 'DURATION')),
- 'upload_date': info.find('REGDTTM').text[:8],
- 'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
- 'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
- }
+ if not video_id.isdigit():
+ video_id += '@my'
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
-class DaumClipIE(InfoExtractor):
+class DaumClipIE(DaumBaseIE):
_VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
IE_NAME = 'daum.net:clip'
_URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
@@ -142,6 +111,9 @@ class DaumClipIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
'duration': 3868,
'view_count': int,
+ 'uploader': 'GOMeXP',
+ 'uploader_id': 6667,
+ 'timestamp': 1377911092,
},
}, {
'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
@@ -154,22 +126,8 @@ class DaumClipIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- clip_info = self._download_json(
- 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
- video_id, 'Downloading clip info')['clip_bean']
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
- 'title': unescapeHTML(clip_info['title']),
- 'thumbnail': clip_info.get('thumb_url'),
- 'description': clip_info.get('contents'),
- 'duration': int_or_none(clip_info.get('duration')),
- 'upload_date': clip_info.get('up_date')[:8],
- 'view_count': int_or_none(clip_info.get('play_count')),
- 'ie_key': 'Daum',
- }
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
class DaumListIE(InfoExtractor):
diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dl/extractor/dbtv.py
index f232f0dc5..aaedf2e3d 100644
--- a/youtube_dl/extractor/dbtv.py
+++ b/youtube_dl/extractor/dbtv.py
@@ -7,50 +7,51 @@ from .common import InfoExtractor
class DBTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dbtv\.no/(?:[^/]+/)?(?P<id>[0-9]+)(?:#(?P<display_id>.+))?'
+ _VALID_URL = r'https?://(?:www\.)?dagbladet\.no/video/(?:(?:embed|(?P<display_id>[^/]+))/)?(?P<id>[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8})'
_TESTS = [{
- 'url': 'http://dbtv.no/3649835190001#Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
- 'md5': '2e24f67936517b143a234b4cadf792ec',
+ 'url': 'https://www.dagbladet.no/video/PynxJnNWChE/',
+ 'md5': 'b8f850ba1860adbda668d367f9b77699',
'info_dict': {
- 'id': '3649835190001',
- 'display_id': 'Skulle_teste_ut_fornøyelsespark,_men_kollegaen_var_bare_opptatt_av_bikinikroppen',
+ 'id': 'PynxJnNWChE',
'ext': 'mp4',
'title': 'Skulle teste ut fornøyelsespark, men kollegaen var bare opptatt av bikinikroppen',
- 'description': 'md5:1504a54606c4dde3e4e61fc97aa857e0',
+ 'description': 'md5:49cc8370e7d66e8a2ef15c3b4631fd3f',
'thumbnail': r're:https?://.*\.jpg',
- 'timestamp': 1404039863,
- 'upload_date': '20140629',
- 'duration': 69.544,
- 'uploader_id': '1027729757001',
+ 'upload_date': '20160916',
+ 'duration': 69,
+ 'uploader_id': 'UCk5pvsyZJoYJBd7_oFPTlRQ',
+ 'uploader': 'Dagbladet',
},
- 'add_ie': ['BrightcoveNew']
+ 'add_ie': ['Youtube']
}, {
- 'url': 'http://dbtv.no/3649835190001',
+ 'url': 'https://www.dagbladet.no/video/embed/xlGmyIeN9Jo/?autoplay=false',
'only_matching': True,
}, {
- 'url': 'http://www.dbtv.no/lazyplayer/4631135248001',
- 'only_matching': True,
- }, {
- 'url': 'http://dbtv.no/vice/5000634109001',
- 'only_matching': True,
- }, {
- 'url': 'http://dbtv.no/filmtrailer/3359293614001',
+ 'url': 'https://www.dagbladet.no/video/truer-iran-bor-passe-dere/PalfB2Cw',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dbtv\.no/(?:lazy)?player/\d+.*?)\1',
+ r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
webpage)]
def _real_extract(self, url):
- video_id, display_id = re.match(self._VALID_URL, url).groups()
-
- return {
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+ info = {
'_type': 'url_transparent',
- 'url': 'http://players.brightcove.net/1027729757001/default_default/index.html?videoId=%s' % video_id,
'id': video_id,
'display_id': display_id,
- 'ie_key': 'BrightcoveNew',
}
+ if len(video_id) == 11:
+ info.update({
+ 'url': video_id,
+ 'ie_key': 'Youtube',
+ })
+ else:
+ info.update({
+ 'url': 'jwplatform:' + video_id,
+ 'ie_key': 'JWPlatform',
+ })
+ return info
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index 769a219df..e700f8d86 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -16,10 +16,11 @@ class DctpTvIE(InfoExtractor):
_TESTS = [{
# 4x3
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+ 'md5': '3ffbd1556c3fe210724d7088fad723e3',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
- 'ext': 'flv',
+ 'ext': 'm4v',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'thumbnail': r're:^https?://.*\.jpg$',
@@ -27,10 +28,6 @@ class DctpTvIE(InfoExtractor):
'timestamp': 1302172322,
'upload_date': '20110407',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
}, {
# 16x9
'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
@@ -59,33 +56,26 @@ class DctpTvIE(InfoExtractor):
uuid = media['uuid']
title = media['title']
- ratio = '16x9' if media.get('is_wide') else '4x3'
- play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
-
- servers = self._download_json(
- 'http://www.dctp.tv/streaming_servers/', display_id,
- note='Downloading server list JSON', fatal=False)
-
- if servers:
- endpoint = next(
- server['endpoint']
- for server in servers
- if url_or_none(server.get('endpoint')) and
- 'cloudfront' in server['endpoint'])
- else:
- endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
-
- app = self._search_regex(
- r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
-
- formats = [{
- 'url': endpoint,
- 'app': app,
- 'play_path': play_path,
- 'page_url': url,
- 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
- 'ext': 'flv',
- }]
+ is_wide = media.get('is_wide')
+ formats = []
+
+ def add_formats(suffix):
+ templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
+ formats.extend([{
+ 'format_id': 'hls-' + suffix,
+ 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
+ 'protocol': 'm3u8_native',
+ }, {
+ 'format_id': 's3-' + suffix,
+ 'url': templ % 'completed-media.s3.amazonaws.com',
+ }, {
+ 'format_id': 'http-' + suffix,
+ 'url': templ % 'cdn-media.dctp.tv',
+ }])
+
+ add_formats('0500_' + ('16x9' if is_wide else '4x3'))
+ if is_wide:
+ add_formats('720p')
thumbnails = []
images = media.get('images')
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
index 3589bd428..e0139cc86 100644
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@@ -5,37 +5,43 @@ import re
import string
from .discoverygo import DiscoveryGoBaseIE
-from ..compat import (
- compat_str,
- compat_urllib_parse_unquote,
-)
-from ..utils import (
- ExtractorError,
- try_get,
-)
+from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError
from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>
- discovery|
- investigationdiscovery|
- discoverylife|
- animalplanet|
- ahctv|
- destinationamerica|
- sciencechannel|
- tlc|
- velocity
- )\.com(?P<path>/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+))'''
+ _VALID_URL = r'''(?x)https?://
+ (?P<site>
+ go\.discovery|
+ www\.
+ (?:
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc
+ )|
+ watch\.
+ (?:
+ hgtv|
+ foodnetwork|
+ travelchannel|
+ diynetwork|
+ cookingchanneltv|
+ motortrend
+ )
+ )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
_TESTS = [{
- 'url': 'https://www.discovery.com/tv-shows/cash-cab/videos/dave-foley',
+ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
'info_dict': {
- 'id': '5a2d9b4d6b66d17a5026e1fd',
+ 'id': '5a2f35ce6b66d17a5026e29e',
'ext': 'mp4',
- 'title': 'Dave Foley',
- 'description': 'md5:4b39bcafccf9167ca42810eb5f28b01f',
- 'duration': 608,
+ 'title': 'Riding with Matthew Perry',
+ 'description': 'md5:a34333153e79bc4526019a5129e7f878',
+ 'duration': 84,
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -43,20 +49,20 @@ class DiscoveryIE(DiscoveryGoBaseIE):
}, {
'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
'only_matching': True,
+ }, {
+ 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
+ 'only_matching': True,
+ }, {
+ # using `show_slug` is important to get the correct video data
+ 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
+ 'only_matching': True,
}]
_GEO_COUNTRIES = ['US']
_GEO_BYPASS = False
+ _API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url):
- site, path, display_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, display_id)
-
- react_data = self._parse_json(self._search_regex(
- r'window\.__reactTransmitPacket\s*=\s*({.+?});',
- webpage, 'react data'), display_id)
- content_blocks = react_data['layout'][path]['contentBlocks']
- video = next(cb for cb in content_blocks if cb.get('type') == 'video')['content']['items'][0]
- video_id = video['id']
+ site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
access_token = None
cookies = self._get_cookies(url)
@@ -66,26 +72,36 @@ class DiscoveryIE(DiscoveryGoBaseIE):
if auth_storage_cookie and auth_storage_cookie.value:
auth_storage = self._parse_json(compat_urllib_parse_unquote(
compat_urllib_parse_unquote(auth_storage_cookie.value)),
- video_id, fatal=False) or {}
+ display_id, fatal=False) or {}
access_token = auth_storage.get('a') or auth_storage.get('access_token')
if not access_token:
access_token = self._download_json(
- 'https://www.%s.com/anonymous' % site, display_id, query={
+ 'https://%s.com/anonymous' % site, display_id,
+ 'Downloading token JSON metadata', query={
'authRel': 'authorization',
- 'client_id': try_get(
- react_data, lambda x: x['application']['apiClientId'],
- compat_str) or '3020a40c2356a645b4b4',
+ 'client_id': '3020a40c2356a645b4b4',
'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
- 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
+ 'redirectUri': 'https://www.discovery.com/',
})['access_token']
+ headers = self.geo_verification_headers()
+ headers['Authorization'] = 'Bearer ' + access_token
+
try:
+ video = self._download_json(
+ self._API_BASE_URL + 'content/videos',
+ display_id, 'Downloading content JSON metadata',
+ headers=headers, query={
+ 'embed': 'show.name',
+ 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
+ 'slug': display_id,
+ 'show_slug': show_slug,
+ })[0]
+ video_id = video['id']
stream = self._download_json(
- 'https://api.discovery.com/v1/streaming/video/' + video_id,
- display_id, headers={
- 'Authorization': 'Bearer ' + access_token,
- })
+ self._API_BASE_URL + 'streaming/video/' + video_id,
+ display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
e_description = self._parse_json(
diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py
index fba1ef221..607a54948 100644
--- a/youtube_dl/extractor/discoverynetworks.py
+++ b/youtube_dl/extractor/discoverynetworks.py
@@ -3,63 +3,38 @@ from __future__ import unicode_literals
import re
-from .brightcove import BrightcoveLegacyIE
from .dplay import DPlayIE
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
-)
-from ..utils import smuggle_url
class DiscoveryNetworksDeIE(DPlayIE):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
- (?:
- .*\#(?P<id>\d+)|
- (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
- programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
- )'''
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
_TESTS = [{
- 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
+ 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
'info_dict': {
- 'id': '3235167922001',
+ 'id': '78867',
'ext': 'mp4',
- 'title': 'Breaking Amish: Die Welt da draußen',
- 'description': (
- 'Vier Amische und eine Mennonitin wagen in New York'
- ' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
- ' ihrem spannenden Weg.'),
- 'timestamp': 1396598084,
- 'upload_date': '20140404',
- 'uploader_id': '1659832546',
+ 'title': 'Die Welt da draußen',
+ 'description': 'md5:61033c12b73286e409d99a41742ef608',
+ 'timestamp': 1554069600,
+ 'upload_date': '20190331',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
},
}, {
- 'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
+ 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
'only_matching': True,
}, {
- 'url': 'http://www.discovery.de/#5332316765001',
+ 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
'only_matching': True,
}]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- alternate_id = mobj.group('alternate_id')
- if alternate_id:
- self._initialize_geo_bypass({
- 'countries': ['DE'],
- })
- return self._get_disco_api_info(
- url, '%s/%s' % (mobj.group('programme'), alternate_id),
- 'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
- brightcove_id = mobj.group('id')
- if not brightcove_id:
- title = mobj.group('title')
- webpage = self._download_webpage(url, title)
- brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
- brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
- brightcove_legacy_url).query)['@videoPlayer'][0]
- return self.url_result(smuggle_url(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
- 'BrightcoveNew', brightcove_id)
+ domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
+ country = 'GB' if domain == 'dplay.co.uk' else 'DE'
+ realm = 'questuk' if country == 'GB' else domain.replace('.', '')
+ return self._get_disco_api_info(
+ url, '%s/%s' % (programme, alternate_id),
+ 'sonic-eu1-prod.disco-api.com', realm, country)
diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py
index c05f601e2..c345e0274 100644
--- a/youtube_dl/extractor/dispeak.py
+++ b/youtube_dl/extractor/dispeak.py
@@ -58,10 +58,17 @@ class DigitallySpeakingIE(InfoExtractor):
stream_name = xpath_text(a_format, 'streamName', fatal=True)
video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path')
url = video_root + video_path
- vbr = xpath_text(a_format, 'bitrate')
+ bitrate = xpath_text(a_format, 'bitrate')
+ tbr = int_or_none(bitrate)
+ vbr = int_or_none(self._search_regex(
+ r'-(\d+)\.mp4', video_path, 'vbr', default=None))
+ abr = tbr - vbr if tbr and vbr else None
video_formats.append({
+ 'format_id': bitrate,
'url': url,
- 'vbr': int_or_none(vbr),
+ 'tbr': tbr,
+ 'vbr': vbr,
+ 'abr': abr,
})
return video_formats
diff --git a/youtube_dl/extractor/dlive.py b/youtube_dl/extractor/dlive.py
new file mode 100644
index 000000000..d95c67a5b
--- /dev/null
+++ b/youtube_dl/extractor/dlive.py
@@ -0,0 +1,97 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class DLiveVODIE(InfoExtractor):
+ IE_NAME = 'dlive:vod'
+ _VALID_URL = r'https?://(?:www\.)?dlive\.tv/p/(?P<uploader_id>.+?)\+(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://dlive.tv/p/pdp+3mTzOl4WR',
+ 'info_dict': {
+ 'id': '3mTzOl4WR',
+ 'ext': 'mp4',
+ 'title': 'Minecraft with james charles epic',
+ 'upload_date': '20190701',
+ 'timestamp': 1562011015,
+ 'uploader_id': 'pdp',
+ }
+ }, {
+ 'url': 'https://dlive.tv/p/pdpreplay+D-RD-xSZg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ uploader_id, vod_id = re.match(self._VALID_URL, url).groups()
+ broadcast = self._download_json(
+ 'https://graphigo.prd.dlive.tv/', vod_id,
+ data=json.dumps({'query': '''query {
+ pastBroadcast(permlink:"%s+%s") {
+ content
+ createdAt
+ length
+ playbackUrl
+ title
+ thumbnailUrl
+ viewCount
+ }
+}''' % (uploader_id, vod_id)}).encode())['data']['pastBroadcast']
+ title = broadcast['title']
+ formats = self._extract_m3u8_formats(
+ broadcast['playbackUrl'], vod_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+ return {
+ 'id': vod_id,
+ 'title': title,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'description': broadcast.get('content'),
+ 'thumbnail': broadcast.get('thumbnailUrl'),
+ 'timestamp': int_or_none(broadcast.get('createdAt'), 1000),
+ 'view_count': int_or_none(broadcast.get('viewCount')),
+ }
+
+
+class DLiveStreamIE(InfoExtractor):
+ IE_NAME = 'dlive:stream'
+ _VALID_URL = r'https?://(?:www\.)?dlive\.tv/(?!p/)(?P<id>[\w.-]+)'
+
+ def _real_extract(self, url):
+ display_name = self._match_id(url)
+ user = self._download_json(
+ 'https://graphigo.prd.dlive.tv/', display_name,
+ data=json.dumps({'query': '''query {
+ userByDisplayName(displayname:"%s") {
+ livestream {
+ content
+ createdAt
+ title
+ thumbnailUrl
+ watchingCount
+ }
+ username
+ }
+}''' % display_name}).encode())['data']['userByDisplayName']
+ livestream = user['livestream']
+ title = livestream['title']
+ username = user['username']
+ formats = self._extract_m3u8_formats(
+ 'https://live.prd.dlive.tv/hls/live/%s.m3u8' % username,
+ display_name, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': display_name,
+ 'title': self._live_title(title),
+ 'uploader': display_name,
+ 'uploader_id': username,
+ 'formats': formats,
+ 'description': livestream.get('content'),
+ 'thumbnail': livestream.get('thumbnailUrl'),
+ 'is_live': True,
+ 'timestamp': int_or_none(livestream.get('createdAt'), 1000),
+ 'view_count': int_or_none(livestream.get('watchingCount')),
+ }
diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py
index ebf59512c..a7b9db568 100644
--- a/youtube_dl/extractor/dplay.py
+++ b/youtube_dl/extractor/dplay.py
@@ -1,74 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
-import time
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
- remove_end,
- try_get,
- unified_strdate,
unified_timestamp,
- update_url_query,
- urljoin,
- USER_AGENTS,
)
class DPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
+ _VALID_URL = r'''(?x)https?://
+ (?P<domain>
+ (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
+ (?P<subdomain_country>es|it)\.dplay\.com
+ )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
_TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL
- 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
+ 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
'info_dict': {
- 'id': '3172',
- 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
+ 'id': '13628',
+ 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
'ext': 'mp4',
'title': 'Svensken lär sig njuta av livet',
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
- 'duration': 2650,
- 'timestamp': 1365454320,
+ 'duration': 2649.856,
+ 'timestamp': 1365453720,
'upload_date': '20130408',
- 'creator': 'Kanal 5 (Home)',
+ 'creator': 'Kanal 5',
'series': 'Nugammalt - 77 händelser som format Sverige',
'season_number': 1,
'episode_number': 1,
- 'age_limit': 0,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
},
}, {
# geo restricted, via secure api, unsigned download hls URL
- 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
+ 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
'info_dict': {
- 'id': '70816',
- 'display_id': 'mig-og-min-mor/season-6-episode-12',
+ 'id': '104465',
+ 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
'ext': 'mp4',
- 'title': 'Episode 12',
- 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
- 'duration': 2563,
- 'timestamp': 1429696800,
- 'upload_date': '20150422',
- 'creator': 'Kanal 4 (Home)',
- 'series': 'Mig og min mor',
- 'season_number': 6,
- 'episode_number': 12,
- 'age_limit': 0,
+ 'title': 'Ted Bundy: Mind Of A Monster',
+ 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
+ 'duration': 5290.027,
+ 'timestamp': 1570694400,
+ 'upload_date': '20191010',
+ 'creator': 'ID - Investigation Discovery',
+ 'series': 'Ted Bundy: Mind Of A Monster',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
},
- }, {
- # geo restricted, via direct unsigned hls URL
- 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
- 'only_matching': True,
}, {
# disco-api
'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
@@ -89,19 +83,59 @@ class DPlayIE(InfoExtractor):
'format': 'bestvideo',
'skip_download': True,
},
+ 'skip': 'Available for Premium users',
}, {
-
- 'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
+ 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
+ 'md5': '2b808ffb00fc47b884a172ca5d13053c',
+ 'info_dict': {
+ 'id': '6918',
+ 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+ 'ext': 'mp4',
+ 'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
+ 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'upload_date': '20160524',
+ 'timestamp': 1464076800,
+ 'series': 'Biografie imbarazzanti',
+ 'season_number': 1,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
+ 'info_dict': {
+ 'id': '21652',
+ 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
+ 'ext': 'mp4',
+ 'title': 'Episodio 1',
+ 'description': 'md5:b9dcff2071086e003737485210675f69',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'upload_date': '20180709',
+ 'timestamp': 1531173540,
+ 'series': 'La fiebre del oro',
+ 'season_number': 8,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
'only_matching': True,
}, {
- 'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
+ 'url': 'https://www.dplay.jp/video/gold-rush/24086',
'only_matching': True,
}]
- def _get_disco_api_info(self, url, display_id, disco_host, realm):
- disco_base = 'https://' + disco_host
+ def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
+ geo_countries = [country.upper()]
+ self._initialize_geo_bypass({
+ 'countries': geo_countries,
+ })
+ disco_base = 'https://%s/' % disco_host
token = self._download_json(
- '%s/token' % disco_base, display_id, 'Downloading token',
+ disco_base + 'token', display_id, 'Downloading token',
query={
'realm': realm,
})['data']['attributes']['token']
@@ -110,17 +144,35 @@ class DPlayIE(InfoExtractor):
'Authorization': 'Bearer ' + token,
}
video = self._download_json(
- '%s/content/videos/%s' % (disco_base, display_id), display_id,
+ disco_base + 'content/videos/' + display_id, display_id,
headers=headers, query={
- 'include': 'show'
+ 'fields[channel]': 'name',
+ 'fields[image]': 'height,src,width',
+ 'fields[show]': 'name',
+ 'fields[tag]': 'name',
+ 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+ 'include': 'images,primaryChannel,show,tags'
})
video_id = video['data']['id']
info = video['data']['attributes']
- title = info['name']
+ title = info['name'].strip()
formats = []
- for format_id, format_dict in self._download_json(
- '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
- display_id, headers=headers)['data']['attributes']['streaming'].items():
+ try:
+ streaming = self._download_json(
+ disco_base + 'playback/videoPlaybackInfo/' + video_id,
+ display_id, headers=headers)['data']['attributes']['streaming']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+ error = info['errors'][0]
+ error_code = error.get('code')
+ if error_code == 'access.denied.geoblocked':
+ self.raise_geo_restricted(countries=geo_countries)
+ elif error_code == 'access.denied.missingpackage':
+ self.raise_login_required()
+ raise ExtractorError(info['errors'][0]['detail'], expected=True)
+ raise
+ for format_id, format_dict in streaming.items():
if not isinstance(format_dict, dict):
continue
format_url = format_dict.get('url')
@@ -142,235 +194,54 @@ class DPlayIE(InfoExtractor):
})
self._sort_formats(formats)
- series = None
- try:
- included = video.get('included')
- if isinstance(included, list):
- show = next(e for e in included if e.get('type') == 'show')
- series = try_get(
- show, lambda x: x['attributes']['name'], compat_str)
- except StopIteration:
- pass
+ creator = series = None
+ tags = []
+ thumbnails = []
+ included = video.get('included') or []
+ if isinstance(included, list):
+ for e in included:
+ attributes = e.get('attributes')
+ if not attributes:
+ continue
+ e_type = e.get('type')
+ if e_type == 'channel':
+ creator = attributes.get('name')
+ elif e_type == 'image':
+ src = attributes.get('src')
+ if src:
+ thumbnails.append({
+ 'url': src,
+ 'width': int_or_none(attributes.get('width')),
+ 'height': int_or_none(attributes.get('height')),
+ })
+ if e_type == 'show':
+ series = attributes.get('name')
+ elif e_type == 'tag':
+ name = attributes.get('name')
+ if name:
+ tags.append(name)
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': info.get('description'),
- 'duration': float_or_none(
- info.get('videoDuration'), scale=1000),
+ 'duration': float_or_none(info.get('videoDuration'), 1000),
'timestamp': unified_timestamp(info.get('publishStart')),
'series': series,
'season_number': int_or_none(info.get('seasonNumber')),
'episode_number': int_or_none(info.get('episodeNumber')),
- 'age_limit': int_or_none(info.get('minimum_age')),
+ 'creator': creator,
+ 'tags': tags,
+ 'thumbnails': thumbnails,
'formats': formats,
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
- domain = mobj.group('domain')
-
- self._initialize_geo_bypass({
- 'countries': [mobj.group('country').upper()],
- })
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._search_regex(
- r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
-
- if not video_id:
- host = mobj.group('host')
- return self._get_disco_api_info(
- url, display_id, 'disco-api.' + host, host.replace('.', ''))
-
- info = self._download_json(
- 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
- video_id)['data'][0]
-
- title = info['title']
-
- PROTOCOLS = ('hls', 'hds')
- formats = []
-
- def extract_formats(protocol, manifest_url):
- if protocol == 'hls':
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, ext='mp4',
- entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
- # Sometimes final URLs inside m3u8 are unsigned, let's fix this
- # ourselves. Also fragments' URLs are only served signed for
- # Safari user agent.
- query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
- for m3u8_format in m3u8_formats:
- m3u8_format.update({
- 'url': update_url_query(m3u8_format['url'], query),
- 'http_headers': {
- 'User-Agent': USER_AGENTS['Safari'],
- },
- })
- formats.extend(m3u8_formats)
- elif protocol == 'hds':
- formats.extend(self._extract_f4m_formats(
- manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
- video_id, f4m_id=protocol, fatal=False))
-
- domain_tld = domain.split('.')[-1]
- if domain_tld in ('se', 'dk', 'no'):
- for protocol in PROTOCOLS:
- # Providing dsc-geo allows to bypass geo restriction in some cases
- self._set_cookie(
- 'secure.dplay.%s' % domain_tld, 'dsc-geo',
- json.dumps({
- 'countryCode': domain_tld.upper(),
- 'expiry': (time.time() + 20 * 60) * 1000,
- }))
- stream = self._download_json(
- 'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
- % (domain_tld, video_id, protocol), video_id,
- 'Downloading %s stream JSON' % protocol, fatal=False)
- if stream and stream.get(protocol):
- extract_formats(protocol, stream[protocol])
-
- # The last resort is to try direct unsigned hls/hds URLs from info dictionary.
- # Sometimes this does work even when secure API with dsc-geo has failed (e.g.
- # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
- if not formats:
- for protocol in PROTOCOLS:
- if info.get(protocol):
- extract_formats(protocol, info[protocol])
-
- self._sort_formats(formats)
-
- subtitles = {}
- for lang in ('se', 'sv', 'da', 'nl', 'no'):
- for format_id in ('web_vtt', 'vtt', 'srt'):
- subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
- if subtitle_url:
- subtitles.setdefault(lang, []).append({'url': subtitle_url})
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': info.get('video_metadata_longDescription'),
- 'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
- 'timestamp': int_or_none(info.get('video_publish_date')),
- 'creator': info.get('video_metadata_homeChannel'),
- 'series': info.get('video_metadata_show'),
- 'season_number': int_or_none(info.get('season')),
- 'episode_number': int_or_none(info.get('episode')),
- 'age_limit': int_or_none(info.get('minimum_age')),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DPlayItIE(InfoExtractor):
- _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
- _GEO_COUNTRIES = ['IT']
- _TEST = {
- 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
- 'md5': '2b808ffb00fc47b884a172ca5d13053c',
- 'info_dict': {
- 'id': '6918',
- 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
- 'ext': 'mp4',
- 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
- 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- 'upload_date': '20160524',
- 'series': 'Biografie imbarazzanti',
- 'season_number': 1,
- 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
- 'episode_number': 1,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- title = remove_end(self._og_search_title(webpage), ' | Dplay')
-
- video_id = None
-
- info = self._search_regex(
- r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
- webpage, 'playback JSON', default=None)
- if info:
- for _ in range(2):
- info = self._parse_json(info, display_id, fatal=False)
- if not info:
- break
- else:
- video_id = try_get(info, lambda x: x['data']['id'])
-
- if not info:
- info_url = self._search_regex(
- (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
- webpage, 'info url', group='url')
-
- info_url = urljoin(url, info_url)
- video_id = info_url.rpartition('/')[-1]
-
- try:
- info = self._download_json(
- info_url, display_id, headers={
- 'Authorization': 'Bearer %s' % self._get_cookies(url).get(
- 'dplayit_token').value,
- 'Referer': url,
- })
- if isinstance(info, compat_str):
- info = self._parse_json(info, display_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
- info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
- error = info['errors'][0]
- if error.get('code') == 'access.denied.geoblocked':
- self.raise_geo_restricted(
- msg=error.get('detail'), countries=self._GEO_COUNTRIES)
- raise ExtractorError(info['errors'][0]['detail'], expected=True)
- raise
-
- hls_url = info['data']['attributes']['streaming']['hls']['url']
-
- formats = self._extract_m3u8_formats(
- hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- self._sort_formats(formats)
-
- series = self._html_search_regex(
- r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
- webpage, 'series', fatal=False)
- episode = self._search_regex(
- r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
- webpage, 'episode', fatal=False)
-
- mobj = re.search(
- r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
- webpage)
- if mobj:
- season_number = int(mobj.group('season_number'))
- episode_number = int(mobj.group('episode_number'))
- upload_date = unified_strdate(mobj.group('upload_date'))
- else:
- season_number = episode_number = upload_date = None
-
- return {
- 'id': compat_str(video_id or display_id),
- 'display_id': display_id,
- 'title': title,
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'series': series,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'upload_date': upload_date,
- 'formats': formats,
- }
+ domain = mobj.group('domain').lstrip('www.')
+ country = mobj.group('country') or mobj.group('subdomain_country')
+ host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
+ return self._get_disco_api_info(
+ url, display_id, host, 'dplay' + country, country)
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
deleted file mode 100644
index db1de699f..000000000
--- a/youtube_dl/extractor/dramafever.py
+++ /dev/null
@@ -1,266 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urlparse,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- parse_age_limit,
- parse_duration,
- unified_timestamp,
- url_or_none,
-)
-
-
-class DramaFeverBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'dramafever'
-
- _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
-
- _consumer_secret = None
-
- def _get_consumer_secret(self):
- mainjs = self._download_webpage(
- 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
- None, 'Downloading main.js', fatal=False)
- if not mainjs:
- return self._CONSUMER_SECRET
- return self._search_regex(
- r"var\s+cs\s*=\s*'([^']+)'", mainjs,
- 'consumer secret', default=self._CONSUMER_SECRET)
-
- def _real_initialize(self):
- self._consumer_secret = self._get_consumer_secret()
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'username': username,
- 'password': password,
- }
-
- try:
- response = self._download_json(
- 'https://www.dramafever.com/api/users/login', None, 'Logging in',
- data=json.dumps(login_form).encode('utf-8'), headers={
- 'x-consumer-key': self._consumer_secret,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404):
- response = self._parse_json(
- e.cause.read().decode('utf-8'), None)
- else:
- raise
-
- # Successful login
- if response.get('result') or response.get('guid') or response.get('user_guid'):
- return
-
- errors = response.get('errors')
- if errors and isinstance(errors, list):
- error = errors[0]
- message = error.get('message') or error['reason']
- raise ExtractorError('Unable to login: %s' % message, expected=True)
- raise ExtractorError('Unable to log in')
-
-
-class DramaFeverIE(DramaFeverBaseIE):
- IE_NAME = 'dramafever'
- _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
- _TESTS = [{
- 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
- 'info_dict': {
- 'id': '4274.1',
- 'ext': 'wvm',
- 'title': 'Heirs - Episode 1',
- 'description': 'md5:362a24ba18209f6276e032a651c50bc2',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 3783,
- 'timestamp': 1381354993,
- 'upload_date': '20131009',
- 'series': 'Heirs',
- 'season_number': 1,
- 'episode': 'Episode 1',
- 'episode_number': 1,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
- 'info_dict': {
- 'id': '4826.4',
- 'ext': 'flv',
- 'title': 'Mnet Asian Music Awards 2015',
- 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
- 'episode': 'Mnet Asian Music Awards 2015 - Part 3',
- 'episode_number': 4,
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1450213200,
- 'upload_date': '20151215',
- 'duration': 5359,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
- 'only_matching': True,
- }]
-
- def _call_api(self, path, video_id, note, fatal=False):
- return self._download_json(
- 'https://www.dramafever.com/api/5/' + path,
- video_id, note=note, headers={
- 'x-consumer-key': self._consumer_secret,
- }, fatal=fatal)
-
- def _get_subtitles(self, video_id):
- subtitles = {}
- subs = self._call_api(
- 'video/%s/subtitles/webvtt/' % video_id, video_id,
- 'Downloading subtitles JSON', fatal=False)
- if not subs or not isinstance(subs, list):
- return subtitles
- for sub in subs:
- if not isinstance(sub, dict):
- continue
- sub_url = url_or_none(sub.get('url'))
- if not sub_url:
- continue
- subtitles.setdefault(
- sub.get('code') or sub.get('language') or 'en', []).append({
- 'url': sub_url
- })
- return subtitles
-
- def _real_extract(self, url):
- video_id = self._match_id(url).replace('/', '.')
-
- series_id, episode_number = video_id.split('.')
-
- video = self._call_api(
- 'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
- 'Downloading video JSON')
-
- formats = []
- download_assets = video.get('download_assets')
- if download_assets and isinstance(download_assets, dict):
- for format_id, format_dict in download_assets.items():
- if not isinstance(format_dict, dict):
- continue
- format_url = url_or_none(format_dict.get('url'))
- if not format_url:
- continue
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'filesize': int_or_none(video.get('filesize')),
- })
-
- stream = self._call_api(
- 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
- fatal=False)
- if stream:
- stream_url = stream.get('stream_url')
- if stream_url:
- formats.extend(self._extract_m3u8_formats(
- stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
-
- title = video.get('title') or 'Episode %s' % episode_number
- description = video.get('description')
- thumbnail = video.get('thumbnail')
- timestamp = unified_timestamp(video.get('release_date'))
- duration = parse_duration(video.get('duration'))
- age_limit = parse_age_limit(video.get('tv_rating'))
- series = video.get('series_title')
- season_number = int_or_none(video.get('season'))
-
- if series:
- title = '%s - %s' % (series, title)
-
- subtitles = self.extract_subtitles(video_id)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'age_limit': age_limit,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': int_or_none(episode_number),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DramaFeverSeriesIE(DramaFeverBaseIE):
- IE_NAME = 'dramafever:series'
- _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
- _TESTS = [{
- 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
- 'info_dict': {
- 'id': '4512',
- 'title': 'Cooking with Shin',
- 'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
- },
- 'playlist_count': 4,
- }, {
- 'url': 'http://www.dramafever.com/drama/124/IRIS/',
- 'info_dict': {
- 'id': '124',
- 'title': 'IRIS',
- 'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
- },
- 'playlist_count': 20,
- }]
-
- _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
-
- def _real_extract(self, url):
- series_id = self._match_id(url)
-
- series = self._download_json(
- 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
- % (self._consumer_secret, series_id),
- series_id, 'Downloading series JSON')['series'][series_id]
-
- title = clean_html(series['name'])
- description = clean_html(series.get('description') or series.get('description_short'))
-
- entries = []
- for page_num in itertools.count(1):
- episodes = self._download_json(
- 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
- % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
- series_id, 'Downloading episodes JSON page #%d' % page_num)
- for episode in episodes.get('value', []):
- episode_url = episode.get('episode_url')
- if not episode_url:
- continue
- entries.append(self.url_result(
- compat_urlparse.urljoin(url, episode_url),
- 'DramaFever', episode.get('guid')))
- if page_num == episodes['num_pages']:
- break
-
- return self.playlist_result(entries, series_id, title, description)
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 8d31258c1..848d387d1 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -114,7 +114,7 @@ class DreiSatIE(InfoExtractor):
video_url, video_id, fatal=False))
elif ext == 'm3u8':
# the certificates are misconfigured (see
- # https://github.com/rg3/youtube-dl/issues/8665)
+ # https://github.com/ytdl-org/youtube-dl/issues/8665)
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dl/extractor/drtuber.py
index 5c41c8022..2baea585b 100644
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dl/extractor/drtuber.py
@@ -4,7 +4,9 @@ import re
from .common import InfoExtractor
from ..utils import (
+ int_or_none,
NO_DEFAULT,
+ parse_duration,
str_to_int,
)
@@ -65,6 +67,9 @@ class DrTuberIE(InfoExtractor):
})
self._sort_formats(formats)
+ duration = int_or_none(video_data.get('duration')) or parse_duration(
+ video_data.get('duration_format'))
+
title = self._html_search_regex(
(r'<h1[^>]+class=["\']title[^>]+>([^<]+)',
r'<title>([^<]+)\s*@\s+DrTuber',
@@ -103,4 +108,5 @@ class DrTuberIE(InfoExtractor):
'comment_count': comment_count,
'categories': categories,
'age_limit': self._rta_search(webpage),
+ 'duration': duration,
}
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index f757745ba..390e79f8c 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -1,42 +1,68 @@
# coding: utf-8
from __future__ import unicode_literals
+import binascii
+import hashlib
+import re
+
+
from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import compat_urllib_parse_unquote
from ..utils import (
+ bytes_to_intlist,
ExtractorError,
int_or_none,
+ intlist_to_bytes,
float_or_none,
mimetype2ext,
- parse_iso8601,
- remove_end,
+ str_or_none,
+ try_get,
+ unified_timestamp,
update_url_query,
+ url_or_none,
)
class DRTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
+ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
+ )
+ (?P<id>[\da-z_-]+)
+ '''
_GEO_BYPASS = False
_GEO_COUNTRIES = ['DK']
IE_NAME = 'drtv'
_TESTS = [{
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
- 'md5': '7ae17b4e18eb5d29212f424a7511c184',
+ 'md5': '25e659cccc9a2ed956110a299fdf5983',
'info_dict': {
'id': 'klassen-darlig-taber-10',
'ext': 'mp4',
'title': 'Klassen - Dårlig taber (10)',
'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
- 'timestamp': 1471991907,
- 'upload_date': '20160823',
+ 'timestamp': 1539085800,
+ 'upload_date': '20181009',
'duration': 606.84,
+ 'series': 'Klassen',
+ 'season': 'Klassen I',
+ 'season_number': 1,
+ 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
+ 'episode': 'Episode 10',
+ 'episode_number': 10,
+ 'release_year': 2016,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
'info_dict': {
- 'id': 'christiania-pusher-street-ryddes-drdkrjpo',
+ 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
'ext': 'mp4',
- 'title': 'LIVE Christianias rydning af Pusher Street er i gang',
+ 'title': 'christiania pusher street ryddes drdkrjpo',
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
'timestamp': 1472800279,
'upload_date': '20160902',
@@ -45,22 +71,46 @@ class DRTVIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
'info_dict': {
'id': 'historien-om-danmark-stenalder',
'ext': 'mp4',
- 'title': 'Historien om Danmark: Stenalder (1)',
+ 'title': 'Historien om Danmark: Stenalder',
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
- 'timestamp': 1490401996,
- 'upload_date': '20170325',
- 'duration': 3502.04,
+ 'timestamp': 1546628400,
+ 'upload_date': '20190104',
+ 'duration': 3502.56,
'formats': 'mincount:20',
},
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
+ 'info_dict': {
+ 'id': '00951930010',
+ 'ext': 'mp4',
+ 'title': 'Bonderøven (1:8)',
+ 'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
+ 'timestamp': 1546542000,
+ 'upload_date': '20190103',
+ 'duration': 2576.6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -74,20 +124,45 @@ class DRTVIE(InfoExtractor):
video_id = self._search_regex(
(r'data-(?:material-identifier|episode-slug)="([^"]+)"',
- r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
- webpage, 'video id')
+ r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
+ webpage, 'video id', default=None)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
+ webpage, 'urn', default=None)
+ if video_id:
+ video_id = compat_urllib_parse_unquote(video_id)
- programcard = self._download_json(
- 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
- video_id, 'Downloading video JSON')
- data = programcard['Data'][0]
+ _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
+ query = {'expanded': 'true'}
- title = remove_end(self._og_search_title(
- webpage, default=None), ' | TV | DR') or data['Title']
+ if video_id:
+ programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
+ else:
+ programcard_url = _PROGRAMCARD_BASE
+ page = self._parse_json(
+ self._search_regex(
+ r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
+ 'data'), '1')['cache']['page']
+ page = page[list(page.keys())[0]]
+ item = try_get(
+ page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
+ dict)
+ video_id = item['customId'].split(':')[-1]
+ query['productionnumber'] = video_id
+
+ data = self._download_json(
+ programcard_url, video_id, 'Downloading video JSON', query=query)
+
+ title = str_or_none(data.get('Title')) or re.sub(
+ r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
+ self._og_search_title(webpage))
description = self._og_search_description(
webpage, default=None) or data.get('Description')
- timestamp = parse_iso8601(data.get('CreatedTime'))
+ timestamp = unified_timestamp(
+ data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
thumbnail = None
duration = None
@@ -97,10 +172,34 @@ class DRTVIE(InfoExtractor):
formats = []
subtitles = {}
- for asset in data['Assets']:
+ assets = []
+ primary_asset = data.get('PrimaryAsset')
+ if isinstance(primary_asset, dict):
+ assets.append(primary_asset)
+ secondary_assets = data.get('SecondaryAssets')
+ if isinstance(secondary_assets, list):
+ for secondary_asset in secondary_assets:
+ if isinstance(secondary_asset, dict):
+ assets.append(secondary_asset)
+
+ def hex_to_bytes(hex):
+ return binascii.a2b_hex(hex.encode('ascii'))
+
+ def decrypt_uri(e):
+ n = int(e[2:10], 16)
+ a = e[10 + n:]
+ data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
+ key = bytes_to_intlist(hashlib.sha256(
+ ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
+ iv = bytes_to_intlist(hex_to_bytes(a))
+ decrypted = aes_cbc_decrypt(data, key, iv)
+ return intlist_to_bytes(
+ decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
+
+ for asset in assets:
kind = asset.get('Kind')
if kind == 'Image':
- thumbnail = asset.get('Uri')
+ thumbnail = url_or_none(asset.get('Uri'))
elif kind in ('VideoResource', 'AudioResource'):
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
restricted_to_denmark = asset.get('RestrictedToDenmark')
@@ -108,13 +207,27 @@ class DRTVIE(InfoExtractor):
for link in asset.get('Links', []):
uri = link.get('Uri')
if not uri:
+ encrypted_uri = link.get('EncryptedUri')
+ if not encrypted_uri:
+ continue
+ try:
+ uri = decrypt_uri(encrypted_uri)
+ except Exception:
+ self.report_warning(
+ 'Unable to decrypt EncryptedUri', video_id)
+ continue
+ uri = url_or_none(uri)
+ if not uri:
continue
target = link.get('Target')
format_id = target or ''
- preference = None
- if asset_target in ('SpokenSubtitles', 'SignLanguage'):
+ if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
preference = -1
format_id += '-%s' % asset_target
+ elif asset_target == 'Default':
+ preference = 1
+ else:
+ preference = None
if target == 'HDS':
f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
@@ -140,19 +253,22 @@ class DRTVIE(InfoExtractor):
'vcodec': 'none' if kind == 'AudioResource' else None,
'preference': preference,
})
- subtitles_list = asset.get('SubtitlesList')
- if isinstance(subtitles_list, list):
- LANGS = {
- 'Danish': 'da',
- }
- for subs in subtitles_list:
- if not subs.get('Uri'):
- continue
- lang = subs.get('Language') or 'da'
- subtitles.setdefault(LANGS.get(lang, lang), []).append({
- 'url': subs['Uri'],
- 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
- })
+ subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
+ if isinstance(subtitles_list, list):
+ LANGS = {
+ 'Danish': 'da',
+ }
+ for subs in subtitles_list:
+ if not isinstance(subs, dict):
+ continue
+ sub_uri = url_or_none(subs.get('Uri'))
+ if not sub_uri:
+ continue
+ lang = subs.get('Language') or 'da'
+ subtitles.setdefault(LANGS.get(lang, lang), []).append({
+ 'url': sub_uri,
+ 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
+ })
if not formats and restricted_to_denmark:
self.raise_geo_restricted(
@@ -170,6 +286,13 @@ class DRTVIE(InfoExtractor):
'duration': duration,
'formats': formats,
'subtitles': subtitles,
+ 'series': str_or_none(data.get('SeriesTitle')),
+ 'season': str_or_none(data.get('SeasonTitle')),
+ 'season_number': int_or_none(data.get('SeasonNumber')),
+ 'season_id': str_or_none(data.get('SeasonUrn')),
+ 'episode': str_or_none(data.get('EpisodeTitle')),
+ 'episode_number': int_or_none(data.get('EpisodeNumber')),
+ 'release_year': int_or_none(data.get('ProductionYear')),
}
diff --git a/youtube_dl/extractor/dtube.py b/youtube_dl/extractor/dtube.py
index 5887887e1..114d2dbe3 100644
--- a/youtube_dl/extractor/dtube.py
+++ b/youtube_dl/extractor/dtube.py
@@ -15,16 +15,16 @@ from ..utils import (
class DTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
_TEST = {
- 'url': 'https://d.tube/#!/v/benswann/zqd630em',
- 'md5': 'a03eaa186618ffa7a3145945543a251e',
+ 'url': 'https://d.tube/#!/v/broncnutz/x380jtr1',
+ 'md5': '9f29088fa08d699a7565ee983f56a06e',
'info_dict': {
- 'id': 'zqd630em',
+ 'id': 'x380jtr1',
'ext': 'mp4',
- 'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
- 'description': 'md5:700d164e066b87f9eac057949e4227c2',
- 'uploader_id': 'benswann',
- 'upload_date': '20180222',
- 'timestamp': 1519328958,
+ 'title': 'Lefty 3-Rings is Back Baby!! NCAA Picks',
+ 'description': 'md5:60be222088183be3a42f196f34235776',
+ 'uploader_id': 'broncnutz',
+ 'upload_date': '20190107',
+ 'timestamp': 1546854054,
},
'params': {
'format': '480p',
@@ -48,7 +48,7 @@ class DTubeIE(InfoExtractor):
def canonical_url(h):
if not h:
return None
- return 'https://ipfs.io/ipfs/' + h
+ return 'https://video.dtube.top/ipfs/' + h
formats = []
for q in ('240', '480', '720', '1080', ''):
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
index be2e3d378..d9d9afdec 100644
--- a/youtube_dl/extractor/dumpert.py
+++ b/youtube_dl/extractor/dumpert.py
@@ -1,20 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..compat import compat_b64decode
from ..utils import (
+ int_or_none,
qualities,
- sanitized_Request,
)
class DumpertIE(InfoExtractor):
- _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
+ _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
_TESTS = [{
- 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
+ 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
'info_dict': {
'id': '6646981/951bc60f',
@@ -24,46 +21,60 @@ class DumpertIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
}
}, {
- 'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
+ 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- protocol = mobj.group('protocol')
-
- url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
- req = sanitized_Request(url)
- req.add_header('Cookie', 'nsfw=1; cpc=10')
- webpage = self._download_webpage(req, video_id)
-
- files_base64 = self._search_regex(
- r'data-files="([^"]+)"', webpage, 'data files')
-
- files = self._parse_json(
- compat_b64decode(files_base64).decode('utf-8'),
- video_id)
+ video_id = self._match_id(url).replace('_', '/')
+ item = self._download_json(
+ 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
+ video_id)['items'][0]
+ title = item['title']
+ media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
quality = qualities(['flv', 'mobile', 'tablet', '720p'])
-
- formats = [{
- 'url': video_url,
- 'format_id': format_id,
- 'quality': quality(format_id),
- } for format_id, video_url in files.items() if format_id != 'still']
+ formats = []
+ for variant in media.get('variants', []):
+ uri = variant.get('uri')
+ if not uri:
+ continue
+ version = variant.get('version')
+ formats.append({
+ 'url': uri,
+ 'format_id': version,
+ 'quality': quality(version),
+ })
self._sort_formats(formats)
- title = self._html_search_meta(
- 'title', webpage) or self._og_search_title(webpage)
- description = self._html_search_meta(
- 'description', webpage) or self._og_search_description(webpage)
- thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
+ thumbnails = []
+ stills = item.get('stills') or {}
+ for t in ('thumb', 'still'):
+ for s in ('', '-medium', '-large'):
+ still_id = t + s
+ still_url = stills.get(still_id)
+ if not still_url:
+ continue
+ thumbnails.append({
+ 'id': still_id,
+ 'url': still_url,
+ })
+
+ stats = item.get('stats') or {}
return {
'id': video_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'formats': formats
+ 'description': item.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'duration': int_or_none(media.get('duration')),
+ 'like_count': int_or_none(stats.get('kudos_total')),
+ 'view_count': int_or_none(stats.get('views_total')),
}
diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py
index 20996962a..de7f6d670 100644
--- a/youtube_dl/extractor/dvtv.py
+++ b/youtube_dl/extractor/dvtv.py
@@ -10,16 +10,16 @@ from ..utils import (
int_or_none,
js_to_json,
mimetype2ext,
+ try_get,
unescapeHTML,
+ parse_iso8601,
)
class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/'
-
_VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
-
_TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
'md5': '67cb83e4a955d36e1b5d31993134a0c2',
@@ -28,11 +28,13 @@ class DVTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně',
'duration': 1484,
+ 'upload_date': '20141217',
+ 'timestamp': 1418792400,
}
}, {
'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/',
'info_dict': {
- 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci',
+ 'title': r'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci',
'id': '973eb3bc854e11e498be002590604f2e',
},
'playlist': [{
@@ -84,6 +86,8 @@ class DVTVIE(InfoExtractor):
'ext': 'mp4',
'title': 'Zeman si jen léčí mindráky, Sobotku nenávidí a Babiš se mu teď hodí, tvrdí Kmenta',
'duration': 1103,
+ 'upload_date': '20170511',
+ 'timestamp': 1494514200,
},
'params': {
'skip_download': True,
@@ -91,43 +95,59 @@ class DVTVIE(InfoExtractor):
}, {
'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/',
'only_matching': True,
+ }, {
+ # Test live stream video (liveStarter) parsing
+ 'url': 'https://video.aktualne.cz/dvtv/zive-mistryne-sveta-eva-samkova-po-navratu-ze-sampionatu/r~182654c2288811e990fd0cc47ab5f122/',
+ 'md5': '2e552e483f2414851ca50467054f9d5d',
+ 'info_dict': {
+ 'id': '8d116360288011e98c840cc47ab5f122',
+ 'ext': 'mp4',
+ 'title': 'Živě: Mistryně světa Eva Samková po návratu ze šampionátu',
+ 'upload_date': '20190204',
+ 'timestamp': 1549289591,
+ },
+ 'params': {
+ # Video content is no longer available
+ 'skip_download': True,
+ },
}]
- def _parse_video_metadata(self, js, video_id, live_js=None):
+ def _parse_video_metadata(self, js, video_id, timestamp):
data = self._parse_json(js, video_id, transform_source=js_to_json)
- if live_js:
- data.update(self._parse_json(
- live_js, video_id, transform_source=js_to_json))
-
title = unescapeHTML(data['title'])
+ live_starter = try_get(data, lambda x: x['plugins']['liveStarter'], dict)
+ if live_starter:
+ data.update(live_starter)
+
formats = []
- for video in data['sources']:
- video_url = video.get('file')
- if not video_url:
- continue
- video_type = video.get('type')
- ext = determine_ext(video_url, mimetype2ext(video_type))
- if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif video_type == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id='dash', fatal=False))
- else:
- label = video.get('label')
- height = self._search_regex(
- r'^(\d+)[pP]', label or '', 'height', default=None)
- format_id = ['http']
- for f in (ext, label):
- if f:
- format_id.append(f)
- formats.append({
- 'url': video_url,
- 'format_id': '-'.join(format_id),
- 'height': int_or_none(height),
- })
+ for tracks in data.get('tracks', {}).values():
+ for video in tracks:
+ video_url = video.get('src')
+ if not video_url:
+ continue
+ video_type = video.get('type')
+ ext = determine_ext(video_url, mimetype2ext(video_type))
+ if video_type == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif video_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ label = video.get('label')
+ height = self._search_regex(
+ r'^(\d+)[pP]', label or '', 'height', default=None)
+ format_id = ['http']
+ for f in (ext, label):
+ if f:
+ format_id.append(f)
+ formats.append({
+ 'url': video_url,
+ 'format_id': '-'.join(format_id),
+ 'height': int_or_none(height),
+ })
self._sort_formats(formats)
return {
@@ -136,41 +156,29 @@ class DVTVIE(InfoExtractor):
'description': data.get('description'),
'thumbnail': data.get('image'),
'duration': int_or_none(data.get('duration')),
- 'timestamp': int_or_none(data.get('pubtime')),
+ 'timestamp': int_or_none(timestamp),
'formats': formats
}
def _real_extract(self, url):
video_id = self._match_id(url)
-
webpage = self._download_webpage(url, video_id)
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'published time', default=None))
- # live content
- live_item = self._search_regex(
- r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});',
- webpage, 'video', default=None)
+ items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage)
+ if items:
+ return self.playlist_result(
+ [self._parse_video_metadata(i, video_id, timestamp) for i in items],
+ video_id, self._html_search_meta('twitter:title', webpage))
- # single video
item = self._search_regex(
- r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});',
+ r'(?s)BBXPlayer\.setup\((.+?)\);',
webpage, 'video', default=None)
-
if item:
- return self._parse_video_metadata(item, video_id, live_item)
-
- # playlist
- items = re.findall(
- r"(?s)BBX\.context\.assets\['[0-9a-f]{32}'\]\.push\(({.+?})\);",
- webpage)
- if not items:
- items = re.findall(r'(?s)var\s+asset\s*=\s*({.+?});\n', webpage)
-
- if items:
- return {
- '_type': 'playlist',
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'entries': [self._parse_video_metadata(i, video_id) for i in items]
- }
+ # remove function calls (ex. htmldeentitize)
+ # TODO this should be fixed in a general way in the js_to_json
+ item = re.sub(r'\w+?\((.+)\)', r'\1', item)
+ return self._parse_video_metadata(item, video_id, timestamp)
raise ExtractorError('Could not find neither video nor playlist')
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dl/extractor/einthusan.py
index 4485bf8c1..4e0f8bc81 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dl/extractor/einthusan.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import json
+import re
from .common import InfoExtractor
from ..compat import (
@@ -18,7 +19,7 @@ from ..utils import (
class EinthusanIE(InfoExtractor):
- _VALID_URL = r'https?://einthusan\.tv/movie/watch/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://einthusan.tv/movie/watch/9097/',
'md5': 'ff0f7f2065031b8a2cf13a933731c035',
@@ -32,6 +33,12 @@ class EinthusanIE(InfoExtractor):
}, {
'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.com/movie/watch/9097/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
+ 'only_matching': True,
}]
# reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
@@ -41,7 +48,9 @@ class EinthusanIE(InfoExtractor):
)).decode('utf-8'), video_id)
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
@@ -53,7 +62,7 @@ class EinthusanIE(InfoExtractor):
page_id = self._html_search_regex(
'<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
video_data = self._download_json(
- 'https://einthusan.tv/ajax/movie/watch/%s/' % video_id, video_id,
+ 'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
data=urlencode_postdata({
'xEvent': 'UIVideoPlayer.PingOutcome',
'xJson': json.dumps({
diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py
index c050bf9df..fe42821c7 100644
--- a/youtube_dl/extractor/eporner.py
+++ b/youtube_dl/extractor/eporner.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
encode_base_n,
ExtractorError,
@@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
webpage, urlh = self._download_webpage_handle(url, display_id)
- video_id = self._match_id(compat_str(urlh.geturl()))
+ video_id = self._match_id(urlh.geturl())
hash = self._search_regex(
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
diff --git a/youtube_dl/extractor/escapist.py b/youtube_dl/extractor/escapist.py
index 4d8a3c134..4cd815ebc 100644
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dl/extractor/escapist.py
@@ -1,14 +1,11 @@
from __future__ import unicode_literals
-import json
-
from .common import InfoExtractor
from ..utils import (
determine_ext,
clean_html,
int_or_none,
float_or_none,
- sanitized_Request,
)
@@ -36,7 +33,7 @@ def _decrypt_config(key, string):
class EscapistIE(InfoExtractor):
- _VALID_URL = r'https?://?(?:www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
+ _VALID_URL = r'https?://?(?:(?:www|v1)\.)?escapistmagazine\.com/videos/view/[^/]+/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
@@ -61,6 +58,12 @@ class EscapistIE(InfoExtractor):
'duration': 304,
'uploader': 'The Escapist',
}
+ }, {
+ 'url': 'http://escapistmagazine.com/videos/view/the-escapist-presents/6618',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v1.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -74,19 +77,20 @@ class EscapistIE(InfoExtractor):
video_id = ims_video['videoID']
key = ims_video['hash']
- config_req = sanitized_Request(
- 'http://www.escapistmagazine.com/videos/'
- 'vidconfig.php?videoID=%s&hash=%s' % (video_id, key))
- config_req.add_header('Referer', url)
- config = self._download_webpage(config_req, video_id, 'Downloading video config')
+ config = self._download_webpage(
+ 'http://www.escapistmagazine.com/videos/vidconfig.php',
+ video_id, 'Downloading video config', headers={
+ 'Referer': url,
+ }, query={
+ 'videoID': video_id,
+ 'hash': key,
+ })
- data = json.loads(_decrypt_config(key, config))
+ data = self._parse_json(_decrypt_config(key, config), video_id)
video_data = data['videoData']
title = clean_html(video_data['title'])
- duration = float_or_none(video_data.get('duration'), 1000)
- uploader = video_data.get('publisher')
formats = [{
'url': video['src'],
@@ -99,8 +103,9 @@ class EscapistIE(InfoExtractor):
'id': video_id,
'formats': formats,
'title': title,
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage) or data.get('poster'),
'description': self._og_search_description(webpage),
- 'duration': duration,
- 'uploader': uploader,
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'uploader': video_data.get('publisher'),
+ 'series': video_data.get('show'),
}
diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py
index 127c69b2e..6cf05e6da 100644
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dl/extractor/espn.py
@@ -29,7 +29,8 @@ class ESPNIE(OnceIE):
(?:
.*?\?.*?\bid=|
/_/id/
- )
+ )|
+ [^/]+/video/
)
)|
(?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/
@@ -94,6 +95,9 @@ class ESPNIE(OnceIE):
}, {
'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets',
'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -212,17 +216,14 @@ class FiveThirtyEightIE(InfoExtractor):
_TEST = {
'url': 'http://fivethirtyeight.com/features/how-the-6-8-raiders-can-still-make-the-playoffs/',
'info_dict': {
- 'id': '21846851',
- 'ext': 'mp4',
+ 'id': '56032156',
+ 'ext': 'flv',
'title': 'FiveThirtyEight: The Raiders can still make the playoffs',
'description': 'Neil Paine breaks down the simplest scenario that will put the Raiders into the playoffs at 8-8.',
- 'timestamp': 1513960621,
- 'upload_date': '20171222',
},
'params': {
'skip_download': True,
},
- 'expected_warnings': ['Unable to download f4m manifest'],
}
def _real_extract(self, url):
@@ -230,9 +231,8 @@ class FiveThirtyEightIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- video_id = self._search_regex(
- r'data-video-id=["\'](?P<id>\d+)',
- webpage, 'video id', group='id')
+ embed_url = self._search_regex(
+ r'<iframe[^>]+src=["\'](https?://fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/\d+)',
+ webpage, 'embed url')
- return self.url_result(
- 'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())
+ return self.url_result(embed_url, 'AbcNewsVideo')
diff --git a/youtube_dl/extractor/expressen.py b/youtube_dl/extractor/expressen.py
index 934571472..f79365038 100644
--- a/youtube_dl/extractor/expressen.py
+++ b/youtube_dl/extractor/expressen.py
@@ -82,8 +82,8 @@ class ExpressenIE(InfoExtractor):
title = info.get('titleRaw') or data['title']
description = info.get('descriptionRaw')
thumbnail = info.get('socialMediaImage') or data.get('image')
- duration = int_or_none(info.get('videoTotalSecondsDuration') or
- data.get('totalSecondsDuration'))
+ duration = int_or_none(info.get('videoTotalSecondsDuration')
+ or data.get('totalSecondsDuration'))
timestamp = unified_timestamp(info.get('publishDate'))
return {
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 464c8d690..4b3092028 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -18,9 +18,10 @@ from .acast import (
ACastIE,
ACastChannelIE,
)
-from .addanime import AddAnimeIE
from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
from .adobetv import (
+ AdobeTVEmbedIE,
AdobeTVIE,
AdobeTVShowIE,
AdobeTVChannelIE,
@@ -38,9 +39,7 @@ from .alphaporno import AlphaPornoIE
from .amcnetworks import AMCNetworksIE
from .americastestkitchen import AmericasTestKitchenIE
from .animeondemand import AnimeOnDemandIE
-from .anitube import AnitubeIE
from .anvato import AnvatoIE
-from .anysex import AnySexIE
from .aol import AolIE
from .allocine import AllocineIE
from .aliexpress import AliExpressLiveIE
@@ -59,17 +58,8 @@ from .ard import (
ARDMediathekIE,
)
from .arte import (
- ArteTvIE,
ArteTVPlus7IE,
- ArteTVCreativeIE,
- ArteTVConcertIE,
- ArteTVInfoIE,
- ArteTVFutureIE,
- ArteTVCinemaIE,
- ArteTVDDCIE,
- ArteTVMagazineIE,
ArteTVEmbedIE,
- TheOperaPlatformIE,
ArteTVPlaylistIE,
)
from .asiancrush import (
@@ -88,13 +78,8 @@ from .awaan import (
AWAANLiveIE,
AWAANSeasonIE,
)
-from .azmedien import (
- AZMedienIE,
- AZMedienPlaylistIE,
- AZMedienShowPlaylistIE,
-)
+from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
-from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
from .bbc import (
BBCCoUkIE,
@@ -112,11 +97,15 @@ from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
from .beatport import BeatportIE
from .bet import BetIE
+from .bfi import BFIPlayerIE
from .bigflix import BigflixIE
from .bild import BildIE
from .bilibili import (
BiliBiliIE,
BiliBiliBangumiIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
@@ -177,11 +166,15 @@ from .cbs import CBSIE
from .cbslocal import CBSLocalIE
from .cbsinteractive import CBSInteractiveIE
from .cbsnews import (
+ CBSNewsEmbedIE,
CBSNewsIE,
CBSNewsLiveVideoIE,
)
from .cbssports import CBSSportsIE
-from .ccc import CCCIE
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
from .ccma import CCMAIE
from .cctv import CCTVIE
from .cda import CDAIE
@@ -198,6 +191,11 @@ from .chirbit import (
ChirbitProfileIE,
)
from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
from .cjsw import CJSWIE
from .cliphunter import CliphunterIE
from .clippit import ClippitIE
@@ -209,7 +207,10 @@ from .cloudy import CloudyIE
from .clubic import ClubicIE
from .clyp import ClypIE
from .cmt import CMTIE
-from .cnbc import CNBCIE
+from .cnbc import (
+ CNBCIE,
+ CNBCVideoIE,
+)
from .cnn import (
CNNIE,
CNNBlogsIE,
@@ -223,17 +224,16 @@ from .comedycentral import (
ComedyCentralTVIE,
ToshIE,
)
-from .comcarcoff import ComCarCoffIE
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
MmsIE,
RtmpIE,
)
from .condenast import CondeNastIE
+from .contv import CONtvIE
from .corus import CorusIE
from .cracked import CrackedIE
from .crackle import CrackleIE
-from .criterion import CriterionIE
from .crooksandliars import CrooksAndLiarsIE
from .crunchyroll import (
CrunchyrollIE,
@@ -254,10 +254,6 @@ from .dailymotion import (
DailymotionPlaylistIE,
DailymotionUserIE,
)
-from .daisuki import (
- DaisukiMottoIE,
- DaisukiMottoPlaylistIE,
-)
from .daum import (
DaumIE,
DaumClipIE,
@@ -276,14 +272,7 @@ from .douyutv import (
DouyuShowIE,
DouyuTVIE,
)
-from .dplay import (
- DPlayIE,
- DPlayItIE,
-)
-from .dramafever import (
- DramaFeverIE,
- DramaFeverSeriesIE,
-)
+from .dplay import DPlayIE
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE
@@ -362,7 +351,6 @@ from .firsttv import FirstTVIE
from .fivemin import FiveMinIE
from .fivetv import FiveTVIE
from .flickr import FlickrIE
-from .flipagram import FlipagramIE
from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE
from .formula1 import Formula1IE
@@ -373,7 +361,10 @@ from .fourtube import (
FuxIE,
)
from .fox import FOXIE
-from .fox9 import FOX9IE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
from .foxgay import FoxgayIE
from .foxnews import (
FoxNewsIE,
@@ -401,18 +392,11 @@ from .frontendmasters import (
FrontendMastersCourseIE
)
from .funimation import FunimationIE
-from .funk import (
- FunkMixIE,
- FunkChannelIE,
-)
-from .funnyordie import FunnyOrDieIE
+from .funk import FunkIE
from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
+from .gaia import GaiaIE
from .gameinformer import GameInformerIE
-from .gameone import (
- GameOneIE,
- GameOnePlaylistIE,
-)
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gaskrank import GaskrankIE
@@ -428,7 +412,6 @@ from .globo import (
GloboArticleIE,
)
from .go import GoIE
-from .go90 import Go90IE
from .godtube import GodTubeIE
from .golem import GolemIE
from .googledrive import GoogleDriveIE
@@ -437,17 +420,14 @@ from .googlesearch import GoogleSearchIE
from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE
-from .hark import HarkIE
-from .hbo import (
- HBOIE,
- HBOEpisodeIE,
-)
+from .hbo import HBOIE
from .hearthisat import HearThisAtIE
from .heise import HeiseIE
from .hellporno import HellPornoIE
from .helsinki import HelsinkiIE
from .hentaistigma import HentaiStigmaIE
from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
from .hidive import HiDiveIE
from .historicfilms import HistoricFilmsIE
from .hitbox import HitboxIE, HitboxLiveIE
@@ -466,8 +446,11 @@ from .hrti import (
)
from .huajiao import HuajiaoIE
from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+)
from .hypem import HypemIE
-from .iconosquare import IconosquareIE
from .ign import (
IGNIE,
OneUPIE,
@@ -480,12 +463,17 @@ from .imdb import (
from .imgur import (
ImgurIE,
ImgurAlbumIE,
+ ImgurGalleryIE,
)
from .ina import InaIE
from .inc import IncIE
from .indavideo import IndavideoEmbedIE
from .infoq import InfoQIE
-from .instagram import InstagramIE, InstagramUserIE
+from .instagram import (
+ InstagramIE,
+ InstagramUserIE,
+ InstagramTagIE,
+)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
from .iprima import IPrimaIE
@@ -510,7 +498,6 @@ from .jeuxvideo import JeuxVideoIE
from .jove import JoveIE
from .joj import JojIE
from .jwplatform import JWPlatformIE
-from .jpopsukitv import JpopsukiIE
from .kakao import KakaoIE
from .kaltura import KalturaIE
from .kanalplay import KanalPlayIE
@@ -521,10 +508,9 @@ from .keezmovies import KeezMoviesIE
from .ketnet import KetnetIE
from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
-from .keek import KeekIE
from .konserthusetplay import KonserthusetPlayIE
-from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kusi import KUSIIE
@@ -540,6 +526,7 @@ from .la7 import LA7IE
from .laola1tv import (
Laola1TvEmbedIE,
Laola1TvIE,
+ EHFTVIE,
ITTFIE,
)
from .lci import LCIIE
@@ -547,8 +534,12 @@ from .lcp import (
LcpPlayIE,
LcpIE,
)
-from .learnr import LearnrIE
from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
from .leeco import (
LeIE,
LePlaylistIE,
@@ -569,7 +560,13 @@ from .limelight import (
LimelightChannelListIE,
)
from .line import LineTVIE
+from .linkedin import (
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
+from .livejournal import LiveJournalIE
from .liveleak import (
LiveLeakIE,
LiveLeakEmbedIE,
@@ -588,13 +585,12 @@ from .lynda import (
LyndaCourseIE
)
from .m6 import M6IE
-from .macgamestore import MacGameStoreIE
from .mailru import (
MailRuIE,
MailRuMusicIE,
MailRuMusicSearchIE,
)
-from .makertv import MakerTVIE
+from .malltv import MallTVIE
from .mangomolo import (
MangomoloVideoIE,
MangomoloLiveIE,
@@ -608,7 +604,11 @@ from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
from .mediaset import MediasetIE
-from .mediasite import MediasiteIE
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
from .medici import MediciIE
from .megaphone import MegaphoneIE
from .meipai import MeipaiIE
@@ -623,22 +623,23 @@ from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
)
-from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
-from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mit import TechTVMITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import (
MixcloudIE,
MixcloudUserIE,
MixcloudPlaylistIE,
- MixcloudStreamIE,
)
from .mlb import MLBIE
from .mnet import MnetIE
from .moevideo import MoeVideoIE
-from .mofosex import MofosexIE
+from .mofosex import (
+ MofosexIE,
+ MofosexEmbedIE,
+)
from .mojvideo import MojvideoIE
from .morningstar import MorningstarIE
from .motherless import (
@@ -655,10 +656,9 @@ from .mtv import (
MTVVideoIE,
MTVServicesEmbeddedIE,
MTVDEIE,
- MTV81IE,
+ MTVJapanIE,
)
from .muenchentv import MuenchenTVIE
-from .musicplayon import MusicPlayOnIE
from .mwave import MwaveIE, MwaveMeetGreetIE
from .mychannels import MyChannelsIE
from .myspace import MySpaceIE, MySpaceAlbumIE
@@ -670,8 +670,7 @@ from .myvi import (
from .myvidster import MyVidsterIE
from .nationalgeographic import (
NationalGeographicVideoIE,
- NationalGeographicIE,
- NationalGeographicEpisodeGuideIE,
+ NationalGeographicTVIE,
)
from .naver import NaverIE
from .nba import NBAIE
@@ -719,7 +718,6 @@ from .nexx import (
NexxIE,
NexxEmbedIE,
)
-from .nfb import NFBIE
from .nfl import NFLIE
from .nhk import NhkVodIE
from .nhl import NHLIE
@@ -746,13 +744,6 @@ from .nova import (
NovaEmbedIE,
NovaIE,
)
-from .novamov import (
- AuroraVidIE,
- CloudTimeIE,
- NowVideoIE,
- VideoWeedIE,
- WholeCloudIE,
-)
from .nowness import (
NownessIE,
NownessPlaylistIE,
@@ -782,6 +773,8 @@ from .nrk import (
NRKTVSeasonIE,
NRKTVSeriesIE,
)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
from .ntvde import NTVDeIE
from .ntvru import NTVRuIE
from .nytimes import (
@@ -805,20 +798,29 @@ from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
)
-from .openload import OpenloadIE
from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
ORFFM4IE,
ORFFM4StoryIE,
ORFOE1IE,
+ ORFOE3IE,
+ ORFNOEIE,
+ ORFWIEIE,
+ ORFBGLIE,
+ ORFOOEIE,
+ ORFSTMIE,
+ ORFKTNIE,
+ ORFSBGIE,
+ ORFTIRIE,
+ ORFVBGIE,
ORFIPTVIE,
)
+from .outsidetv import OutsideTVIE
from .packtpub import (
PacktPubIE,
PacktPubCourseIE,
)
-from .pandatv import PandaTVIE
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
@@ -841,7 +843,12 @@ from .picarto import (
from .piksel import PikselIE
from .pinkbike import PinkbikeIE
from .pladform import PladformIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
@@ -856,15 +863,16 @@ from .polskieradio import (
PolskieRadioIE,
PolskieRadioCategoryIE,
)
+from .popcorntimes import PopcorntimesIE
from .popcorntv import PopcornTVIE
from .porn91 import Porn91IE
from .porncom import PornComIE
-from .pornflip import PornFlipIE
from .pornhd import PornHdIE
from .pornhub import (
PornHubIE,
- PornHubPlaylistIE,
- PornHubUserVideosIE,
+ PornHubUserIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
)
from .pornotube import PornotubeIE
from .pornovoisines import PornoVoisinesIE
@@ -874,8 +882,6 @@ from .puhutv import (
PuhuTVSerieIE,
)
from .presstv import PressTVIE
-from .primesharetv import PrimeShareTVIE
-from .promptfile import PromptFileIE
from .prosiebensat1 import ProSiebenSat1IE
from .puls4 import Puls4IE
from .pyvideo import PyvideoIE
@@ -910,7 +916,10 @@ from .raywenderlich import (
)
from .rbmaradio import RBMARadioIE
from .rds import RDSIE
-from .redbulltv import RedBullTVIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullTVRrnContentIE,
+)
from .reddit import (
RedditIE,
RedditRIE,
@@ -924,10 +933,6 @@ from .rentv import (
from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE
-from .revision3 import (
- Revision3EmbedIE,
- Revision3IE,
-)
from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .ro220 import Ro220IE
@@ -949,9 +954,7 @@ from .rts import RTSIE
from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE
from .rtvnh import RTVNHIE
from .rtvs import RTVSIE
-from .rudo import RudoIE
from .ruhd import RUHDIE
-from .ruleporn import RulePornIE
from .rutube import (
RutubeIE,
RutubeChannelIE,
@@ -973,11 +976,17 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE
from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE
-from .scrippsnetworks import ScrippsNetworksWatchIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
from .seeker import SeekerIE
from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
-from .servingsys import ServingSysIE
from .servus import ServusIE
from .sevenplus import SevenPlusIE
from .sexu import SexuIE
@@ -1001,7 +1010,10 @@ from .skynewsarabia import (
SkyNewsArabiaIE,
SkyNewsArabiaArticleIE,
)
-from .skysports import SkySportsIE
+from .sky import (
+ SkyNewsIE,
+ SkySportsIE,
+)
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
@@ -1015,6 +1027,7 @@ from .snotr import SnotrIE
from .sohu import SohuIE
from .sonyliv import SonyLIVIE
from .soundcloud import (
+ SoundcloudEmbedIE,
SoundcloudIE,
SoundcloudSetIE,
SoundcloudUserIE,
@@ -1033,7 +1046,10 @@ from .southpark import (
SouthParkEsIE,
SouthParkNlIE
)
-from .spankbang import SpankBangIE
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
from .spankwire import SpankwireIE
from .spiegel import SpiegelIE, SpiegelArticleIE
from .spiegeltv import SpiegeltvIE
@@ -1043,7 +1059,7 @@ from .spike import (
)
from .stitcher import StitcherIE
from .sport5 import Sport5IE
-from .sportbox import SportBoxEmbedIE
+from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
@@ -1055,12 +1071,16 @@ from .srmediathek import SRMediathekIE
from .stanfordoc import StanfordOpenClassroomIE
from .steam import SteamIE
from .streamable import StreamableIE
-from .streamango import StreamangoIE
from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE
from .streetvoice import StreetVoiceIE
from .stretchinternet import StretchInternetIE
+from .stv import STVPlayerIE
from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
from .svt import (
SVTIE,
SVTPageIE,
@@ -1078,12 +1098,17 @@ from .tass import TassIE
from .tastytrade import TastyTradeIE
from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
from .teachertube import (
TeacherTubeIE,
TeacherTubeUserIE,
)
from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .tele5 import Tele5IE
@@ -1094,12 +1119,14 @@ from .telegraaf import TelegraafIE
from .telemb import TeleMBIE
from .telequebec import (
TeleQuebecIE,
+ TeleQuebecSquatIE,
TeleQuebecEmissionIE,
TeleQuebecLiveIE,
)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
from .tennistv import TennisTVIE
+from .tenplay import TenPlayIE
from .testurl import TestURLIE
from .tf1 import TF1IE
from .tfo import TFOIE
@@ -1116,6 +1143,10 @@ from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+)
from .tinypic import TinyPicIE
from .tmz import (
TMZIE,
@@ -1134,6 +1165,7 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
+from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE
from .tubitv import TubiTvIE
@@ -1147,13 +1179,16 @@ from .tunein import (
)
from .tunepk import TunePkIE
from .turbo import TurboIE
-from .tutv import TutvIE
from .tv2 import (
TV2IE,
TV2ArticleIE,
+ KatsomoIE,
+)
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
)
from .tv2hu import TV2HuIE
-from .tv3 import TV3IE
from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE
@@ -1172,13 +1207,15 @@ from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
TVNowIE,
- TVNowListIE,
+ TVNowNewIE,
+ TVNowSeasonIE,
+ TVNowAnnualIE,
TVNowShowIE,
)
from .tvp import (
TVPEmbedIE,
TVPIE,
- TVPSeriesIE,
+ TVPWebsiteIE,
)
from .tvplay import (
TVPlayIE,
@@ -1190,6 +1227,7 @@ from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import TwitCastingIE
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -1206,15 +1244,23 @@ from .twitter import (
TwitterCardIE,
TwitterIE,
TwitterAmplifyIE,
+ TwitterBroadcastIE,
)
from .udemy import (
UdemyIE,
UdemyCourseIE
)
from .udn import UDNEmbedIE
-from .ufctv import UFCTVIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
from .uktvplay import UKTVPlayIE
from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
@@ -1223,10 +1269,6 @@ from .uplynk import (
UplynkIE,
UplynkPreplayIE,
)
-from .upskill import (
- UpskillIE,
- UpskillCourseIE,
-)
from .urort import UrortIE
from .urplay import URPlayIE
from .usanetwork import USANetworkIE
@@ -1240,7 +1282,6 @@ from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
-from .vessel import VesselIE
from .vesti import VestiIE
from .vevo import (
VevoIE,
@@ -1262,13 +1303,11 @@ from .viddler import ViddlerIE
from .videa import VideaIE
from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE
-from .videomega import VideoMegaIE
from .videomore import (
VideomoreIE,
VideomoreVideoIE,
VideomoreSeasonIE,
)
-from .videopremium import VideoPremiumIE
from .videopress import VideoPressIE
from .vidio import VidioIE
from .vidlii import VidLiiIE
@@ -1283,7 +1322,6 @@ from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
)
-from .viewster import ViewsterIE
from .viidea import ViideaIE
from .vimeo import (
VimeoIE,
@@ -1295,6 +1333,7 @@ from .vimeo import (
VimeoReviewIE,
VimeoUserIE,
VimeoWatchLaterIE,
+ VHXEmbedIE,
)
from .vimple import VimpleIE
from .vine import (
@@ -1330,7 +1369,6 @@ from .voxmedia import (
VoxMediaVolumeIE,
VoxMediaIE,
)
-from .vporn import VpornIE
from .vrt import VRTIE
from .vrak import VrakIE
from .vrv import (
@@ -1344,6 +1382,7 @@ from .vuclip import VuClipIE
from .vvvvid import VVVVIDIE
from .vyborymos import VyboryMosIE
from .vzaar import VzaarIE
+from .wakanim import WakanimIE
from .walla import WallaIE
from .washingtonpost import (
WashingtonPostIE,
@@ -1367,27 +1406,24 @@ from .webofstories import (
WebOfStoriesPlaylistIE,
)
from .weibo import (
- WeiboIE,
+ WeiboIE,
WeiboMobileIE
)
from .weiqitv import WeiqiTVIE
-from .wimp import WimpIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
-from .wrzuta import (
- WrzutaIE,
- WrzutaPlaylistIE,
-)
from .wsj import (
WSJIE,
WSJArticleIE,
)
+from .wwe import WWEIE
from .xbef import XBefIE
from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE
from .xhamster import (
XHamsterIE,
XHamsterEmbedIE,
+ XHamsterUserIE,
)
from .xiami import (
XiamiSongIE,
@@ -1409,13 +1445,17 @@ from .xxxymovies import XXXYMoviesIE
from .yahoo import (
YahooIE,
YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
+ YahooJapanNewsIE,
)
+from .yandexdisk import YandexDiskIE
from .yandexmusic import (
YandexMusicTrackIE,
YandexMusicAlbumIE,
YandexMusicPlaylistIE,
)
-from .yandexdisk import YandexDiskIE
+from .yandexvideo import YandexVideoIE
from .yapfiles import YapFilesIE
from .yesjapan import YesJapanIE
from .yinyuetai import YinYueTaiIE
@@ -1466,6 +1506,7 @@ from .zattoo import (
QuantumTVIE,
QuicklineIE,
QuicklineLiveIE,
+ SaltTVIE,
SAKTVIE,
VTXTVIE,
WalyTVIE,
@@ -1474,3 +1515,4 @@ from .zattoo import (
)
from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE
+from .zype import ZypeIE
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 97cfe0fc3..610d66745 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
- _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
+ _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
_TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
@@ -334,7 +334,7 @@ class FacebookIE(InfoExtractor):
if not video_data:
server_js_data = self._parse_json(
self._search_regex(
- r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
webpage, 'js data', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
video_data = extract_from_jsmods_instances(server_js_data)
@@ -379,6 +379,7 @@ class FacebookIE(InfoExtractor):
if not video_data:
raise ExtractorError('Cannot parse data')
+ subtitles = {}
formats = []
for f in video_data:
format_id = f['stream_type']
@@ -402,9 +403,17 @@ class FacebookIE(InfoExtractor):
if dash_manifest:
formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+ subtitles_src = f[0].get('subtitles_src')
+ if subtitles_src:
+ subtitles.setdefault('en', []).append({'url': subtitles_src})
if not formats:
raise ExtractorError('Cannot find video formats')
+ # Downloads with browser's User-Agent are rate limited. Working around
+ # with non-browser User-Agent.
+ for f in formats:
+ f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
+
self._sort_formats(formats)
video_title = self._html_search_regex(
@@ -424,11 +433,11 @@ class FacebookIE(InfoExtractor):
uploader = clean_html(get_element_by_id(
'fbPhotoPageAuthorName', webpage)) or self._search_regex(
r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
- fatal=False) or self._og_search_title(webpage, fatal=False)
+ default=None) or self._og_search_title(webpage, fatal=False)
timestamp = int_or_none(self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None))
- thumbnail = self._og_search_thumbnail(webpage)
+ thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
view_count = parse_count(self._search_regex(
r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
@@ -442,6 +451,7 @@ class FacebookIE(InfoExtractor):
'timestamp': timestamp,
'thumbnail': thumbnail,
'view_count': view_count,
+ 'subtitles': subtitles,
}
return webpage, info_dict
@@ -456,15 +466,18 @@ class FacebookIE(InfoExtractor):
return info_dict
if '/posts/' in url:
- entries = [
- self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
- for vid in self._parse_json(
- self._search_regex(
- r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
- webpage, 'video ids', group='ids'),
- video_id)]
-
- return self.playlist_result(entries, video_id)
+ video_id_json = self._search_regex(
+ r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
+ default='')
+ if video_id_json:
+ entries = [
+ self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+ for vid in self._parse_json(video_id_json, video_id)]
+ return self.playlist_result(entries, video_id)
+
+ # Single Video?
+ video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
+ return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
else:
_, info_dict = self._extract_from_url(
self._VIDEO_PAGE_TEMPLATE % video_id,
diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dl/extractor/fivetv.py
index 9f9863746..c4c0f1b3d 100644
--- a/youtube_dl/extractor/fivetv.py
+++ b/youtube_dl/extractor/fivetv.py
@@ -9,7 +9,7 @@ from ..utils import int_or_none
class FiveTVIE(InfoExtractor):
_VALID_URL = r'''(?x)
- http://
+ https?://
(?:www\.)?5-tv\.ru/
(?:
(?:[^/]+/)+(?P<id>\d+)|
@@ -39,6 +39,7 @@ class FiveTVIE(InfoExtractor):
'duration': 180,
},
}, {
+ # redirect to https://www.5-tv.ru/projects/1000095/izvestia-glavnoe/
'url': 'http://www.5-tv.ru/glavnoe/#itemDetails',
'info_dict': {
'id': 'glavnoe',
@@ -46,6 +47,7 @@ class FiveTVIE(InfoExtractor):
'title': r're:^Итоги недели с \d+ по \d+ \w+ \d{4} года$',
'thumbnail': r're:^https?://.*\.jpg$',
},
+ 'skip': 'redirect to «Известия. Главное» project page',
}, {
'url': 'http://www.5-tv.ru/glavnoe/broadcasts/508645/',
'only_matching': True,
@@ -70,7 +72,7 @@ class FiveTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_url = self._search_regex(
- [r'<div[^>]+?class="flowplayer[^>]+?data-href="([^"]+)"',
+ [r'<div[^>]+?class="(?:flow)?player[^>]+?data-href="([^"]+)"',
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
webpage, 'video url')
diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py
deleted file mode 100644
index b7be40f1b..000000000
--- a/youtube_dl/extractor/flipagram.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- float_or_none,
- try_get,
- unified_timestamp,
-)
-
-
-class FlipagramIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'https://flipagram.com/f/nyvTSJMKId',
- 'md5': '888dcf08b7ea671381f00fab74692755',
- 'info_dict': {
- 'id': 'nyvTSJMKId',
- 'ext': 'mp4',
- 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
- 'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
- 'duration': 35.571,
- 'timestamp': 1461244995,
- 'upload_date': '20160421',
- 'uploader': 'kitty juria',
- 'uploader_id': 'sjuria101',
- 'creator': 'kitty juria',
- 'view_count': int,
- 'like_count': int,
- 'repost_count': int,
- 'comment_count': int,
- 'comments': list,
- 'formats': 'mincount:2',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_data = self._parse_json(
- self._search_regex(
- r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
- video_id)
-
- flipagram = video_data['flipagram']
- video = flipagram['video']
-
- json_ld = self._search_json_ld(webpage, video_id, default={})
- title = json_ld.get('title') or flipagram['captionText']
- description = json_ld.get('description') or flipagram.get('captionText')
-
- formats = [{
- 'url': video['url'],
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- 'filesize': int_or_none(video_data.get('size')),
- }]
-
- preview_url = try_get(
- flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
- if preview_url:
- formats.append({
- 'url': preview_url,
- 'ext': 'm4a',
- 'vcodec': 'none',
- })
-
- self._sort_formats(formats)
-
- counts = flipagram.get('counts', {})
- user = flipagram.get('user', {})
- video_data = flipagram.get('video', {})
-
- thumbnails = [{
- 'url': self._proto_relative_url(cover['url']),
- 'width': int_or_none(cover.get('width')),
- 'height': int_or_none(cover.get('height')),
- 'filesize': int_or_none(cover.get('size')),
- } for cover in flipagram.get('covers', []) if cover.get('url')]
-
- # Note that this only retrieves comments that are initially loaded.
- # For videos with large amounts of comments, most won't be retrieved.
- comments = []
- for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
- text = comment.get('comment')
- if not text or not isinstance(text, list):
- continue
- comments.append({
- 'author': comment.get('user', {}).get('name'),
- 'author_id': comment.get('user', {}).get('username'),
- 'id': comment.get('id'),
- 'text': text[0],
- 'timestamp': unified_timestamp(comment.get('created')),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': float_or_none(flipagram.get('duration'), 1000),
- 'thumbnails': thumbnails,
- 'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
- 'uploader': user.get('name'),
- 'uploader_id': user.get('username'),
- 'creator': user.get('name'),
- 'view_count': int_or_none(counts.get('plays')),
- 'like_count': int_or_none(counts.get('likes')),
- 'repost_count': int_or_none(counts.get('reflips')),
- 'comment_count': int_or_none(counts.get('comments')),
- 'comments': comments,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dl/extractor/fourtube.py
index a9a1f911e..be4e81342 100644
--- a/youtube_dl/extractor/fourtube.py
+++ b/youtube_dl/extractor/fourtube.py
@@ -22,8 +22,6 @@ from ..utils import (
class FourTubeBaseIE(InfoExtractor):
- _TKN_HOST = 'tkn.kodicdn.com'
-
def _extract_formats(self, url, video_id, media_id, sources):
token_url = 'https://%s/%s/desktop/%s' % (
self._TKN_HOST, media_id, '+'.join(sources))
@@ -120,6 +118,7 @@ class FourTubeIE(FourTubeBaseIE):
IE_NAME = '4tube'
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
+ _TKN_HOST = 'token.4tube.com'
_TESTS = [{
'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
'md5': '6516c8ac63b03de06bc8eac14362db4f',
@@ -149,6 +148,7 @@ class FourTubeIE(FourTubeBaseIE):
class FuxIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
+ _TKN_HOST = 'token.fux.com'
_TESTS = [{
'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
'info_dict': {
@@ -280,6 +280,7 @@ class PornTubeIE(FourTubeBaseIE):
class PornerBrosIE(FourTubeBaseIE):
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
_URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
+ _TKN_HOST = 'token.pornerbros.com'
_TESTS = [{
'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
'md5': '6516c8ac63b03de06bc8eac14362db4f',
diff --git a/youtube_dl/extractor/fox.py b/youtube_dl/extractor/fox.py
index 11d6c9c32..04f4bdba6 100644
--- a/youtube_dl/extractor/fox.py
+++ b/youtube_dl/extractor/fox.py
@@ -1,17 +1,22 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
+import uuid
+
from .adobepass import AdobePassIE
-from .uplynk import UplynkPreplayIE
-from ..compat import compat_str
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
- HEADRequest,
+ ExtractorError,
int_or_none,
parse_age_limit,
parse_duration,
try_get,
unified_timestamp,
- update_url_query,
)
@@ -31,6 +36,7 @@ class FOXIE(AdobePassIE):
'upload_date': '20170901',
'creator': 'FOX',
'series': 'Gotham',
+ 'age_limit': 14,
},
'params': {
'skip_download': True,
@@ -44,48 +50,76 @@ class FOXIE(AdobePassIE):
'url': 'https://www.fox.com/watch/30056b295fb57f7452aeeb4920bc3024/',
'only_matching': True,
}]
+ _GEO_BYPASS = False
+ _HOME_PAGE_URL = 'https://www.fox.com/'
+ _API_KEY = 'abdcbed02c124d393b39e818a4312055'
+ _access_token = None
+
+ def _call_api(self, path, video_id, data=None):
+ headers = {
+ 'X-Api-Key': self._API_KEY,
+ }
+ if self._access_token:
+ headers['Authorization'] = 'Bearer ' + self._access_token
+ try:
+ return self._download_json(
+ 'https://api2.fox.com/v2.0/' + path,
+ video_id, data=data, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ entitlement_issues = self._parse_json(
+ e.cause.read().decode(), video_id)['entitlementIssues']
+ for e in entitlement_issues:
+ if e.get('errorCode') == 1005:
+ raise ExtractorError(
+ 'This video is only available via cable service provider '
+ 'subscription. You may want to use --cookies.', expected=True)
+ messages = ', '.join([e['message'] for e in entitlement_issues])
+ raise ExtractorError(messages, expected=True)
+ raise
+
+ def _real_initialize(self):
+ if not self._access_token:
+ mvpd_auth = self._get_cookies(self._HOME_PAGE_URL).get('mvpd-auth')
+ if mvpd_auth:
+ self._access_token = (self._parse_json(compat_urllib_parse_unquote(
+ mvpd_auth.value), None, fatal=False) or {}).get('accessToken')
+ if not self._access_token:
+ self._access_token = self._call_api(
+ 'login', None, json.dumps({
+ 'deviceId': compat_str(uuid.uuid4()),
+ }).encode())['accessToken']
def _real_extract(self, url):
video_id = self._match_id(url)
- video = self._download_json(
- 'https://api.fox.com/fbc-content/v1_4/video/%s' % video_id,
- video_id, headers={
- 'apikey': 'abdcbed02c124d393b39e818a4312055',
- 'Content-Type': 'application/json',
- 'Referer': url,
- })
+ video = self._call_api('vodplayer/' + video_id, video_id)
title = video['name']
- release_url = video['videoRelease']['url']
-
- description = video.get('description')
- duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
- video.get('duration')) or parse_duration(video.get('duration'))
- timestamp = unified_timestamp(video.get('datePublished'))
- rating = video.get('contentRating')
- age_limit = parse_age_limit(rating)
+ release_url = video['url']
+ try:
+ m3u8_url = self._download_json(release_url, video_id)['playURL']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read().decode(), video_id)
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['US'])
+ raise ExtractorError(error['description'], expected=True)
+ raise
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
data = try_get(
video, lambda x: x['trackingData']['properties'], dict) or {}
+ duration = int_or_none(video.get('durationInSeconds')) or int_or_none(
+ video.get('duration')) or parse_duration(video.get('duration'))
+ timestamp = unified_timestamp(video.get('datePublished'))
creator = data.get('brand') or data.get('network') or video.get('network')
-
series = video.get('seriesName') or data.get(
'seriesName') or data.get('show')
- season_number = int_or_none(video.get('seasonNumber'))
- episode = video.get('name')
- episode_number = int_or_none(video.get('episodeNumber'))
- release_year = int_or_none(video.get('releaseYear'))
-
- if data.get('authRequired'):
- resource = self._get_mvpd_resource(
- 'fbc-fox', title, video.get('guid'), rating)
- release_url = update_url_query(
- release_url, {
- 'auth': self._extract_mvpd_auth(
- url, video_id, 'fbc-fox', resource)
- })
subtitles = {}
for doc_rel in video.get('documentReleases', []):
@@ -98,36 +132,19 @@ class FOXIE(AdobePassIE):
}]
break
- info = {
+ return {
'id': video_id,
'title': title,
- 'description': description,
+ 'formats': formats,
+ 'description': video.get('description'),
'duration': duration,
'timestamp': timestamp,
- 'age_limit': age_limit,
+ 'age_limit': parse_age_limit(video.get('contentRating')),
'creator': creator,
'series': series,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'release_year': release_year,
+ 'season_number': int_or_none(video.get('seasonNumber')),
+ 'episode': video.get('name'),
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'release_year': int_or_none(video.get('releaseYear')),
'subtitles': subtitles,
}
-
- urlh = self._request_webpage(HEADRequest(release_url), video_id)
- video_url = compat_str(urlh.geturl())
-
- if UplynkPreplayIE.suitable(video_url):
- info.update({
- '_type': 'url_transparent',
- 'url': video_url,
- 'ie_key': UplynkPreplayIE.ie_key(),
- })
- else:
- m3u8_url = self._download_json(release_url, video_id)['playURL']
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
- info['formats'] = formats
- return info
diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py
index 17dfffa7b..91f8f7b8a 100644
--- a/youtube_dl/extractor/fox9.py
+++ b/youtube_dl/extractor/fox9.py
@@ -1,13 +1,23 @@
# coding: utf-8
from __future__ import unicode_literals
-from .anvato import AnvatoIE
+from .common import InfoExtractor
-class FOX9IE(AnvatoIE):
- _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
- _TESTS = [{
- 'url': 'http://www.fox9.com/news/215123287-story',
+class FOX9IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
+ 'Anvato', video_id)
+
+
+class FOX9NewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
'md5': 'd6e1b2572c3bab8a849c9103615dd243',
'info_dict': {
'id': '314473',
@@ -21,22 +31,11 @@ class FOX9IE(AnvatoIE):
'categories': ['News', 'Sports'],
'tags': ['news', 'video'],
},
- }, {
- 'url': 'http://www.fox9.com/news/investigators/214070684-story',
- 'only_matching': True,
- }]
+ }
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._parse_json(
- self._search_regex(
- r"this\.videosJson\s*=\s*'(\[.+?\])';",
- webpage, 'anvato playlist'),
- video_id)[0]['video']
-
- return self._get_anvato_videos(
- 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
- video_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ anvato_id = self._search_regex(
+ r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
+ return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dl/extractor/foxsports.py
index 985542727..2b2cb6c6f 100644
--- a/youtube_dl/extractor/foxsports.py
+++ b/youtube_dl/extractor/foxsports.py
@@ -1,43 +1,33 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import (
- smuggle_url,
- update_url_query,
-)
class FoxSportsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
+ _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
_TEST = {
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
'md5': 'b49050e955bebe32c301972e4012ac17',
'info_dict': {
- 'id': 'bwduI3X_TgUB',
+ 'id': '432609859715',
'ext': 'mp4',
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
'description': 'Courtney Lee talks about Memphis being focused.',
- 'upload_date': '20150423',
- 'timestamp': 1429761109,
+ # TODO: fix timestamp
+ 'upload_date': '19700101', # '20150423',
+ # 'timestamp': 1429761109,
'uploader': 'NEWA-FNG-FOXSPORTS',
},
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
'add_ie': ['ThePlatform'],
}
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- config = self._parse_json(
- self._html_search_regex(
- r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
- webpage, 'data player config'),
- video_id)
-
- return self.url_result(smuggle_url(update_url_query(
- config['releaseURL'], {
- 'mbr': 'true',
- 'switch': 'http',
- }), {'force_smil_url': True}))
+ return self.url_result(
+ 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py
index b8fa17588..306b45fc9 100644
--- a/youtube_dl/extractor/franceculture.py
+++ b/youtube_dl/extractor/franceculture.py
@@ -31,7 +31,13 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_data = extract_attributes(self._search_regex(
- r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
+ r'''(?sx)
+ (?:
+ </h1>|
+ <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
+ ).*?
+ (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+ ''',
webpage, 'video data'))
video_url = video_data['data-asset-source']
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 2ffe83a78..81b468c7d 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -143,7 +143,7 @@ class FranceTVIE(InfoExtractor):
ext = determine_ext(video_url)
if ext == 'f4m':
if georestricted:
- # See https://github.com/rg3/youtube-dl/issues/3963
+ # See https://github.com/ytdl-org/youtube-dl/issues/3963
# m3u8 urls work fine
continue
formats.extend(self._extract_f4m_formats(
@@ -215,7 +215,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
_TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': {
- 'id': '162311093',
+ 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
@@ -271,7 +271,7 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
catalogue = None
video_id = self._search_regex(
- r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'video id', default=None, group='id')
if not video_id:
@@ -371,12 +371,13 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
self.url_result(dailymotion_url, DailymotionIE.ie_key())
for dailymotion_url in dailymotion_urls])
- video_id, catalogue = self._search_regex(
- (r'id-video=([^@]+@[^"]+)',
+ video_id = self._search_regex(
+ (r'player\.load[^;]+src:\s*["\']([^"\']+)',
+ r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
- webpage, 'video id').split('@')
+ webpage, 'video id')
- return self._make_url_result(video_id, catalogue)
+ return self._make_url_result(video_id)
class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py
index 486a49c05..ea9c3e317 100644
--- a/youtube_dl/extractor/freespeech.py
+++ b/youtube_dl/extractor/freespeech.py
@@ -1,6 +1,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from .youtube import YoutubeIE
class FreespeechIE(InfoExtractor):
@@ -27,8 +28,4 @@ class FreespeechIE(InfoExtractor):
r'data-video-url="([^"]+)"',
webpage, 'youtube url')
- return {
- '_type': 'url',
- 'url': youtube_url,
- 'ie_key': 'Youtube',
- }
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
diff --git a/youtube_dl/extractor/frontendmasters.py b/youtube_dl/extractor/frontendmasters.py
index cb57ba007..f1db33fb1 100644
--- a/youtube_dl/extractor/frontendmasters.py
+++ b/youtube_dl/extractor/frontendmasters.py
@@ -94,8 +94,8 @@ class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
chapter_number = None
index = lesson.get('index')
element_index = lesson.get('elementIndex')
- if (isinstance(index, int) and isinstance(element_index, int) and
- index < element_index):
+ if (isinstance(index, int) and isinstance(element_index, int)
+ and index < element_index):
chapter_number = element_index - index
chapter = (chapters[chapter_number - 1]
if chapter_number - 1 < len(chapters) else None)
diff --git a/youtube_dl/extractor/funimation.py b/youtube_dl/extractor/funimation.py
index 07d01caec..8bbedca26 100644
--- a/youtube_dl/extractor/funimation.py
+++ b/youtube_dl/extractor/funimation.py
@@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
+import random
+import string
+
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
@@ -87,7 +90,7 @@ class FunimationIE(InfoExtractor):
video_id = title_data.get('id') or self._search_regex([
r"KANE_customdimensions.videoID\s*=\s*'(\d+)';",
- r'<iframe[^>]+src="/player/(\d+)"',
+ r'<iframe[^>]+src="/player/(\d+)',
], webpage, 'video_id', default=None)
if not video_id:
player_url = self._html_search_meta([
@@ -108,8 +111,10 @@ class FunimationIE(InfoExtractor):
if self._TOKEN:
headers['Authorization'] = 'Token %s' % self._TOKEN
sources = self._download_json(
- 'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
- video_id, headers=headers)['items']
+ 'https://www.funimation.com/api/showexperience/%s/' % video_id,
+ video_id, headers=headers, query={
+ 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
+ })['items']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read(), video_id)['errors'][0]
diff --git a/youtube_dl/extractor/funk.py b/youtube_dl/extractor/funk.py
index 7e1af95e0..81d1949fd 100644
--- a/youtube_dl/extractor/funk.py
+++ b/youtube_dl/extractor/funk.py
@@ -1,89 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
-import itertools
import re
from .common import InfoExtractor
from .nexx import NexxIE
-from ..compat import compat_str
from ..utils import (
int_or_none,
- try_get,
+ str_or_none,
)
-class FunkBaseIE(InfoExtractor):
- _HEADERS = {
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
- 'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4',
- }
- _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
-
- @staticmethod
- def _make_headers(referer):
- headers = FunkBaseIE._HEADERS.copy()
- headers['Referer'] = referer
- return headers
-
- def _make_url_result(self, video):
- return {
- '_type': 'url_transparent',
- 'url': 'nexx:741:%s' % video['sourceId'],
- 'ie_key': NexxIE.ie_key(),
- 'id': video['sourceId'],
- 'title': video.get('title'),
- 'description': video.get('description'),
- 'duration': int_or_none(video.get('duration')),
- 'season_number': int_or_none(video.get('seasonNr')),
- 'episode_number': int_or_none(video.get('episodeNr')),
- }
-
-
-class FunkMixIE(FunkBaseIE):
- _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
+class FunkIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
- 'md5': '8edf617c2f2b7c9847dfda313f199009',
- 'info_dict': {
- 'id': '123748',
- 'ext': 'mp4',
- 'title': '"Die realste Kifferdoku aller Zeiten"',
- 'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
- 'timestamp': 1490274721,
- 'upload_date': '20170323',
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- mix_id = mobj.group('id')
- alias = mobj.group('alias')
-
- lists = self._download_json(
- 'https://www.funk.net/api/v3.1/curation/curatedLists/',
- mix_id, headers=self._make_headers(url), query={
- 'size': 100,
- })['_embedded']['curatedListList']
-
- metas = next(
- l for l in lists
- if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
- video = next(
- meta['videoDataDelegate']
- for meta in metas
- if try_get(
- meta, lambda x: x['videoDataDelegate']['alias'],
- compat_str) == alias)
-
- return self._make_url_result(video)
-
-
-class FunkChannelIE(FunkBaseIE):
- _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
+ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
+ 'md5': '8dd9d9ab59b4aa4173b3197f2ea48e81',
'info_dict': {
'id': '1155821',
'ext': 'mp4',
@@ -92,83 +24,26 @@ class FunkChannelIE(FunkBaseIE):
'timestamp': 1514507395,
'upload_date': '20171229',
},
- 'params': {
- 'skip_download': True,
- },
- }, {
- # only available via byIdList API
- 'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
- 'info_dict': {
- 'id': '205067',
- 'ext': 'mp4',
- 'title': 'Martin Sonneborn erklärt die EU',
- 'description': 'md5:050f74626e4ed87edf4626d2024210c0',
- 'timestamp': 1494424042,
- 'upload_date': '20170510',
- },
- 'params': {
- 'skip_download': True,
- },
+
}, {
- 'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
+ 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- alias = mobj.group('alias')
-
- headers = self._make_headers(url)
-
- video = None
-
- # Id-based channels are currently broken on their side: webplayer
- # tries to process them via byChannelAlias endpoint and fails
- # predictably.
- for page_num in itertools.count():
- by_channel_alias = self._download_json(
- 'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
- % channel_id,
- 'Downloading byChannelAlias JSON page %d' % (page_num + 1),
- headers=headers, query={
- 'filterFsk': 'false',
- 'sort': 'creationDate,desc',
- 'size': 100,
- 'page': page_num,
- }, fatal=False)
- if not by_channel_alias:
- break
- video_list = try_get(
- by_channel_alias, lambda x: x['_embedded']['videoList'], list)
- if not video_list:
- break
- try:
- video = next(r for r in video_list if r.get('alias') == alias)
- break
- except StopIteration:
- pass
- if not try_get(
- by_channel_alias, lambda x: x['_links']['next']):
- break
-
- if not video:
- by_id_list = self._download_json(
- 'https://www.funk.net/api/v3.0/content/videos/byIdList',
- channel_id, 'Downloading byIdList JSON', headers=headers,
- query={
- 'ids': alias,
- }, fatal=False)
- if by_id_list:
- video = try_get(by_id_list, lambda x: x['result'][0], dict)
-
- if not video:
- results = self._download_json(
- 'https://www.funk.net/api/v3.0/content/videos/filter',
- channel_id, 'Downloading filter JSON', headers=headers, query={
- 'channelId': channel_id,
- 'size': 100,
- })['result']
- video = next(r for r in results if r.get('alias') == alias)
-
- return self._make_url_result(video)
+ display_id, nexx_id = re.match(self._VALID_URL, url).groups()
+ video = self._download_json(
+ 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
+ return {
+ '_type': 'url_transparent',
+ 'url': 'nexx:741:' + nexx_id,
+ 'ie_key': NexxIE.ie_key(),
+ 'id': nexx_id,
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'channel_id': str_or_none(video.get('channelId')),
+ 'display_id': display_id,
+ 'tags': video.get('tags'),
+ 'thumbnail': video.get('imageUrlLandscape'),
+ }
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
deleted file mode 100644
index f85e7de14..000000000
--- a/youtube_dl/extractor/funnyordie.py
+++ /dev/null
@@ -1,162 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- unified_timestamp,
-)
-
-
-class FunnyOrDieIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?funnyordie\.com/(?P<type>embed|articles|videos)/(?P<id>[0-9a-f]+)(?:$|[?#/])'
- _TESTS = [{
- 'url': 'http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version',
- 'md5': 'bcd81e0c4f26189ee09be362ad6e6ba9',
- 'info_dict': {
- 'id': '0732f586d7',
- 'ext': 'mp4',
- 'title': 'Heart-Shaped Box: Literal Video Version',
- 'description': 'md5:ea09a01bc9a1c46d9ab696c01747c338',
- 'thumbnail': r're:^http:.*\.jpg$',
- 'uploader': 'DASjr',
- 'timestamp': 1317904928,
- 'upload_date': '20111006',
- 'duration': 318.3,
- },
- }, {
- 'url': 'http://www.funnyordie.com/embed/e402820827',
- 'info_dict': {
- 'id': 'e402820827',
- 'ext': 'mp4',
- 'title': 'Please Use This Song (Jon Lajoie)',
- 'description': 'Please use this to sell something. www.jonlajoie.com',
- 'thumbnail': r're:^http:.*\.jpg$',
- 'timestamp': 1398988800,
- 'upload_date': '20140502',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- webpage = self._download_webpage(url, video_id)
-
- links = re.findall(r'<source src="([^"]+/v)[^"]+\.([^"]+)" type=\'video', webpage)
- if not links:
- raise ExtractorError('No media links available for %s' % video_id)
-
- links.sort(key=lambda link: 1 if link[1] == 'mp4' else 0)
-
- m3u8_url = self._search_regex(
- r'<source[^>]+src=(["\'])(?P<url>.+?/master\.m3u8[^"\']*)\1',
- webpage, 'm3u8 url', group='url')
-
- formats = []
-
- m3u8_formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False)
- source_formats = list(filter(
- lambda f: f.get('vcodec') != 'none', m3u8_formats))
-
- bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
- bitrates.sort()
-
- if source_formats:
- self._sort_formats(source_formats)
-
- for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
- for path, ext in links:
- ff = f.copy()
- if ff:
- if ext != 'mp4':
- ff = dict(
- [(k, v) for k, v in ff.items()
- if k in ('height', 'width', 'format_id')])
- ff.update({
- 'format_id': ff['format_id'].replace('hls', ext),
- 'ext': ext,
- 'protocol': 'http',
- })
- else:
- ff.update({
- 'format_id': '%s-%d' % (ext, bitrate),
- 'vbr': bitrate,
- })
- ff['url'] = self._proto_relative_url(
- '%s%d.%s' % (path, bitrate, ext))
- formats.append(ff)
- self._check_formats(formats, video_id)
-
- formats.extend(m3u8_formats)
- self._sort_formats(
- formats, field_preference=('height', 'width', 'tbr', 'format_id'))
-
- subtitles = {}
- for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):
- subtitles[src_lang] = [{
- 'ext': src.split('/')[-1],
- 'url': 'http://www.funnyordie.com%s' % src,
- }]
-
- timestamp = unified_timestamp(self._html_search_meta(
- 'uploadDate', webpage, 'timestamp', default=None))
-
- uploader = self._html_search_regex(
- r'<h\d[^>]+\bclass=["\']channel-preview-name[^>]+>(.+?)</h',
- webpage, 'uploader', default=None)
-
- title, description, thumbnail, duration = [None] * 4
-
- medium = self._parse_json(
- self._search_regex(
- r'jsonMedium\s*=\s*({.+?});', webpage, 'JSON medium',
- default='{}'),
- video_id, fatal=False)
- if medium:
- title = medium.get('title')
- duration = float_or_none(medium.get('duration'))
- if not timestamp:
- timestamp = unified_timestamp(medium.get('publishDate'))
-
- post = self._parse_json(
- self._search_regex(
- r'fb_post\s*=\s*(\{.*?\});', webpage, 'post details',
- default='{}'),
- video_id, fatal=False)
- if post:
- if not title:
- title = post.get('name')
- description = post.get('description')
- thumbnail = post.get('picture')
-
- if not title:
- title = self._og_search_title(webpage)
- if not description:
- description = self._og_search_description(webpage)
- if not duration:
- duration = int_or_none(self._html_search_meta(
- ('video:duration', 'duration'), webpage, 'duration', default=False))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py
index 25e284d46..a3f44b812 100644
--- a/youtube_dl/extractor/fusion.py
+++ b/youtube_dl/extractor/fusion.py
@@ -1,35 +1,84 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from .ooyala import OoyalaIE
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+)
class FusionIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?fusion\.(?:net|tv)/(?:video/|show/.+?\bvideo=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://fusion.tv/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/',
'info_dict': {
- 'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P',
+ 'id': '3145868',
'ext': 'mp4',
'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs',
'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7',
'duration': 140.0,
+ 'timestamp': 1442589635,
+ 'uploader': 'UNIVISON',
+ 'upload_date': '20150918',
},
'params': {
'skip_download': True,
},
- 'add_ie': ['Ooyala'],
+ 'add_ie': ['Anvato'],
}, {
'url': 'http://fusion.tv/video/201781',
'only_matching': True,
+ }, {
+ 'url': 'https://fusion.tv/show/food-exposed-with-nelufar-hedayat/?ancla=full-episodes&video=588644',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://platform.fusion.net/wp-json/fusiondotnet/v1/video/' + video_id, video_id)
+
+ info = {
+ 'id': video_id,
+ 'title': video['title'],
+ 'description': video.get('excerpt'),
+ 'timestamp': parse_iso8601(video.get('published')),
+ 'series': video.get('show'),
+ }
- ooyala_code = self._search_regex(
- r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
- webpage, 'ooyala code', group='code')
+ formats = []
+ src = video.get('src') or {}
+ for f_id, f in src.items():
+ for q_id, q in f.items():
+ q_url = q.get('url')
+ if not q_url:
+ continue
+ ext = determine_ext(q_url, mimetype2ext(q.get('type')))
+ if ext == 'smil':
+ formats.extend(self._extract_smil_formats(q_url, video_id, fatal=False))
+ elif f_id == 'm3u8-variant' or (ext == 'm3u8' and q_id == 'Variant'):
+ formats.extend(self._extract_m3u8_formats(
+ q_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'format_id': '-'.join([f_id, q_id]),
+ 'url': q_url,
+ 'width': int_or_none(q.get('width')),
+ 'height': int_or_none(q.get('height')),
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)\.m(?:p4|3u8)', q_url, 'bitrate')),
+ 'ext': 'mp4' if ext == 'm3u8' else ext,
+ 'protocol': 'm3u8_native' if ext == 'm3u8' else 'https',
+ })
+ if formats:
+ self._sort_formats(formats)
+ info['formats'] = formats
+ else:
+ info.update({
+ '_type': 'url',
+ 'url': 'anvato:uni:' + video['video_ids']['anvato'],
+ 'ie_key': 'Anvato',
+ })
- return OoyalaIE._build_url_result(ooyala_code)
+ return info
diff --git a/youtube_dl/extractor/gaia.py b/youtube_dl/extractor/gaia.py
new file mode 100644
index 000000000..e9527758f
--- /dev/null
+++ b/youtube_dl/extractor/gaia.py
@@ -0,0 +1,130 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class GaiaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?gaia\.com/video/(?P<id>[^/?]+).*?\bfullplayer=(?P<type>feature|preview)'
+ _TESTS = [{
+ 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=feature',
+ 'info_dict': {
+ 'id': '89356',
+ 'ext': 'mp4',
+ 'title': 'Connecting with Universal Consciousness',
+ 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+ 'upload_date': '20151116',
+ 'timestamp': 1447707266,
+ 'duration': 936,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.gaia.com/video/connecting-universal-consciousness?fullplayer=preview',
+ 'info_dict': {
+ 'id': '89351',
+ 'ext': 'mp4',
+ 'title': 'Connecting with Universal Consciousness',
+ 'description': 'md5:844e209ad31b7d31345f5ed689e3df6f',
+ 'upload_date': '20151116',
+ 'timestamp': 1447707266,
+ 'duration': 53,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+ _NETRC_MACHINE = 'gaia'
+ _jwt = None
+
+ def _real_initialize(self):
+ auth = self._get_cookies('https://www.gaia.com/').get('auth')
+ if auth:
+ auth = self._parse_json(
+ compat_urllib_parse_unquote(auth.value),
+ None, fatal=False)
+ if not auth:
+ username, password = self._get_login_info()
+ if username is None:
+ return
+ auth = self._download_json(
+ 'https://auth.gaia.com/v1/login',
+ None, data=urlencode_postdata({
+ 'username': username,
+ 'password': password
+ }))
+ if auth.get('success') is False:
+ raise ExtractorError(', '.join(auth['messages']), expected=True)
+ if auth:
+ self._jwt = auth.get('jwt')
+
+ def _real_extract(self, url):
+ display_id, vtype = re.search(self._VALID_URL, url).groups()
+ node_id = self._download_json(
+ 'https://brooklyn.gaia.com/pathinfo', display_id, query={
+ 'path': 'video/' + display_id,
+ })['id']
+ node = self._download_json(
+ 'https://brooklyn.gaia.com/node/%d' % node_id, node_id)
+ vdata = node[vtype]
+ media_id = compat_str(vdata['nid'])
+ title = node['title']
+
+ headers = None
+ if self._jwt:
+ headers = {'Authorization': 'Bearer ' + self._jwt}
+ media = self._download_json(
+ 'https://brooklyn.gaia.com/media/' + media_id,
+ media_id, headers=headers)
+ formats = self._extract_m3u8_formats(
+ media['mediaUrls']['bcHLS'], media_id, 'mp4')
+ self._sort_formats(formats)
+
+ subtitles = {}
+ text_tracks = media.get('textTracks', {})
+ for key in ('captions', 'subtitles'):
+ for lang, sub_url in text_tracks.get(key, {}).items():
+ subtitles.setdefault(lang, []).append({
+ 'url': sub_url,
+ })
+
+ fivestar = node.get('fivestar', {})
+ fields = node.get('fields', {})
+
+ def get_field_value(key, value_key='value'):
+ return try_get(fields, lambda x: x[key][0][value_key])
+
+ return {
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': strip_or_none(get_field_value('body') or get_field_value('teaser')),
+ 'timestamp': int_or_none(node.get('created')),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(vdata.get('duration')),
+ 'like_count': int_or_none(try_get(fivestar, lambda x: x['up_count']['value'])),
+ 'dislike_count': int_or_none(try_get(fivestar, lambda x: x['down_count']['value'])),
+ 'comment_count': int_or_none(node.get('comment_count')),
+ 'series': try_get(node, lambda x: x['series']['title'], compat_str),
+ 'season_number': int_or_none(get_field_value('season')),
+ 'season_id': str_or_none(get_field_value('series_nid', 'nid')),
+ 'episode_number': int_or_none(get_field_value('episode')),
+ }
diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dl/extractor/gameinformer.py
index a2920a793..f1b96c172 100644
--- a/youtube_dl/extractor/gameinformer.py
+++ b/youtube_dl/extractor/gameinformer.py
@@ -1,12 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ get_element_by_class,
+ get_element_by_id,
+)
class GameInformerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>[^.?&#]+)'
+ _TESTS = [{
+ # normal Brightcove embed code extracted with BrightcoveNewIE._extract_url
'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
'md5': '292f26da1ab4beb4c9099f1304d2b071',
'info_dict': {
@@ -18,16 +25,25 @@ class GameInformerIE(InfoExtractor):
'upload_date': '20150928',
'uploader_id': '694940074001',
},
- }
+ }, {
+ # Brightcove id inside unique element with field--name-field-brightcove-video-id class
+ 'url': 'https://www.gameinformer.com/video-feature/new-gameplay-today/2019/07/09/new-gameplay-today-streets-of-rogue',
+ 'info_dict': {
+ 'id': '6057111913001',
+ 'ext': 'mp4',
+ 'title': 'New Gameplay Today – Streets Of Rogue',
+ 'timestamp': 1562699001,
+ 'upload_date': '20190709',
+ 'uploader_id': '694940074001',
+
+ },
+ }]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(
url, display_id, headers=self.geo_verification_headers())
- brightcove_id = self._search_regex(
- [r'<[^>]+\bid=["\']bc_(\d+)', r"getVideo\('[^']+video_id=(\d+)"],
- webpage, 'brightcove id')
- return self.url_result(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
- brightcove_id)
+ brightcove_id = clean_html(get_element_by_class('field--name-field-brightcove-video-id', webpage) or get_element_by_id('video-source-content', webpage))
+ brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id if brightcove_id else BrightcoveNewIE._extract_url(self, webpage)
+ return self.url_result(brightcove_url, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py
deleted file mode 100644
index a07d69841..000000000
--- a/youtube_dl/extractor/gameone.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- xpath_with_ns,
- parse_iso8601,
- float_or_none,
- int_or_none,
-)
-
-NAMESPACE_MAP = {
- 'media': 'http://search.yahoo.com/mrss/',
-}
-
-# URL prefix to download the mp4 files directly instead of streaming via rtmp
-# Credits go to XBox-Maniac
-# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
-RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
-
-
-class GameOneIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
- _TESTS = [
- {
- 'url': 'http://www.gameone.de/tv/288',
- 'md5': '136656b7fb4c9cb4a8e2d500651c499b',
- 'info_dict': {
- 'id': '288',
- 'ext': 'mp4',
- 'title': 'Game One - Folge 288',
- 'duration': 1238,
- 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
- 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
- 'age_limit': 16,
- 'upload_date': '20140513',
- 'timestamp': 1399980122,
- }
- },
- {
- 'url': 'http://gameone.de/tv/220',
- 'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
- 'info_dict': {
- 'id': '220',
- 'ext': 'mp4',
- 'upload_date': '20120918',
- 'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
- 'timestamp': 1347971451,
- 'title': 'Game One - Folge 220',
- 'duration': 896.62,
- 'age_limit': 16,
- }
- }
-
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- og_video = self._og_search_video_url(webpage, secure=False)
- description = self._html_search_meta('description', webpage)
- age_limit = int(
- self._search_regex(
- r'age=(\d+)',
- self._html_search_meta(
- 'age-de-meta-label',
- webpage),
- 'age_limit',
- '0'))
- mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')
-
- mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
- title = mrss.find('.//item/title').text
- thumbnail = mrss.find('.//item/image').get('url')
- timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')
- content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
- content_url = content.get('url')
-
- content = self._download_xml(
- content_url,
- video_id,
- 'Downloading media:content')
- rendition_items = content.findall('.//rendition')
- duration = float_or_none(rendition_items[0].get('duration'))
- formats = [
- {
- 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
- 'width': int_or_none(r.get('width')),
- 'height': int_or_none(r.get('height')),
- 'tbr': int_or_none(r.get('bitrate')),
- }
- for r in rendition_items
- ]
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'description': description,
- 'age_limit': age_limit,
- 'timestamp': timestamp,
- }
-
-
-class GameOnePlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
- IE_NAME = 'gameone:playlist'
- _TEST = {
- 'url': 'http://www.gameone.de/tv',
- 'info_dict': {
- 'title': 'GameOne',
- },
- 'playlist_mincount': 294,
- }
-
- def _real_extract(self, url):
- webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
- max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
- entries = [
- self.url_result('http://www.gameone.de/tv/%d' %
- video_id, 'GameOne')
- for video_id in range(max_id, 0, -1)]
-
- return {
- '_type': 'playlist',
- 'title': 'GameOne',
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index ab647dd41..4236a5ed8 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(OnceIE):
- _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
}, {
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py
index 8806dc48a..2f555c1d4 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dl/extractor/gdcvault.py
@@ -3,22 +3,24 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from .kaltura import KalturaIE
from ..utils import (
HEADRequest,
sanitized_Request,
+ smuggle_url,
urlencode_postdata,
)
class GDCVaultIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)/(?P<name>(\w|-)+)?'
+ _VALID_URL = r'https?://(?:www\.)?gdcvault\.com/play/(?P<id>\d+)(?:/(?P<name>[\w-]+))?'
_NETRC_MACHINE = 'gdcvault'
_TESTS = [
{
'url': 'http://www.gdcvault.com/play/1019721/Doki-Doki-Universe-Sweet-Simple',
'md5': '7ce8388f544c88b7ac11c7ab1b593704',
'info_dict': {
- 'id': '1019721',
+ 'id': '201311826596_AWNY',
'display_id': 'Doki-Doki-Universe-Sweet-Simple',
'ext': 'mp4',
'title': 'Doki-Doki Universe: Sweet, Simple and Genuine (GDC Next 10)'
@@ -27,7 +29,7 @@ class GDCVaultIE(InfoExtractor):
{
'url': 'http://www.gdcvault.com/play/1015683/Embracing-the-Dark-Art-of',
'info_dict': {
- 'id': '1015683',
+ 'id': '201203272_1330951438328RSXR',
'display_id': 'Embracing-the-Dark-Art-of',
'ext': 'flv',
'title': 'Embracing the Dark Art of Mathematical Modeling in AI'
@@ -56,7 +58,7 @@ class GDCVaultIE(InfoExtractor):
'url': 'http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface',
'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
'info_dict': {
- 'id': '1023460',
+ 'id': '840376_BQRC',
'ext': 'mp4',
'display_id': 'Tenacious-Design-and-The-Interface',
'title': 'Tenacious Design and The Interface of \'Destiny\'',
@@ -66,26 +68,38 @@ class GDCVaultIE(InfoExtractor):
# Multiple audios
'url': 'http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC',
'info_dict': {
- 'id': '1014631',
- 'ext': 'flv',
+ 'id': '12396_1299111843500GMPX',
+ 'ext': 'mp4',
'title': 'How to Create a Good Game - From My Experience of Designing Pac-Man',
},
- 'params': {
- 'skip_download': True, # Requires rtmpdump
- 'format': 'jp', # The japanese audio
- }
+ # 'params': {
+ # 'skip_download': True, # Requires rtmpdump
+ # 'format': 'jp', # The japanese audio
+ # }
},
{
# gdc-player.html
'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
'info_dict': {
- 'id': '1435',
+ 'id': '9350_1238021887562UHXB',
'display_id': 'An-American-engine-in-Tokyo',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
},
+ },
+ {
+ # Kaltura Embed
+ 'url': 'https://www.gdcvault.com/play/1026180/Mastering-the-Apex-of-Scaling',
+ 'info_dict': {
+ 'id': '0_h1fg8j3p',
+ 'ext': 'mp4',
+ 'title': 'Mastering the Apex of Scaling Game Servers (Presented by Multiplay)',
+ 'timestamp': 1554401811,
+ 'upload_date': '20190404',
+ 'uploader_id': 'joe@blazestreaming.com',
+ },
'params': {
- 'skip_download': True, # Requires rtmpdump
+ 'format': 'mp4-408',
},
},
]
@@ -114,10 +128,8 @@ class GDCVaultIE(InfoExtractor):
return start_page
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- display_id = mobj.group('name') or video_id
+ video_id, name = re.match(self._VALID_URL, url).groups()
+ display_id = name or video_id
webpage_url = 'http://www.gdcvault.com/play/' + video_id
start_page = self._download_webpage(webpage_url, display_id)
@@ -127,12 +139,12 @@ class GDCVaultIE(InfoExtractor):
start_page, 'url', default=None)
if direct_url:
title = self._html_search_regex(
- r'<td><strong>Session Name</strong></td>\s*<td>(.*?)</td>',
+ r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
start_page, 'title')
video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension
- head = self._request_webpage(HEADRequest(video_url), video_id)
- video_url = head.geturl()
+ video_url = self._request_webpage(
+ HEADRequest(video_url), video_id).geturl()
return {
'id': video_id,
@@ -141,34 +153,36 @@ class GDCVaultIE(InfoExtractor):
'title': title,
}
- PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
-
- xml_root = self._html_search_regex(
- PLAYER_REGEX, start_page, 'xml root', default=None)
- if xml_root is None:
- # Probably need to authenticate
- login_res = self._login(webpage_url, display_id)
- if login_res is None:
- self.report_warning('Could not login.')
- else:
- start_page = login_res
- # Grab the url from the authenticated page
- xml_root = self._html_search_regex(
- PLAYER_REGEX, start_page, 'xml root')
-
- xml_name = self._html_search_regex(
- r'<iframe src=".*?\?xml=(.+?\.xml).*?".*?</iframe>',
- start_page, 'xml filename', default=None)
- if xml_name is None:
- # Fallback to the older format
+ embed_url = KalturaIE._extract_url(start_page)
+ if embed_url:
+ embed_url = smuggle_url(embed_url, {'source_url': url})
+ ie_key = 'Kaltura'
+ else:
+ PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
+
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root', default=None)
+ if xml_root is None:
+ # Probably need to authenticate
+ login_res = self._login(webpage_url, display_id)
+ if login_res is None:
+ self.report_warning('Could not login.')
+ else:
+ start_page = login_res
+ # Grab the url from the authenticated page
+ xml_root = self._html_search_regex(
+ PLAYER_REGEX, start_page, 'xml root')
+
xml_name = self._html_search_regex(
- r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',
+ r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename')
+ embed_url = '%s/xml/%s' % (xml_root, xml_name)
+ ie_key = 'DigitallySpeaking'
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
- 'url': '%s/xml/%s' % (xml_root, xml_name),
- 'ie_key': 'DigitallySpeaking',
+ 'url': embed_url,
+ 'ie_key': ie_key,
}
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 76ef01332..355067a50 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .tvc import TVCIE
-from .sportbox import SportBoxEmbedIE
+from .sportbox import SportBoxIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
@@ -60,6 +60,9 @@ from .tnaflix import TNAFlixNetworkEmbedIE
from .drtuber import DrTuberIE
from .redtube import RedTubeIE
from .tube8 import Tube8IE
+from .mofosex import MofosexEmbedIE
+from .spankwire import SpankwireIE
+from .youporn import YouPornIE
from .vimeo import VimeoIE
from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE
@@ -77,11 +80,10 @@ from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
-from .vessel import VesselIE
from .kaltura import KalturaIE
from .eagleplatform import EaglePlatformIE
from .facebook import FacebookIE
-from .soundcloud import SoundcloudIE
+from .soundcloud import SoundcloudEmbedIE
from .tunein import TuneInBaseIE
from .vbox7 import Vbox7IE
from .dbtv import DBTVIE
@@ -89,7 +91,6 @@ from .piksel import PikselIE
from .videa import VideaIE
from .twentymin import TwentyMinutenIE
from .ustream import UstreamIE
-from .openload import OpenloadIE
from .videopress import VideoPressIE
from .rutube import RutubeIE
from .limelight import LimelightBaseIE
@@ -109,11 +110,15 @@ from .vice import ViceIE
from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
+from .teachable import TeachableIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE
from .expressen import ExpressenIE
+from .zype import ZypeIE
+from .odnoklassniki import OdnoklassnikiIE
+from .kinja import KinjaEmbedIE
class GenericIE(InfoExtractor):
@@ -428,7 +433,7 @@ class GenericIE(InfoExtractor):
},
},
{
- # https://github.com/rg3/youtube-dl/issues/2253
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
'url': 'http://bcove.me/i6nfkrc3',
'md5': '0ba9446db037002366bab3b3eb30c88c',
'info_dict': {
@@ -453,7 +458,7 @@ class GenericIE(InfoExtractor):
},
},
{
- # https://github.com/rg3/youtube-dl/issues/3541
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': {
@@ -917,7 +922,7 @@ class GenericIE(InfoExtractor):
}
},
# Multiple brightcove videos
- # https://github.com/rg3/youtube-dl/issues/2283
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
{
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
'info_dict': {
@@ -1482,16 +1487,18 @@ class GenericIE(InfoExtractor):
'timestamp': 1432570283,
},
},
- # OnionStudios embed
+ # Kinja embed
{
'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
'info_dict': {
- 'id': '2855',
+ 'id': '106351',
'ext': 'mp4',
'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
'thumbnail': r're:^https?://.*\.jpe?g$',
- 'uploader': 'ClickHole',
- 'uploader_id': 'clickhole',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
}
},
# SnagFilms embed
@@ -1702,6 +1709,15 @@ class GenericIE(InfoExtractor):
'add_ie': ['Kaltura'],
},
{
+ # multiple kaltura embeds, nsfw
+ 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+ 'info_dict': {
+ 'id': 'kamila-avec-video-jaime-sadomie',
+ 'title': "Kamila avec vídeo “J'aime sadomie”",
+ },
+ 'playlist_count': 8,
+ },
+ {
# Non-standard Vimeo embed
'url': 'https://openclassrooms.com/courses/understanding-the-web',
'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
@@ -2071,6 +2087,39 @@ class GenericIE(InfoExtractor):
'playlist_count': 6,
},
{
+ # Squarespace video embed, 2019-08-28
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Zype embed
+ 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'upload_date': '20170909',
+ 'timestamp': 1504915200,
+ },
+ 'add_ie': [ZypeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
'info_dict': {
@@ -2085,6 +2134,23 @@ class GenericIE(InfoExtractor):
},
'expected_warnings': ['Failed to download MPD manifest'],
},
+ {
+ # DailyMotion embed with DM.player
+ 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
+ 'info_dict': {
+ 'id': 'k6aKkGHd9FJs4mtJN39',
+ 'ext': 'mp4',
+ 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
+ 'description': 'This video is private.',
+ 'uploader_id': 'x1jf30l',
+ 'uploader': 'beIN SPORTS USA',
+ 'upload_date': '20190528',
+ 'timestamp': 1559062971,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -2181,10 +2247,7 @@ class GenericIE(InfoExtractor):
def _real_extract(self, url):
if url.startswith('//'):
- return {
- '_type': 'url',
- 'url': self.http_scheme() + url,
- }
+ return self.url_result(self.http_scheme() + url)
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
@@ -2193,7 +2256,7 @@ class GenericIE(InfoExtractor):
default_search = 'fixup_error'
if default_search in ('auto', 'auto_warning', 'fixup_error'):
- if '/' in url:
+ if re.match(r'^[^\s/]+\.[^\s/]+/', url):
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
elif default_search != 'fixup_error':
@@ -2236,7 +2299,7 @@ class GenericIE(InfoExtractor):
if head_response is not False:
# Check for redirect
- new_url = compat_str(head_response.geturl())
+ new_url = head_response.geturl()
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
@@ -2336,12 +2399,12 @@ class GenericIE(InfoExtractor):
return self.playlist_result(
self._parse_xspf(
doc, video_id, xspf_url=url,
- xspf_base_url=compat_str(full_response.geturl())),
+ xspf_base_url=full_response.geturl()),
video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc,
- mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
@@ -2358,10 +2421,16 @@ class GenericIE(InfoExtractor):
return camtasia_res
# Sometimes embedded video player is hidden behind percent encoding
- # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+ # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
# Unescaping the whole page allows to handle those cases in a generic way
webpage = compat_urllib_parse_unquote(webpage)
+ # Unescape squarespace embeds to be detected by generic extractor,
+ # see https://github.com/ytdl-org/youtube-dl/issues/21294
+ webpage = re.sub(
+ r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
+ lambda x: unescapeHTML(x.group(0)), webpage)
+
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
@@ -2436,11 +2505,6 @@ class GenericIE(InfoExtractor):
if tp_urls:
return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
- # Look for Vessel embeds
- vessel_urls = VesselIE._extract_urls(webpage)
- if vessel_urls:
- return self.playlist_from_matches(vessel_urls, video_id, video_title, ie=VesselIE.ie_key())
-
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
@@ -2484,15 +2548,21 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+ # Look for Teachable embeds, must be before Wistia
+ teachable_url = TeachableIE._extract_url(webpage, url)
+ if teachable_url:
+ return self.url_result(teachable_url)
+
# Look for embedded Wistia player
- wistia_url = WistiaIE._extract_url(webpage)
- if wistia_url:
- return {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(wistia_url),
- 'ie_key': WistiaIE.ie_key(),
- 'uploader': video_uploader,
- }
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if wistia_urls:
+ playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ for entry in playlist['entries']:
+ entry.update({
+ '_type': 'url_transparent',
+ 'uploader': video_uploader,
+ })
+ return playlist
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
@@ -2533,11 +2603,11 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
- re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
- re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
- re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
embed_token = self._search_regex(
r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
@@ -2567,19 +2637,6 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
- # Look for embedded NovaMov-based player
- mobj = re.search(
- r'''(?x)<(?:pagespeed_)?iframe[^>]+?src=(["\'])
- (?P<url>http://(?:(?:embed|www)\.)?
- (?:novamov\.com|
- nowvideo\.(?:ch|sx|eu|at|ag|co)|
- videoweed\.(?:es|com)|
- movshare\.(?:net|sx|ag)|
- divxstage\.(?:eu|net|ch|co|at|ag))
- /embed\.php.+?)\1''', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
# Look for embedded Facebook player
facebook_urls = FacebookIE._extract_urls(webpage)
if facebook_urls:
@@ -2591,9 +2648,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'VK')
# Look for embedded Odnoklassniki player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Odnoklassniki')
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
@@ -2636,9 +2693,9 @@ class GenericIE(InfoExtractor):
return self.url_result(tvc_url, 'TVC')
# Look for embedded SportBox player
- sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
+ sportbox_urls = SportBoxIE._extract_urls(webpage)
if sportbox_urls:
- return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
+ return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
@@ -2670,6 +2727,21 @@ class GenericIE(InfoExtractor):
if tube8_urls:
return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+ # Look for embedded Mofosex player
+ mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
+ if mofosex_urls:
+ return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
+
+ # Look for embedded Spankwire player
+ spankwire_urls = SpankwireIE._extract_urls(webpage)
+ if spankwire_urls:
+ return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
+
+ # Look for embedded YouPorn player
+ youporn_urls = YouPornIE._extract_urls(webpage)
+ if youporn_urls:
+ return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
+
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -2712,9 +2784,9 @@ class GenericIE(InfoExtractor):
return self.url_result(myvi_url)
# Look for embedded soundcloud player
- soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for tunein player
tunein_urls = TuneInBaseIE._extract_urls(webpage)
@@ -2781,9 +2853,12 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
- return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
+ kaltura_urls = KalturaIE._extract_urls(webpage)
+ if kaltura_urls:
+ return self.playlist_from_matches(
+ kaltura_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'source_url': url}),
+ ie=KalturaIE.ie_key())
# Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
@@ -2857,6 +2932,12 @@ class GenericIE(InfoExtractor):
if senate_isvp_url:
return self.url_result(senate_isvp_url, 'SenateISVP')
+ # Look for Kinja embeds
+ kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
+ if kinja_embed_urls:
+ return self.playlist_from_matches(
+ kinja_embed_urls, video_id, video_title)
+
# Look for OnionStudios embeds
onionstudios_url = OnionStudiosIE._extract_url(webpage)
if onionstudios_url:
@@ -2918,7 +2999,7 @@ class GenericIE(InfoExtractor):
# Look for VODPlatform embeds
mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
webpage)
if mobj is not None:
return self.url_result(
@@ -2926,10 +3007,14 @@ class GenericIE(InfoExtractor):
# Look for Mangomolo embeds
mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
+ (?:
+ admin\.mangomolo\.com/analytics/index\.php/customers/embed|
+ player\.mangomolo\.com/v1
+ )/
(?:
video\?.*?\bid=(?P<video_id>\d+)|
- index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
).+?)\1''', webpage)
if mobj is not None:
info = {
@@ -2998,12 +3083,6 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
- # Look for Openload embeds
- openload_urls = OpenloadIE._extract_urls(webpage)
- if openload_urls:
- return self.playlist_from_matches(
- openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
-
# Look for VideoPress embeds
videopress_urls = VideoPressIE._extract_urls(webpage)
if videopress_urls:
@@ -3023,7 +3102,7 @@ class GenericIE(InfoExtractor):
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(webpage)
+ mediaset_urls = MediasetIE._extract_urls(self, webpage)
if mediaset_urls:
return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
@@ -3129,6 +3208,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
+ zype_urls = ZypeIE._extract_urls(webpage)
+ if zype_urls:
+ return self.playlist_from_matches(
+ zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
@@ -3155,7 +3239,7 @@ class GenericIE(InfoExtractor):
jwplayer_data, video_id, require_title=False, base_url=url)
return merge_dicts(info, info_dict)
except ExtractorError:
- # See https://github.com/rg3/youtube-dl/pull/16735
+ # See https://github.com/ytdl-org/youtube-dl/pull/16735
pass
# Video.js embed
@@ -3190,8 +3274,8 @@ class GenericIE(InfoExtractor):
else:
formats.append({
'url': src,
- 'ext': (mimetype2ext(src_type) or
- ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+ 'ext': (mimetype2ext(src_type)
+ or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
})
if formats:
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dl/extractor/gfycat.py
index a0670b645..18a30fe67 100644
--- a/youtube_dl/extractor/gfycat.py
+++ b/youtube_dl/extractor/gfycat.py
@@ -11,7 +11,7 @@ from ..utils import (
class GfycatIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gfycat\.com/(?:ifr/|gifs/detail/)?(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\.]+)'
_TESTS = [{
'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
'info_dict': {
@@ -45,15 +45,27 @@ class GfycatIE(InfoExtractor):
'age_limit': 0,
}
}, {
+ 'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
+ 'only_matching': True
+ }, {
'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
'only_matching': True
+ }, {
+ 'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
+ 'only_matching': True
+ }, {
+ 'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
+ 'only_matching': True
+ }, {
+ 'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
+ 'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
gfy = self._download_json(
- 'http://gfycat.com/cajax/get/%s' % video_id,
+ 'https://api.gfycat.com/v1/gfycats/%s' % video_id,
video_id, 'Downloading video info')
if 'error' in gfy:
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py
index 6a1b1e96e..c6477958d 100644
--- a/youtube_dl/extractor/giantbomb.py
+++ b/youtube_dl/extractor/giantbomb.py
@@ -13,10 +13,10 @@ from ..utils import (
class GiantBombIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
+ _TESTS = [{
'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
- 'md5': 'c8ea694254a59246a42831155dec57ac',
+ 'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
'info_dict': {
'id': '2300-9782',
'display_id': 'quick-look-destiny-the-dark-below',
@@ -26,7 +26,10 @@ class GiantBombIE(InfoExtractor):
'duration': 2399,
'thumbnail': r're:^https?://.*\.jpg$',
}
- }
+ }, {
+ 'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
index c2140c362..60d842d3a 100644
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -72,7 +72,7 @@ class GloboIE(InfoExtractor):
return
try:
- self._download_json(
+ glb_id = (self._download_json(
'https://login.globo.com/api/authentication', None, data=json.dumps({
'payload': {
'email': email,
@@ -81,7 +81,9 @@ class GloboIE(InfoExtractor):
},
}).encode(), headers={
'Content-Type': 'application/json; charset=utf-8',
- })
+ }) or {}).get('glbId')
+ if glb_id:
+ self._set_cookie('.globo.com', 'GLBID', glb_id)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
resp = self._parse_json(e.cause.read(), None)
@@ -94,21 +96,31 @@ class GloboIE(InfoExtractor):
video = self._download_json(
'http://api.globovideos.com/videos/%s/playlist' % video_id,
video_id)['videos'][0]
+ if video.get('encrypted') is True:
+ raise ExtractorError('This video is DRM protected.', expected=True)
title = video['title']
formats = []
+ subtitles = {}
for resource in video['resources']:
resource_id = resource.get('_id')
resource_url = resource.get('url')
- if not resource_id or not resource_url:
+ resource_type = resource.get('type')
+ if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
+ continue
+
+ if resource_type == 'subtitle':
+ subtitles.setdefault(resource.get('language') or 'por', []).append({
+ 'url': resource_url,
+ })
continue
security = self._download_json(
'http://security.video.globo.com/videos/%s/hash' % video_id,
video_id, 'Downloading security hash for %s' % resource_id, query={
- 'player': 'flash',
- 'version': '17.0.0.132',
+ 'player': 'desktop',
+ 'version': '5.19.1',
'resource_id': resource_id,
})
@@ -121,18 +133,23 @@ class GloboIE(InfoExtractor):
continue
hash_code = security_hash[:2]
- received_time = security_hash[2:12]
- received_random = security_hash[12:22]
- received_md5 = security_hash[22:]
-
- sign_time = compat_str(int(received_time) + 86400)
padding = '%010d' % random.randint(1, 10000000000)
-
- md5_data = (received_md5 + sign_time + padding + '0xFF01DD').encode()
+ if hash_code in ('04', '14'):
+ received_time = security_hash[3:13]
+ received_md5 = security_hash[24:]
+ hash_prefix = security_hash[:23]
+ elif hash_code in ('02', '12', '03', '13'):
+ received_time = security_hash[2:12]
+ received_md5 = security_hash[22:]
+ padding += '1'
+ hash_prefix = '05' + security_hash[:22]
+
+ padded_sign_time = compat_str(int(received_time) + 86400) + padding
+ md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
- signed_hash = hash_code + received_time + received_random + sign_time + padding + signed_md5
+ signed_hash = hash_prefix + padded_sign_time + signed_md5
+ signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
- signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash')
if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(
signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
@@ -162,7 +179,8 @@ class GloboIE(InfoExtractor):
'duration': duration,
'uploader': uploader,
'uploader_id': uploader_id,
- 'formats': formats
+ 'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index ec9dd6e3a..03cfba91f 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -25,19 +25,32 @@ class GoIE(AdobePassIE):
},
'watchdisneychannel': {
'brand': '004',
- 'requestor_id': 'Disney',
+ 'resource_id': 'Disney',
},
'watchdisneyjunior': {
'brand': '008',
- 'requestor_id': 'DisneyJunior',
+ 'resource_id': 'DisneyJunior',
},
'watchdisneyxd': {
'brand': '009',
- 'requestor_id': 'DisneyXD',
+ 'resource_id': 'DisneyXD',
+ },
+ 'disneynow': {
+ 'brand': '011',
+ 'resource_id': 'Disney',
}
}
- _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
- % '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?P<sub_domain>%s)\.)?go|
+ (?P<sub_domain_2>abc|freeform|disneynow)
+ )\.com/
+ (?:
+ (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
+ (?:[^/]+/)*(?P<display_id>[^/?\#]+)
+ )
+ ''' % '|'.join(list(_SITE_INFO.keys()))
_TESTS = [{
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
'info_dict': {
@@ -50,6 +63,7 @@ class GoIE(AdobePassIE):
# m3u8 download
'skip_download': True,
},
+ 'skip': 'This content is no longer available.',
}, {
'url': 'http://watchdisneyxd.go.com/doraemon',
'info_dict': {
@@ -58,6 +72,34 @@ class GoIE(AdobePassIE):
},
'playlist_mincount': 51,
}, {
+ 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
+ 'info_dict': {
+ 'id': 'VDKA3609139',
+ 'ext': 'mp4',
+ 'title': 'This Guilty Blood',
+ 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
+ 'info_dict': {
+ 'id': 'VDKA13435179',
+ 'ext': 'mp4',
+ 'title': 'The Bet',
+ 'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'only_matching': True,
}, {
@@ -71,6 +113,9 @@ class GoIE(AdobePassIE):
# brand 008
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
'only_matching': True,
+ }, {
+ 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
}]
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
@@ -80,16 +125,21 @@ class GoIE(AdobePassIE):
display_id)['video']
def _real_extract(self, url):
- sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
+ mobj = re.match(self._VALID_URL, url)
+ sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
+ video_id, display_id = mobj.group('id', 'display_id')
site_info = self._SITE_INFO.get(sub_domain, {})
brand = site_info.get('brand')
if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex(
- # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
- # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
- r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
- default=None)
+ (
+ # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
+ # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
+ r'data-video-id=["\']*(VDKA\w+)',
+ # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
+ r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
+ ), webpage, 'video id', default=video_id)
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
@@ -130,8 +180,8 @@ class GoIE(AdobePassIE):
'device': '001',
}
if video_data.get('accesslevel') == '1':
- requestor_id = site_info['requestor_id']
- resource = self._get_mvpd_resource(
+ requestor_id = site_info.get('requestor_id', 'DisneyChannels')
+ resource = site_info.get('resource_id') or self._get_mvpd_resource(
requestor_id, title, video_id, None)
auth = self._extract_mvpd_auth(
url, video_id, requestor_id, resource)
diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
deleted file mode 100644
index c3ea717bc..000000000
--- a/youtube_dl/extractor/go90.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_age_limit,
- parse_iso8601,
-)
-
-
-class Go90IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'https://www.go90.com/videos/84BUqjLpf9D',
- 'md5': 'efa7670dbbbf21a7b07b360652b24a32',
- 'info_dict': {
- 'id': '84BUqjLpf9D',
- 'ext': 'mp4',
- 'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
- 'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
- 'timestamp': 1491868800,
- 'upload_date': '20170411',
- 'age_limit': 14,
- }
- }, {
- 'url': 'https://www.go90.com/embed/261MflWkD3N',
- 'only_matching': True,
- }]
- _GEO_BYPASS = False
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- headers = self.geo_verification_headers()
- headers.update({
- 'Content-Type': 'application/json; charset=utf-8',
- })
- video_data = self._download_json(
- 'https://www.go90.com/api/view/items/' + video_id, video_id,
- headers=headers, data=b'{"client":"web","device_type":"pc"}')
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- message = self._parse_json(e.cause.read().decode(), None)['error']['message']
- if 'region unavailable' in message:
- self.raise_geo_restricted(countries=['US'])
- raise ExtractorError(message, expected=True)
- raise
-
- if video_data.get('requires_drm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
- main_video_asset = video_data['main_video_asset']
-
- episode_number = int_or_none(video_data.get('episode_number'))
- series = None
- season = None
- season_id = None
- season_number = None
- for metadata in video_data.get('__children', {}).get('Item', {}).values():
- if metadata.get('type') == 'show':
- series = metadata.get('title')
- elif metadata.get('type') == 'season':
- season = metadata.get('title')
- season_id = metadata.get('id')
- season_number = int_or_none(metadata.get('season_number'))
-
- title = episode = video_data.get('title') or series
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- thumbnails = []
- formats = []
- subtitles = {}
- for asset in video_data.get('assets'):
- if asset.get('id') == main_video_asset:
- for source in asset.get('sources', []):
- source_location = source.get('location')
- if not source_location:
- continue
- source_type = source.get('type')
- if source_type == 'hls':
- m3u8_formats = self._extract_m3u8_formats(
- source_location, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False)
- for f in m3u8_formats:
- mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
- if mobj:
- height, tbr = mobj.groups()
- height = int_or_none(height)
- f.update({
- 'height': f.get('height') or height,
- 'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
- 'tbr': f.get('tbr') or int_or_none(tbr),
- })
- formats.extend(m3u8_formats)
- elif source_type == 'dash':
- formats.extend(self._extract_mpd_formats(
- source_location, video_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'format_id': source.get('name'),
- 'url': source_location,
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- 'tbr': int_or_none(source.get('bitrate')),
- })
-
- for caption in asset.get('caption_metadata', []):
- caption_url = caption.get('source_url')
- if not caption_url:
- continue
- subtitles.setdefault(caption.get('language', 'en'), []).append({
- 'url': caption_url,
- 'ext': determine_ext(caption_url, 'vtt'),
- })
- elif asset.get('type') == 'image':
- asset_location = asset.get('location')
- if not asset_location:
- continue
- thumbnails.append({
- 'url': asset_location,
- 'width': int_or_none(asset.get('width')),
- 'height': int_or_none(asset.get('height')),
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnails': thumbnails,
- 'description': video_data.get('short_description'),
- 'like_count': int_or_none(video_data.get('like_count')),
- 'timestamp': parse_iso8601(video_data.get('released_at')),
- 'series': series,
- 'episode': episode,
- 'season': season,
- 'season_id': season_id,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'subtitles': subtitles,
- 'age_limit': parse_age_limit(video_data.get('rating')),
- }
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dl/extractor/googledrive.py
index 3bf462d63..589e4d5c3 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dl/extractor/googledrive.py
@@ -36,7 +36,7 @@ class GoogleDriveIE(InfoExtractor):
}
}, {
# video can't be watched anonymously due to view count limit reached,
- # but can be downloaded (see https://github.com/rg3/youtube-dl/issues/14046)
+ # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
'info_dict': {
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py
deleted file mode 100644
index 342a6130e..000000000
--- a/youtube_dl/extractor/hark.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class HarkIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
- _TEST = {
- 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
- 'md5': '6783a58491b47b92c7c1af5a77d4cbee',
- 'info_dict': {
- 'id': 'mmbzyhkgny',
- 'ext': 'mp3',
- 'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
- 'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
- 'duration': 11,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- data = self._download_json(
- 'http://www.hark.com/clips/%s.json' % video_id, video_id)
-
- return {
- 'id': video_id,
- 'url': data['url'],
- 'title': data['name'],
- 'description': data.get('description'),
- 'thumbnail': data.get('image_original'),
- 'duration': data.get('duration'),
- }
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dl/extractor/hbo.py
index 859ad5429..68df748f5 100644
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dl/extractor/hbo.py
@@ -4,12 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
xpath_text,
xpath_element,
int_or_none,
parse_duration,
+ urljoin,
)
@@ -53,10 +53,13 @@ class HBOBaseIE(InfoExtractor):
},
}
- def _extract_from_id(self, video_id):
- video_data = self._download_xml(
- 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
- title = xpath_text(video_data, 'title', 'title', True)
+ def _extract_info(self, url, display_id):
+ video_data = self._download_xml(url, display_id)
+ video_id = xpath_text(video_data, 'id', fatal=True)
+ episode_title = title = xpath_text(video_data, 'title', fatal=True)
+ series = xpath_text(video_data, 'program')
+ if series:
+ title = '%s - %s' % (series, title)
formats = []
for source in xpath_element(video_data, 'videos', 'sources', True):
@@ -128,68 +131,45 @@ class HBOBaseIE(InfoExtractor):
'width': width,
})
+ subtitles = None
+ caption_url = xpath_text(video_data, 'captionUrl')
+ if caption_url:
+ subtitles = {
+ 'en': [{
+ 'url': caption_url,
+ 'ext': 'ttml'
+ }],
+ }
+
return {
'id': video_id,
'title': title,
'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
+ 'series': series,
+ 'episode': episode_title,
'formats': formats,
'thumbnails': thumbnails,
+ 'subtitles': subtitles,
}
class HBOIE(HBOBaseIE):
IE_NAME = 'hbo'
- _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?:video|embed)(?:/[^/]+)*/(?P<id>[^/?#]+)'
_TEST = {
- 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
- 'md5': '2c6a6bc1222c7e91cb3334dad1746e5a',
+ 'url': 'https://www.hbo.com/video/game-of-thrones/seasons/season-8/videos/trailer',
+ 'md5': '8126210656f433c452a21367f9ad85b3',
'info_dict': {
- 'id': '1437839',
+ 'id': '22113301',
'ext': 'mp4',
- 'title': 'Ep. 64 Clip: Encryption',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'duration': 1072,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return self._extract_from_id(video_id)
-
-
-class HBOEpisodeIE(HBOBaseIE):
- IE_NAME = 'hbo:episode'
- _VALID_URL = r'https?://(?:www\.)?hbo\.com/(?P<path>(?!video)(?:(?:[^/]+/)+video|watch-free-episodes)/(?P<id>[0-9a-z-]+))(?:\.html)?'
-
- _TESTS = [{
- 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
- 'md5': '61ead79b9c0dfa8d3d4b07ef4ac556fb',
- 'info_dict': {
- 'id': '1439518',
- 'display_id': 'ep-52-inside-the-episode',
- 'ext': 'mp4',
- 'title': 'Ep. 52: Inside the Episode',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'duration': 240,
+ 'title': 'Game of Thrones - Trailer',
},
- }, {
- 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
- 'only_matching': True,
- }, {
- 'url': 'http://www.hbo.com/watch-free-episodes/last-week-tonight-with-john-oliver',
- 'only_matching': True,
- }]
+ 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'],
+ }
def _real_extract(self, url):
- path, display_id = re.match(self._VALID_URL, url).groups()
-
- content = self._download_json(
- 'http://www.hbo.com/api/content/' + path, display_id)['content']
-
- video_id = compat_str((content.get('parsed', {}).get(
- 'common:FullBleedVideo', {}) or content['selectedEpisode'])['videoId'])
-
- info_dict = self._extract_from_id(video_id)
- info_dict['display_id'] = display_id
-
- return info_dict
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ location_path = self._parse_json(self._html_search_regex(
+ r'data-state="({.+?})"', webpage, 'state'), display_id)['video']['locationUrl']
+ return self._extract_info(urljoin(url, location_path), display_id)
diff --git a/youtube_dl/extractor/heise.py b/youtube_dl/extractor/heise.py
index 5c03780a3..cbe564a3c 100644
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dl/extractor/heise.py
@@ -105,8 +105,7 @@ class HeiseIE(InfoExtractor):
webpage, default=None) or self._html_search_meta(
'description', webpage)
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
+ def _make_kaltura_result(kaltura_url):
return {
'_type': 'url_transparent',
'url': smuggle_url(kaltura_url, {'source_url': url}),
@@ -115,6 +114,16 @@ class HeiseIE(InfoExtractor):
'description': description,
}
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return _make_kaltura_result(kaltura_url)
+
+ kaltura_id = self._search_regex(
+ r'entry-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura id',
+ default=None, group='id')
+ if kaltura_id:
+ return _make_kaltura_result('kaltura:2238431:%s' % kaltura_id)
+
yt_urls = YoutubeIE._extract_urls(webpage)
if yt_urls:
return self.playlist_from_matches(
@@ -155,8 +164,8 @@ class HeiseIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
- 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image') or
- self._og_search_thumbnail(webpage)),
+ 'thumbnail': (xpath_text(doc, './/{http://rss.jwpcdn.com/}image')
+ or self._og_search_thumbnail(webpage)),
'timestamp': parse_iso8601(
self._html_search_meta('date', webpage)),
'formats': formats,
diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py
index 0ee8ea712..fae425103 100644
--- a/youtube_dl/extractor/hellporno.py
+++ b/youtube_dl/extractor/hellporno.py
@@ -1,12 +1,11 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
- js_to_json,
+ int_or_none,
+ merge_dicts,
remove_end,
- determine_ext,
+ unified_timestamp,
)
@@ -14,15 +13,21 @@ class HellPornoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
_TESTS = [{
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
- 'md5': '1fee339c610d2049699ef2aa699439f1',
+ 'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
'info_dict': {
'id': '149116',
'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
'ext': 'mp4',
'title': 'Dixie is posing with naked ass very erotic',
+ 'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
+ 'categories': list,
'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 240,
+ 'timestamp': 1398762720,
+ 'upload_date': '20140429',
+ 'view_count': int,
'age_limit': 18,
- }
+ },
}, {
'url': 'http://hellporno.net/v/186271/',
'only_matching': True,
@@ -36,40 +41,36 @@ class HellPornoIE(InfoExtractor):
title = remove_end(self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
- flashvars = self._parse_json(self._search_regex(
- r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
- display_id, transform_source=js_to_json)
-
- video_id = flashvars.get('video_id')
- thumbnail = flashvars.get('preview_url')
- ext = determine_ext(flashvars.get('postfix'), 'mp4')
-
- formats = []
- for video_url_key in ['video_url', 'video_alt_url']:
- video_url = flashvars.get(video_url_key)
- if not video_url:
- continue
- video_text = flashvars.get('%s_text' % video_url_key)
- fmt = {
- 'url': video_url,
- 'ext': ext,
- 'format_id': video_text,
- }
- m = re.search(r'^(?P<height>\d+)[pP]', video_text)
- if m:
- fmt['height'] = int(m.group('height'))
- formats.append(fmt)
- self._sort_formats(formats)
+ info = self._parse_html5_media_entries(url, webpage, display_id)[0]
+ self._sort_formats(info['formats'])
- categories = self._html_search_meta(
- 'keywords', webpage, 'categories', default='').split(',')
+ video_id = self._search_regex(
+ (r'chs_object\s*=\s*["\'](\d+)',
+ r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+ description = self._search_regex(
+ r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
+ 'description', fatal=False)
+ categories = [
+ c.strip()
+ for c in self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+ if c.strip()]
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, fatal=False))
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
- return {
+ return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
'title': title,
- 'thumbnail': thumbnail,
+ 'description': description,
'categories': categories,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
'age_limit': 18,
- 'formats': formats,
- }
+ })
diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py
index 1d905dc81..3e5ff2685 100644
--- a/youtube_dl/extractor/hitbox.py
+++ b/youtube_dl/extractor/hitbox.py
@@ -58,8 +58,8 @@ class HitboxIE(InfoExtractor):
title = video_meta.get('media_status')
alt_title = video_meta.get('media_title')
description = clean_html(
- video_meta.get('media_description') or
- video_meta.get('media_description_md'))
+ video_meta.get('media_description')
+ or video_meta.get('media_description_md'))
duration = float_or_none(video_meta.get('media_duration'))
uploader = video_meta.get('media_user_name')
views = int_or_none(video_meta.get('media_views'))
diff --git a/youtube_dl/extractor/hitrecord.py b/youtube_dl/extractor/hitrecord.py
index 01a6946d0..fd5dc2935 100644
--- a/youtube_dl/extractor/hitrecord.py
+++ b/youtube_dl/extractor/hitrecord.py
@@ -47,8 +47,8 @@ class HitRecordIE(InfoExtractor):
tags = [
t['text']
for t in tags_list
- if isinstance(t, dict) and t.get('text') and
- isinstance(t['text'], compat_str)]
+ if isinstance(t, dict) and t.get('text')
+ and isinstance(t['text'], compat_str)]
return {
'id': video_id,
diff --git a/youtube_dl/extractor/hketv.py b/youtube_dl/extractor/hketv.py
new file mode 100644
index 000000000..1f3502b90
--- /dev/null
+++ b/youtube_dl/extractor/hketv.py
@@ -0,0 +1,191 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_count,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class HKETVIE(InfoExtractor):
+ IE_NAME = 'hketv'
+ IE_DESC = '香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['HK']
+ _VALID_URL = r'https?://(?:www\.)?hkedcity\.net/etv/resource/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.hkedcity.net/etv/resource/2932360618',
+ 'md5': 'f193712f5f7abb208ddef3c5ea6ed0b7',
+ 'info_dict': {
+ 'id': '2932360618',
+ 'ext': 'mp4',
+ 'title': '喜閱一生(共享閱讀樂) (中、英文字幕可供選擇)',
+ 'description': 'md5:d5286d05219ef50e0613311cbe96e560',
+ 'upload_date': '20181024',
+ 'duration': 900,
+ 'subtitles': 'count:2',
+ },
+ 'skip': 'Geo restricted to HK',
+ }, {
+ 'url': 'https://www.hkedcity.net/etv/resource/972641418',
+ 'md5': '1ed494c1c6cf7866a8290edad9b07dc9',
+ 'info_dict': {
+ 'id': '972641418',
+ 'ext': 'mp4',
+ 'title': '衣冠楚楚 (天使系列之一)',
+ 'description': 'md5:10bb3d659421e74f58e5db5691627b0f',
+ 'upload_date': '20070109',
+ 'duration': 907,
+ 'subtitles': {},
+ },
+ 'params': {
+ 'geo_verification_proxy': '<HK proxy here>',
+ },
+ 'skip': 'Geo restricted to HK',
+ }]
+
+ _CC_LANGS = {
+ '中文(繁體中文)': 'zh-Hant',
+ '中文(简体中文)': 'zh-Hans',
+ 'English': 'en',
+ 'Bahasa Indonesia': 'id',
+ '\u0939\u093f\u0928\u094d\u0926\u0940': 'hi',
+ '\u0928\u0947\u092a\u093e\u0932\u0940': 'ne',
+ 'Tagalog': 'tl',
+ '\u0e44\u0e17\u0e22': 'th',
+ '\u0627\u0631\u062f\u0648': 'ur',
+ }
+ _FORMAT_HEIGHTS = {
+ 'SD': 360,
+ 'HD': 720,
+ }
+ _APPS_BASE_URL = 'https://apps.hkedcity.net'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = (
+ self._html_search_meta(
+ ('ed_title', 'search.ed_title'), webpage, default=None)
+ or self._search_regex(
+ r'data-favorite_title_(?:eng|chi)=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'title', default=None, group='id')
+ or self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', default=None)
+ or self._og_search_title(webpage)
+ )
+
+ file_id = self._search_regex(
+ r'post_var\[["\']file_id["\']\s*\]\s*=\s*(.+?);',
+ webpage, 'file ID')
+ curr_url = self._search_regex(
+ r'post_var\[["\']curr_url["\']\s*\]\s*=\s*"(.+?)";',
+ webpage, 'curr URL')
+ data = {
+ 'action': 'get_info',
+ 'curr_url': curr_url,
+ 'file_id': file_id,
+ 'video_url': file_id,
+ }
+
+ response = self._download_json(
+ self._APPS_BASE_URL + '/media/play/handler.php', video_id,
+ data=urlencode_postdata(data),
+ headers=merge_dicts({
+ 'Content-Type': 'application/x-www-form-urlencoded'},
+ self.geo_verification_headers()))
+
+ if not response.get('success') or not response.get('access'):
+ error = clean_html(response.get('access_err_msg')) or ''
+ if 'Video streaming is not available in your country' in error:
+ self.raise_geo_restricted(
+ msg=error, countries=self._GEO_COUNTRIES)
+ else:
+ raise ExtractorError(error, expected=True)
+
+ result = response['result']
+
+ formats = []
+
+ width = int_or_none(result.get('width'))
+ height = int_or_none(result.get('height'))
+
+ playlist0 = result['playlist'][0]
+ for fmt in playlist0['sources']:
+ file_url = urljoin(self._APPS_BASE_URL, fmt.get('file'))
+ if not file_url:
+ continue
+ # If we ever wanted to provide the final resolved URL that
+ # does not require cookies, albeit with a shorter lifespan:
+ # urlh = self._downloader.urlopen(file_url)
+ # resolved_url = urlh.geturl()
+ label = fmt.get('label')
+ h = self._FORMAT_HEIGHTS.get(label)
+ w = h * width // height if h and width and height else None
+ formats.append({
+ 'format_id': label,
+ 'ext': fmt.get('type'),
+ 'url': file_url,
+ 'width': w,
+ 'height': h,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ tracks = try_get(playlist0, lambda x: x['tracks'], list) or []
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = str_or_none(track.get('kind'))
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(self._APPS_BASE_URL, track.get('file'))
+ if not track_url:
+ continue
+ track_label = track.get('label')
+ subtitles.setdefault(self._CC_LANGS.get(
+ track_label, track_label), []).append({
+ 'url': self._proto_relative_url(track_url),
+ 'ext': 'srt',
+ })
+
+ # Likes
+ emotion = self._download_json(
+ 'https://emocounter.hkedcity.net/handler.php', video_id,
+ data=urlencode_postdata({
+ 'action': 'get_emotion',
+ 'data[bucket_id]': 'etv',
+ 'data[identifier]': video_id,
+ }),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'},
+ fatal=False) or {}
+ like_count = int_or_none(try_get(
+ emotion, lambda x: x['data']['emotion_data'][0]['count']))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': self._html_search_meta(
+ 'description', webpage, fatal=False),
+ 'upload_date': unified_strdate(self._html_search_meta(
+ 'ed_date', webpage, fatal=False), day_first=False),
+ 'duration': int_or_none(result.get('length')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': urljoin(self._APPS_BASE_URL, result.get('image')),
+ 'view_count': parse_count(result.get('view_count')),
+ 'like_count': like_count,
+ }
diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py
index d28af36ec..f97eefa3d 100644
--- a/youtube_dl/extractor/hotstar.py
+++ b/youtube_dl/extractor/hotstar.py
@@ -1,49 +1,76 @@
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
+import hmac
import re
+import time
+import uuid
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
)
class HotStarBaseIE(InfoExtractor):
- _GEO_COUNTRIES = ['IN']
-
- def _download_json(self, *args, **kwargs):
- response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
- if response['resultCode'] != 'OK':
- if kwargs.get('fatal'):
- raise ExtractorError(
- response['errorDescription'], expected=True)
- return None
- return response['resultObj']
-
- def _download_content_info(self, content_id):
- return self._download_json(
- 'https://account.hotstar.com/AVS/besc', content_id, query={
- 'action': 'GetAggregatedContentDetails',
- 'appVersion': '5.0.40',
- 'channel': 'PCTV',
- 'contentId': content_id,
- })['contentInfo'][0]
+ _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
+
+ def _call_api_impl(self, path, video_id, query):
+ st = int(time.time())
+ exp = st + 6000
+ auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
+ auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
+ response = self._download_json(
+ 'https://api.hotstar.com/' + path, video_id, headers={
+ 'hotstarauth': auth,
+ 'x-country-code': 'IN',
+ 'x-platform-code': 'JIO',
+ }, query=query)
+ if response['statusCode'] != 'OK':
+ raise ExtractorError(
+ response['body']['message'], expected=True)
+ return response['body']['results']
+
+ def _call_api(self, path, video_id, query_name='contentId'):
+ return self._call_api_impl(path, video_id, {
+ query_name: video_id,
+ 'tas': 10000,
+ })
+
+ def _call_api_v2(self, path, video_id):
+ return self._call_api_impl(
+ '%s/in/contents/%s' % (path, video_id), video_id, {
+ 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
+ 'client': 'mweb',
+ 'clientVersion': '6.18.0',
+ 'deviceId': compat_str(uuid.uuid4()),
+ 'osName': 'Windows',
+ 'osVersion': '10',
+ })
class HotStarIE(HotStarBaseIE):
+ IE_NAME = 'hotstar'
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
_TESTS = [{
- 'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
+ # contentData
+ 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
'info_dict': {
'id': '1000076273',
'ext': 'mp4',
- 'title': 'On Air With AIB',
+ 'title': 'Can You Not Spread Rumours?',
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
- 'timestamp': 1447227000,
+ 'timestamp': 1447248600,
'upload_date': '20151111',
'duration': 381,
},
@@ -52,111 +79,132 @@ class HotStarIE(HotStarBaseIE):
'skip_download': True,
}
}, {
+ # contentDetail
+ 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+ 'only_matching': True,
+ }, {
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
'only_matching': True,
}, {
'url': 'http://www.hotstar.com/1000000515',
'only_matching': True,
+ }, {
+ # only available via api v2
+ 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ 'only_matching': True,
}]
+ _GEO_BYPASS = False
def _real_extract(self, url):
video_id = self._match_id(url)
- video_data = self._download_content_info(video_id)
-
- title = video_data['episodeTitle']
-
- if video_data.get('encrypted') == 'Y':
+ webpage = self._download_webpage(url, video_id)
+ app_state = self._parse_json(self._search_regex(
+ r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
+ webpage, 'app state'), video_id)
+ video_data = {}
+ getters = list(
+ lambda x, k=k: x['initialState']['content%s' % k]['content']
+ for k in ('Data', 'Detail')
+ )
+ for v in app_state.values():
+ content = try_get(v, getters, dict)
+ if content and content.get('contentId') == video_id:
+ video_data = content
+ break
+
+ title = video_data['title']
+
+ if video_data.get('drmProtected'):
raise ExtractorError('This video is DRM protected.', expected=True)
+ headers = {'Referer': url}
formats = []
- for f in ('JIO',):
- format_data = self._download_json(
- 'http://getcdn.hotstar.com/AVS/besc',
- video_id, 'Downloading %s JSON metadata' % f,
- fatal=False, query={
- 'action': 'GetCDN',
- 'asJson': 'Y',
- 'channel': f,
- 'id': video_id,
- 'type': 'VOD',
- })
- if format_data:
- format_url = format_data.get('src')
- if not format_url:
- continue
- ext = determine_ext(format_url)
- if ext == 'm3u8':
+ geo_restricted = False
+ playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
+ for playback_set in playback_sets:
+ if not isinstance(playback_set, dict):
+ continue
+ format_url = url_or_none(playback_set.get('playbackUrl'))
+ if not format_url:
+ continue
+ format_url = re.sub(
+ r'(?<=//staragvod)(\d)', r'web\1', format_url)
+ tags = str_or_none(playback_set.get('tagsCombination')) or ''
+ if tags and 'encryption:plain' not in tags:
+ continue
+ ext = determine_ext(format_url)
+ try:
+ if 'package:hls' in tags or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
- m3u8_id='hls', fatal=False))
+ entry_protocol='m3u8_native',
+ m3u8_id='hls', headers=headers))
+ elif 'package:dash' in tags or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', headers=headers))
elif ext == 'f4m':
# produce broken files
- continue
+ pass
else:
formats.append({
'url': format_url,
- 'width': int_or_none(format_data.get('width')),
- 'height': int_or_none(format_data.get('height')),
+ 'width': int_or_none(playback_set.get('width')),
+ 'height': int_or_none(playback_set.get('height')),
})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ geo_restricted = True
+ continue
+ if not formats and geo_restricted:
+ self.raise_geo_restricted(countries=['IN'])
self._sort_formats(formats)
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
- 'timestamp': int_or_none(video_data.get('broadcastDate')),
+ 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
'formats': formats,
+ 'channel': video_data.get('channelName'),
+ 'channel_id': video_data.get('channelId'),
+ 'series': video_data.get('showName'),
+ 'season': video_data.get('seasonName'),
+ 'season_number': int_or_none(video_data.get('seasonNo')),
+ 'season_id': video_data.get('seasonId'),
'episode': title,
- 'episode_number': int_or_none(video_data.get('episodeNumber')),
- 'series': video_data.get('contentTitle'),
+ 'episode_number': int_or_none(video_data.get('episodeNo')),
}
class HotStarPlaylistIE(HotStarBaseIE):
IE_NAME = 'hotstar:playlist'
- _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
_TESTS = [{
- 'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
'info_dict': {
- 'id': '14812',
+ 'id': '3_2_26',
},
- 'playlist_mincount': 75,
+ 'playlist_mincount': 20,
}, {
- 'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
'only_matching': True,
}]
- _ITEM_TYPES = {
- 'episodes': 'EPISODE',
- 'popular-clips': 'CLIPS',
- }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- base_url = mobj.group('url')
- content_id = mobj.group('content_id')
- playlist_type = mobj.group('type')
-
- content_info = self._download_content_info(content_id)
- playlist_id = compat_str(content_info['categoryId'])
-
- collection = self._download_json(
- 'https://search.hotstar.com/AVS/besc', playlist_id, query={
- 'action': 'SearchContents',
- 'appVersion': '5.0.40',
- 'channel': 'PCTV',
- 'moreFilters': 'series:%s;' % playlist_id,
- 'query': '*',
- 'searchOrder': 'last_broadcast_date desc,year desc,title asc',
- 'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
- })
+ playlist_id = self._match_id(url)
+
+ collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
entries = [
self.url_result(
- '%s/_/%s' % (base_url, video['contentId']),
+ 'https://www.hotstar.com/%s' % video['contentId'],
ie=HotStarIE.ie_key(), video_id=video['contentId'])
- for video in collection['response']['docs']
+ for video in collection['assets']['items']
if video.get('contentId')]
return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/hrti.py b/youtube_dl/extractor/hrti.py
index 9ba1aa703..23f7b1fc9 100644
--- a/youtube_dl/extractor/hrti.py
+++ b/youtube_dl/extractor/hrti.py
@@ -60,8 +60,8 @@ class HRTiBaseIE(InfoExtractor):
language=self._APP_LANGUAGE,
application_id=self._APP_PUBLICATION_ID)
- self._login_url = (modules['user']['resources']['login']['uri'] +
- '/format/json').format(session_id=self._session_id)
+ self._login_url = (modules['user']['resources']['login']['uri']
+ + '/format/json').format(session_id=self._session_id)
self._logout_url = modules['user']['resources']['logout']['uri']
diff --git a/youtube_dl/extractor/hungama.py b/youtube_dl/extractor/hungama.py
new file mode 100644
index 000000000..3fdaac5b6
--- /dev/null
+++ b/youtube_dl/extractor/hungama.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class HungamaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?hungama\.com/
+ (?:
+ (?:video|movie)/[^/]+/|
+ tv-show/(?:[^/]+/){2}\d+/episode/[^/]+/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.hungama.com/video/krishna-chants/39349649/',
+ 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+ 'info_dict': {
+ 'id': '2931166',
+ 'ext': 'mp4',
+ 'title': 'Lucky Ali - Kitni Haseen Zindagi',
+ 'track': 'Kitni Haseen Zindagi',
+ 'artist': 'Lucky Ali',
+ 'album': 'Aks',
+ 'release_year': 2000,
+ }
+ }, {
+ 'url': 'https://www.hungama.com/movie/kahaani-2/44129919/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hungama.com/tv-show/padded-ki-pushup/season-1/44139461/episode/ep-02-training-sasu-pathlaag-karing/44139503/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ info = self._search_json_ld(webpage, video_id)
+
+ m3u8_url = self._download_json(
+ 'https://www.hungama.com/index.php', video_id,
+ data=urlencode_postdata({'content_id': video_id}), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'c': 'common',
+ 'm': 'get_video_mdn_url',
+ })['stream_url']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ info.update({
+ 'id': video_id,
+ 'formats': formats,
+ })
+ return info
+
+
+class HungamaSongIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hungama\.com/song/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.hungama.com/song/kitni-haseen-zindagi/2931166/',
+ 'md5': 'a845a6d1ebd08d80c1035126d49bd6a0',
+ 'info_dict': {
+ 'id': '2931166',
+ 'ext': 'mp4',
+ 'title': 'Lucky Ali - Kitni Haseen Zindagi',
+ 'track': 'Kitni Haseen Zindagi',
+ 'artist': 'Lucky Ali',
+ 'album': 'Aks',
+ 'release_year': 2000,
+ }
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://www.hungama.com/audio-player-data/track/%s' % audio_id,
+ audio_id, query={'_country': 'IN'})[0]
+
+ track = data['song_name']
+ artist = data.get('singer_name')
+
+ m3u8_url = self._download_json(
+ data.get('file') or data['preview_link'],
+ audio_id)['response']['media_url']
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, audio_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ title = '%s - %s' % (artist, track) if artist else track
+ thumbnail = data.get('img_src') or data.get('album_image')
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'track': track,
+ 'artist': artist,
+ 'album': data.get('album_name'),
+ 'release_year': int_or_none(data.get('date')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py
index f7c913054..9ca28d632 100644
--- a/youtube_dl/extractor/hypem.py
+++ b/youtube_dl/extractor/hypem.py
@@ -1,18 +1,11 @@
from __future__ import unicode_literals
-import json
-import time
-
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
-from ..utils import (
- ExtractorError,
- sanitized_Request,
-)
+from ..utils import int_or_none
class HypemIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
+ _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[0-9a-z]{5})'
_TEST = {
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
'md5': 'b9cc91b5af8995e9f0c1cee04c575828',
@@ -21,41 +14,36 @@ class HypemIE(InfoExtractor):
'ext': 'mp3',
'title': 'Tame',
'uploader': 'BODYWORK',
+ 'timestamp': 1371810457,
+ 'upload_date': '20130621',
}
}
def _real_extract(self, url):
track_id = self._match_id(url)
- data = {'ax': 1, 'ts': time.time()}
- request = sanitized_Request(url + '?' + compat_urllib_parse_urlencode(data))
- response, urlh = self._download_webpage_handle(
- request, track_id, 'Downloading webpage with the url')
-
- html_tracks = self._html_search_regex(
- r'(?ms)<script type="application/json" id="displayList-data">(.+?)</script>',
- response, 'tracks')
- try:
- track_list = json.loads(html_tracks)
- track = track_list['tracks'][0]
- except ValueError:
- raise ExtractorError('Hypemachine contained invalid JSON.')
-
- key = track['key']
+ response = self._download_webpage(url, track_id)
+
+ track = self._parse_json(self._html_search_regex(
+ r'(?s)<script\s+type="application/json"\s+id="displayList-data">(.+?)</script>',
+ response, 'tracks'), track_id)['tracks'][0]
+
track_id = track['id']
title = track['song']
- request = sanitized_Request(
- 'http://hypem.com/serve/source/%s/%s' % (track_id, key),
- '', {'Content-Type': 'application/json'})
- song_data = self._download_json(request, track_id, 'Downloading metadata')
- final_url = song_data['url']
- artist = track.get('artist')
+ final_url = self._download_json(
+ 'http://hypem.com/serve/source/%s/%s' % (track_id, track['key']),
+ track_id, 'Downloading metadata', headers={
+ 'Content-Type': 'application/json'
+ })['url']
return {
'id': track_id,
'url': final_url,
'ext': 'mp3',
'title': title,
- 'uploader': artist,
+ 'uploader': track.get('artist'),
+ 'duration': int_or_none(track.get('time')),
+ 'timestamp': int_or_none(track.get('ts')),
+ 'track': title,
}
diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py
deleted file mode 100644
index a39f422e9..000000000
--- a/youtube_dl/extractor/iconosquare.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- get_element_by_id,
- remove_end,
-)
-
-
-class IconosquareIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
- _TEST = {
- 'url': 'http://statigr.am/p/522207370455279102_24101272',
- 'md5': '6eb93b882a3ded7c378ee1d6884b1814',
- 'info_dict': {
- 'id': '522207370455279102_24101272',
- 'ext': 'mp4',
- 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
- 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
- 'timestamp': 1376471991,
- 'upload_date': '20130814',
- 'uploader': 'aguynamedpatrick',
- 'uploader_id': '24101272',
- 'comment_count': int,
- 'like_count': int,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- media = self._parse_json(
- get_element_by_id('mediaJson', webpage),
- video_id)
-
- formats = [{
- 'url': f['url'],
- 'format_id': format_id,
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height'))
- } for format_id, f in media['videos'].items()]
- self._sort_formats(formats)
-
- title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
-
- timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
- description = media.get('caption', {}).get('text')
-
- uploader = media.get('user', {}).get('username')
- uploader_id = media.get('user', {}).get('id')
-
- comment_count = int_or_none(media.get('comments', {}).get('count'))
- like_count = int_or_none(media.get('likes', {}).get('count'))
-
- thumbnails = [{
- 'url': t['url'],
- 'id': thumbnail_id,
- 'width': int_or_none(t.get('width')),
- 'height': int_or_none(t.get('height'))
- } for thumbnail_id, t in media.get('images', {}).items()]
-
- comments = [{
- 'id': comment.get('id'),
- 'text': comment['text'],
- 'timestamp': int_or_none(comment.get('created_time')),
- 'author': comment.get('from', {}).get('full_name'),
- 'author_id': comment.get('from', {}).get('username'),
- } for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'comment_count': comment_count,
- 'like_count': like_count,
- 'formats': formats,
- 'comments': comments,
- }
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
index 436759da5..a31301985 100644
--- a/youtube_dl/extractor/imdb.py
+++ b/youtube_dl/extractor/imdb.py
@@ -1,5 +1,7 @@
from __future__ import unicode_literals
+import base64
+import json
import re
from .common import InfoExtractor
@@ -8,6 +10,7 @@ from ..utils import (
mimetype2ext,
parse_duration,
qualities,
+ try_get,
url_or_none,
)
@@ -15,15 +18,16 @@ from ..utils import (
class ImdbIE(InfoExtractor):
IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers'
- _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
'info_dict': {
'id': '2524815897',
'ext': 'mp4',
- 'title': 'No. 2 from Ice Age: Continental Drift (2012)',
+ 'title': 'No. 2',
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
+ 'duration': 152,
}
}, {
'url': 'http://www.imdb.com/video/_/vi2524815897',
@@ -47,21 +51,23 @@ class ImdbIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
- video_metadata = self._parse_json(self._search_regex(
- r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
- 'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
- title = self._html_search_meta(
- ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
+
+ data = self._download_json(
+ 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
+ query={
+ 'key': base64.b64encode(json.dumps({
+ 'type': 'VIDEO_PLAYER',
+ 'subType': 'FORCE_LEGACY',
+ 'id': 'vi%s' % video_id,
+ }).encode()).decode(),
+ })[0]
quality = qualities(('SD', '480p', '720p', '1080p'))
formats = []
- for encoding in video_metadata.get('encodings', []):
+ for encoding in data['videoLegacyEncodings']:
if not encoding or not isinstance(encoding, dict):
continue
- video_url = url_or_none(encoding.get('videoUrl'))
+ video_url = url_or_none(encoding.get('url'))
if not video_url:
continue
ext = mimetype2ext(encoding.get(
@@ -69,7 +75,7 @@ class ImdbIE(InfoExtractor):
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ preference=1, m3u8_id='hls', fatal=False))
continue
format_id = encoding.get('definition')
formats.append({
@@ -80,13 +86,33 @@ class ImdbIE(InfoExtractor):
})
self._sort_formats(formats)
+ webpage = self._download_webpage(
+ 'https://www.imdb.com/video/vi' + video_id, video_id)
+ video_metadata = self._parse_json(self._search_regex(
+ r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
+ 'video metadata'), video_id)
+
+ video_info = video_metadata.get('VIDEO_INFO')
+ if video_info and isinstance(video_info, dict):
+ info = try_get(
+ video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
+ else:
+ info = {}
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title',
+ default=None) or info['videoTitle']
+
return {
'id': video_id,
'title': title,
+ 'alt_title': info.get('videoSubTitle'),
'formats': formats,
- 'description': video_metadata.get('description'),
- 'thumbnail': video_metadata.get('slate', {}).get('url'),
- 'duration': parse_duration(video_metadata.get('duration')),
+ 'description': info.get('videoDescription'),
+ 'thumbnail': url_or_none(try_get(
+ video_metadata, lambda x: x['videoSlate']['source'])),
+ 'duration': parse_duration(info.get('videoRuntime')),
}
diff --git a/youtube_dl/extractor/imggaming.py b/youtube_dl/extractor/imggaming.py
new file mode 100644
index 000000000..e11f92053
--- /dev/null
+++ b/youtube_dl/extractor/imggaming.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class ImgGamingBaseIE(InfoExtractor):
+ _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
+ _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
+ _HEADERS = None
+ _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
+ _REALM = None
+ _VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'
+
+ def _real_initialize(self):
+ self._HEADERS = {
+ 'Realm': 'dce.' + self._REALM,
+ 'x-api-key': self._API_KEY,
+ }
+
+ email, password = self._get_login_info()
+ if email is None:
+ self.raise_login_required()
+
+ p_headers = self._HEADERS.copy()
+ p_headers['Content-Type'] = 'application/json'
+ self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
+ self._API_BASE + 'login',
+ None, 'Logging in', data=json.dumps({
+ 'id': email,
+ 'secret': password,
+ }).encode(), headers=p_headers)['authorisationToken']
+
+ def _call_api(self, path, media_id):
+ return self._download_json(
+ self._API_BASE + path + media_id, media_id, headers=self._HEADERS)
+
+ def _extract_dve_api_url(self, media_id, media_type):
+ stream_path = 'stream'
+ if media_type == 'video':
+ stream_path += '/vod/'
+ else:
+ stream_path += '?eventId='
+ try:
+ return self._call_api(
+ stream_path, media_id)['playerUrlCallback']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(
+ self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
+ expected=True)
+ raise
+
+ def _real_extract(self, url):
+ domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+ media_type, media_id = 'playlist', playlist_id
+
+ if media_type == 'playlist':
+ playlist = self._call_api('vod/playlist/', media_id)
+ entries = []
+ for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
+ video_id = str_or_none(video.get('id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://%s/video/%s' % (domain, video_id),
+ self.ie_key(), video_id))
+ return self.playlist_result(
+ entries, media_id, playlist.get('title'),
+ playlist.get('description'))
+
+ dve_api_url = self._extract_dve_api_url(media_id, media_type)
+ video_data = self._download_json(dve_api_url, media_id)
+ is_live = media_type == 'live'
+ if is_live:
+ title = self._live_title(self._call_api('event/', media_id)['title'])
+ else:
+ title = video_data['name']
+
+ formats = []
+ for proto in ('hls', 'dash'):
+ media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
+ if not media_url:
+ continue
+ if proto == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
+ for f in m3u8_formats:
+ f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
+ formats.append(f)
+ else:
+ formats.extend(self._extract_mpd_formats(
+ media_url, media_id, mpd_id='dash', fatal=False,
+ headers=self._MANIFEST_HEADERS))
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for subtitle in video_data.get('subtitles', []):
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({
+ 'url': subtitle_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'tags': video_data.get('tags'),
+ 'is_live': is_live,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
index ecc958a17..a5ba03efa 100644
--- a/youtube_dl/extractor/imgur.py
+++ b/youtube_dl/extractor/imgur.py
@@ -12,7 +12,7 @@ from ..utils import (
class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:gallery|(?:topic|r)/[^/]+)/)?(?P<id>[a-zA-Z0-9]{6,})(?:[/?#&]+|\.[a-z0-9]+)?$'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://i.imgur.com/A61SaA1.gifv',
@@ -20,38 +20,23 @@ class ImgurIE(InfoExtractor):
'id': 'A61SaA1',
'ext': 'mp4',
'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 'Imgur: The magic of the Internet',
},
}, {
'url': 'https://imgur.com/A61SaA1',
- 'info_dict': {
- 'id': 'A61SaA1',
- 'ext': 'mp4',
- 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- 'description': 'Imgur: The magic of the Internet',
- },
- }, {
- 'url': 'https://imgur.com/gallery/YcAQlkx',
- 'info_dict': {
- 'id': 'YcAQlkx',
- 'ext': 'mp4',
- 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
- }
- }, {
- 'url': 'http://imgur.com/topic/Funny/N8rOudd',
'only_matching': True,
}, {
- 'url': 'http://imgur.com/r/aww/VQcQPhM',
+ 'url': 'https://i.imgur.com/crGpqCV.mp4',
'only_matching': True,
}, {
- 'url': 'https://i.imgur.com/crGpqCV.mp4',
+ # no title
+ 'url': 'https://i.imgur.com/jxBXAMC.gifv',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- gifv_url = 'https://i.imgur.com/{id}.gifv'.format(id=video_id)
- webpage = self._download_webpage(gifv_url, video_id)
+ webpage = self._download_webpage(
+ 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
width = int_or_none(self._og_search_property(
'video:width', webpage, default=None))
@@ -72,7 +57,6 @@ class ImgurIE(InfoExtractor):
'format_id': m.group('type').partition('/')[2],
'url': self._proto_relative_url(m.group('src')),
'ext': mimetype2ext(m.group('type')),
- 'acodec': 'none',
'width': width,
'height': height,
'http_headers': {
@@ -107,44 +91,64 @@ class ImgurIE(InfoExtractor):
return {
'id': video_id,
'formats': formats,
- 'description': self._og_search_description(webpage, default=None),
- 'title': self._og_search_title(webpage),
+ 'title': self._og_search_title(webpage, default=video_id),
}
-class ImgurAlbumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:(?:a|gallery|topic/[^/]+)/)?(?P<id>[a-zA-Z0-9]{5})(?:[/?#&]+)?$'
+class ImgurGalleryIE(InfoExtractor):
+ IE_NAME = 'imgur:gallery'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
+ 'title': 'Adding faces make every GIF better',
},
'playlist_count': 25,
}, {
- 'url': 'http://imgur.com/a/j6Orj',
+ 'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True,
}, {
- 'url': 'http://imgur.com/topic/Aww/ll5Vk',
+ 'url': 'https://imgur.com/gallery/YcAQlkx',
+ 'info_dict': {
+ 'id': 'YcAQlkx',
+ 'ext': 'mp4',
+ 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+ }
+ }, {
+ 'url': 'http://imgur.com/topic/Funny/N8rOudd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/r/aww/VQcQPhM',
'only_matching': True,
}]
def _real_extract(self, url):
- album_id = self._match_id(url)
-
- album_images = self._download_json(
- 'http://imgur.com/gallery/%s/album_images/hit.json?all=true' % album_id,
- album_id, fatal=False)
-
- if album_images:
- data = album_images.get('data')
- if data and isinstance(data, dict):
- images = data.get('images')
- if images and isinstance(images, list):
- entries = [
- self.url_result('http://imgur.com/%s' % image['hash'])
- for image in images if image.get('hash')]
- return self.playlist_result(entries, album_id)
-
- # Fallback to single video
- return self.url_result('http://imgur.com/%s' % album_id, ImgurIE.ie_key())
+ gallery_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://imgur.com/gallery/%s.json' % gallery_id,
+ gallery_id)['data']['image']
+
+ if data.get('is_album'):
+ entries = [
+ self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
+ for image in data['album_images']['images'] if image.get('hash')]
+ return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
+
+ return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
+
+
+class ImgurAlbumIE(ImgurGalleryIE):
+ IE_NAME = 'imgur:album'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://imgur.com/a/j6Orj',
+ 'info_dict': {
+ 'id': 'j6Orj',
+ 'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+ },
+ 'playlist_count': 12,
+ }]
diff --git a/youtube_dl/extractor/ina.py b/youtube_dl/extractor/ina.py
index 9544ff9d4..12695af27 100644
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dl/extractor/ina.py
@@ -1,36 +1,83 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ strip_or_none,
+ xpath_attr,
+ xpath_text,
+)
class InaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+ _TESTS = [{
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
'info_dict': {
'id': 'I12055569',
'ext': 'mp4',
'title': 'François Hollande "Je crois que c\'est clair"',
+ 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
}
- }
+ }, {
+ 'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/audio/P16173408',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/video/P16173408-video.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id
- info_doc = self._download_xml(mrss_url, video_id)
+ video_id = self._match_id(url)
+ info_doc = self._download_xml(
+ 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)
+ item = info_doc.find('channel/item')
+ title = xpath_text(item, 'title', fatal=True)
+ media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/')
+ content = item.find(media_ns_xpath('content'))
- self.report_extraction(video_id)
+ get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url')
+ formats = []
+ for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
+ q_url = get_furl(q)
+ if not q_url:
+ continue
+ formats.append({
+ 'format_id': q,
+ 'url': q_url,
+ 'width': w,
+ 'height': h,
+ })
+ if not formats:
+ furl = get_furl('player') or content.attrib['url']
+ ext = determine_ext(furl)
+ formats = [{
+ 'url': furl,
+ 'vcodec': 'none' if ext == 'mp3' else None,
+ 'ext': ext,
+ }]
- video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url']
+ thumbnails = []
+ for thumbnail in content.findall(media_ns_xpath('thumbnail')):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'height': int_or_none(thumbnail.get('height')),
+ 'width': int_or_none(thumbnail.get('width')),
+ })
return {
'id': video_id,
- 'url': video_url,
- 'title': info_doc.find('.//title').text,
+ 'formats': formats,
+ 'title': title,
+ 'description': strip_or_none(xpath_text(item, 'description')),
+ 'thumbnails': thumbnails,
}
diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py
index 2b5b2b5b0..4c16243ec 100644
--- a/youtube_dl/extractor/indavideo.py
+++ b/youtube_dl/extractor/indavideo.py
@@ -58,7 +58,7 @@ class IndavideoEmbedIE(InfoExtractor):
video_id = self._match_id(url)
video = self._download_json(
- 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
+ 'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
video_id)['data']
title = video['title']
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py
index 391c2f5d0..18249cf9b 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dl/extractor/infoq.py
@@ -122,9 +122,9 @@ class InfoQIE(BokeCCBaseIE):
formats = self._extract_bokecc_formats(webpage, video_id)
else:
formats = (
- self._extract_rtmp_video(webpage) +
- self._extract_http_video(webpage) +
- self._extract_http_audio(webpage, video_id))
+ self._extract_rtmp_video(webpage)
+ + self._extract_http_video(webpage)
+ + self._extract_http_audio(webpage, video_id))
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py
index 7e0e838f0..b061850a1 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@@ -22,7 +22,7 @@ from ..utils import (
class InstagramIE(InfoExtractor):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/p/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
@@ -92,6 +92,9 @@ class InstagramIE(InfoExtractor):
}, {
'url': 'http://instagram.com/p/9o6LshA7zy/embed/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.instagram.com/tv/aye83DjauH/',
+ 'only_matching': True,
}]
@staticmethod
@@ -227,44 +230,37 @@ class InstagramIE(InfoExtractor):
}
-class InstagramUserIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
- IE_DESC = 'Instagram user profile'
- IE_NAME = 'instagram:user'
- _TEST = {
- 'url': 'https://instagram.com/porsche',
- 'info_dict': {
- 'id': 'porsche',
- 'title': 'porsche',
- },
- 'playlist_count': 5,
- 'params': {
- 'extract_flat': True,
- 'skip_download': True,
- 'playlistend': 5,
- }
- }
+class InstagramPlaylistIE(InfoExtractor):
+ # A superclass for handling any kind of query based on GraphQL which
+ # results in a playlist.
- _gis_tmpl = None
+ _gis_tmpl = None # used to cache GIS request type
- def _entries(self, data):
+ def _parse_graphql(self, webpage, item_id):
+ # Reads a webpage and returns its GraphQL data.
+ return self._parse_json(
+ self._search_regex(
+ r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
+ item_id)
+
+ def _extract_graphql(self, data, url):
+ # Parses GraphQL queries containing videos and generates a playlist.
def get_count(suffix):
return int_or_none(try_get(
node, lambda x: x['edge_media_' + suffix]['count']))
- uploader_id = data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+ uploader_id = self._match_id(url)
csrf_token = data['config']['csrf_token']
rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
- self._set_cookie('instagram.com', 'ig_pr', '1')
-
cursor = ''
for page_num in itertools.count(1):
- variables = json.dumps({
- 'id': uploader_id,
+ variables = {
'first': 12,
'after': cursor,
- })
+ }
+ variables.update(self._query_vars_for(data))
+ variables = json.dumps(variables)
if self._gis_tmpl:
gis_tmpls = [self._gis_tmpl]
@@ -276,21 +272,26 @@ class InstagramUserIE(InfoExtractor):
'%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
]
+ # try all of the ways to generate a GIS query, and not only use the
+ # first one that works, but cache it for future requests
for gis_tmpl in gis_tmpls:
try:
- media = self._download_json(
+ json_data = self._download_json(
'https://www.instagram.com/graphql/query/', uploader_id,
'Downloading JSON page %d' % page_num, headers={
'X-Requested-With': 'XMLHttpRequest',
'X-Instagram-GIS': hashlib.md5(
('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
}, query={
- 'query_hash': '42323d64886122307be10013ad2dcc44',
+ 'query_hash': self._QUERY_HASH,
'variables': variables,
- })['data']['user']['edge_owner_to_timeline_media']
+ })
+ media = self._parse_timeline_from(json_data)
self._gis_tmpl = gis_tmpl
break
except ExtractorError as e:
+ # if it's an error caused by a bad query, and there are
+ # more GIS templates to try, ignore it and keep trying
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
if gis_tmpl != gis_tmpls[-1]:
continue
@@ -348,14 +349,80 @@ class InstagramUserIE(InfoExtractor):
break
def _real_extract(self, url):
- username = self._match_id(url)
+ user_or_tag = self._match_id(url)
+ webpage = self._download_webpage(url, user_or_tag)
+ data = self._parse_graphql(webpage, user_or_tag)
- webpage = self._download_webpage(url, username)
-
- data = self._parse_json(
- self._search_regex(
- r'sharedData\s*=\s*({.+?})\s*;\s*[<\n]', webpage, 'data'),
- username)
+ self._set_cookie('instagram.com', 'ig_pr', '1')
return self.playlist_result(
- self._entries(data), username, username)
+ self._extract_graphql(data, url), user_or_tag, user_or_tag)
+
+
+class InstagramUserIE(InstagramPlaylistIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P<id>[^/]{2,})/?(?:$|[?#])'
+ IE_DESC = 'Instagram user profile'
+ IE_NAME = 'instagram:user'
+ _TEST = {
+ 'url': 'https://instagram.com/porsche',
+ 'info_dict': {
+ 'id': 'porsche',
+ 'title': 'porsche',
+ },
+ 'playlist_count': 5,
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ 'playlistend': 5,
+ }
+ }
+
+ _QUERY_HASH = '42323d64886122307be10013ad2dcc44',
+
+ @staticmethod
+ def _parse_timeline_from(data):
+ # extracts the media timeline data from a GraphQL result
+ return data['data']['user']['edge_owner_to_timeline_media']
+
+ @staticmethod
+ def _query_vars_for(data):
+ # returns a dictionary of variables to add to the timeline query based
+ # on the GraphQL of the original page
+ return {
+ 'id': data['entry_data']['ProfilePage'][0]['graphql']['user']['id']
+ }
+
+
+class InstagramTagIE(InstagramPlaylistIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
+ IE_DESC = 'Instagram hashtag search'
+ IE_NAME = 'instagram:tag'
+ _TEST = {
+ 'url': 'https://instagram.com/explore/tags/lolcats',
+ 'info_dict': {
+ 'id': 'lolcats',
+ 'title': 'lolcats',
+ },
+ 'playlist_count': 50,
+ 'params': {
+ 'extract_flat': True,
+ 'skip_download': True,
+ 'playlistend': 50,
+ }
+ }
+
+ _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314',
+
+ @staticmethod
+ def _parse_timeline_from(data):
+ # extracts the media timeline data from a GraphQL result
+ return data['data']['hashtag']['edge_hashtag_to_media']
+
+ @staticmethod
+ def _query_vars_for(data):
+ # returns a dictionary of variables to add to the timeline query based
+ # on the GraphQL of the original page
+ return {
+ 'tag_name':
+ data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
+ }
diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py
index 76cc5ec3e..59b0a90c3 100644
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -1,15 +1,13 @@
from __future__ import unicode_literals
+import json
+import re
+
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
-from ..utils import (
- determine_ext,
- int_or_none,
- xpath_text,
-)
class InternetVideoArchiveIE(InfoExtractor):
@@ -20,7 +18,7 @@ class InternetVideoArchiveIE(InfoExtractor):
'info_dict': {
'id': '194487',
'ext': 'mp4',
- 'title': 'KICK-ASS 2',
+ 'title': 'Kick-Ass 2',
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
},
'params': {
@@ -33,68 +31,34 @@ class InternetVideoArchiveIE(InfoExtractor):
def _build_json_url(query):
return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
- @staticmethod
- def _build_xml_url(query):
- return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
-
def _real_extract(self, url):
- query = compat_urlparse.urlparse(url).query
- query_dic = compat_parse_qs(query)
- video_id = query_dic['publishedid'][0]
-
- if '/player/' in url:
- configuration = self._download_json(url, video_id)
-
- # There are multiple videos in the playlist whlie only the first one
- # matches the video played in browsers
- video_info = configuration['playlist'][0]
- title = video_info['title']
-
- formats = []
- for source in video_info['sources']:
- file_url = source['file']
- if determine_ext(file_url) == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
- if m3u8_formats:
- formats.extend(m3u8_formats)
- file_url = m3u8_formats[0]['url']
- formats.extend(self._extract_f4m_formats(
- file_url.replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- formats.extend(self._extract_mpd_formats(
- file_url.replace('.m3u8', '.mpd'),
- video_id, mpd_id='dash', fatal=False))
- else:
- a_format = {
- 'url': file_url,
- }
-
- if source.get('label') and source['label'][-4:] == ' kbs':
- tbr = int_or_none(source['label'][:-4])
- a_format.update({
- 'tbr': tbr,
- 'format_id': 'http-%d' % tbr,
- })
- formats.append(a_format)
-
- self._sort_formats(formats)
-
- description = video_info.get('description')
- thumbnail = video_info.get('image')
- else:
- configuration = self._download_xml(url, video_id)
- formats = [{
- 'url': xpath_text(configuration, './file', 'file URL', fatal=True),
- }]
- thumbnail = xpath_text(configuration, './image', 'thumbnail')
- title = 'InternetVideoArchive video %s' % video_id
- description = None
+ query = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = query['publishedid'][0]
+ data = self._download_json(
+ 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
+ video_id, data=json.dumps({
+ 'customerid': query['customerid'][0],
+ 'publishedid': video_id,
+ }).encode())
+ title = data['Title']
+ formats = self._extract_m3u8_formats(
+ data['VideoUrl'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ file_url = formats[0]['url']
+ if '.ism/' in file_url:
+ replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
+ formats.extend(self._extract_f4m_formats(
+ replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
+ self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'formats': formats,
- 'thumbnail': thumbnail,
- 'description': description,
+ 'thumbnail': data.get('PosterUrl'),
+ 'description': data.get('Description'),
}
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
index 1d58d6e85..53a550c11 100644
--- a/youtube_dl/extractor/iprima.py
+++ b/youtube_dl/extractor/iprima.py
@@ -12,16 +12,26 @@ from ..utils import (
class IPrimaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
+ _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_GEO_BYPASS = False
_TESTS = [{
- 'url': 'http://play.iprima.cz/gondici-s-r-o-33',
+ 'url': 'https://prima.iprima.cz/particka/92-epizoda',
'info_dict': {
- 'id': 'p136534',
+ 'id': 'p51388',
'ext': 'mp4',
- 'title': 'Gondíci s. r. o. (34)',
- 'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
+ 'title': 'Partička (92)',
+ 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+ 'info_dict': {
+ 'id': 'p681554',
+ 'ext': 'mp4',
+ 'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
},
'params': {
'skip_download': True, # m3u8 download
@@ -41,6 +51,24 @@ class IPrimaIE(InfoExtractor):
# iframe prima.iprima.cz
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
'only_matching': True,
+ }, {
+ 'url': 'http://www.iprima.cz/filmy/desne-rande',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -50,9 +78,15 @@ class IPrimaIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title')
+
video_id = self._search_regex(
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
- r'data-product="([^"]+)">'),
+ r'data-product="([^"]+)">',
+ r'id=["\']player-(p\d+)"',
+ r'playerId\s*:\s*["\']player-(p\d+)'),
webpage, 'real id')
playerpage = self._download_webpage(
@@ -107,8 +141,8 @@ class IPrimaIE(InfoExtractor):
return {
'id': video_id,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'formats': formats,
- 'description': self._og_search_description(webpage),
+ 'description': self._og_search_description(webpage, default=None),
}
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py
index 4b081bd46..cd11aa70f 100644
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -383,9 +383,9 @@ class IqiyiIE(InfoExtractor):
self._sleep(5, video_id)
self._sort_formats(formats)
- title = (get_element_by_id('widget-videotitle', webpage) or
- clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage)) or
- self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
+ title = (get_element_by_id('widget-videotitle', webpage)
+ or clean_html(get_element_by_attribute('class', 'mod-play-tit', webpage))
+ or self._html_search_regex(r'<span[^>]+data-videochanged-title="word"[^>]*>([^<]+)</span>', webpage, 'title'))
return {
'id': video_id,
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index de65b6bb4..ad2f4eca5 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -77,10 +77,10 @@ class ITVIE(InfoExtractor):
return etree.SubElement(element, _add_ns(name))
production_id = (
- params.get('data-video-autoplay-id') or
- '%s#001' % (
- params.get('data-video-episode-id') or
- video_id.replace('a', '/')))
+ params.get('data-video-autoplay-id')
+ or '%s#001' % (
+ params.get('data-video-episode-id')
+ or video_id.replace('a', '/')))
req_env = etree.Element(_add_ns('soapenv:Envelope'))
_add_sub_element(req_env, 'soapenv:Header')
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
index cb51cef2d..b5a740a01 100644
--- a/youtube_dl/extractor/ivi.py
+++ b/youtube_dl/extractor/ivi.py
@@ -1,8 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import json
+import re
+import sys
from .common import InfoExtractor
from ..utils import (
@@ -15,9 +16,11 @@ from ..utils import (
class IviIE(InfoExtractor):
IE_DESC = 'ivi.ru'
IE_NAME = 'ivi'
- _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
_GEO_BYPASS = False
_GEO_COUNTRIES = ['RU']
+ _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
+ _LIGHT_URL = 'https://api.ivi.ru/light/'
_TESTS = [
# Single movie
@@ -65,7 +68,11 @@ class IviIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
'skip': 'Only works from Russia',
- }
+ },
+ {
+ 'url': 'https://www.ivi.tv/watch/33560/',
+ 'only_matching': True,
+ },
]
# Sorted by quality
@@ -76,48 +83,96 @@ class IviIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- data = {
+ data = json.dumps({
'method': 'da.content.get',
'params': [
video_id, {
- 'site': 's183',
+ 'site': 's%d',
'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
'contentid': video_id
}
]
- }
+ })
- video_json = self._download_json(
- 'http://api.digitalaccess.ru/api/json/', video_id,
- 'Downloading video JSON', data=json.dumps(data))
-
- if 'error' in video_json:
- error = video_json['error']
- origin = error['origin']
- if origin == 'NotAllowedForLocation':
- self.raise_geo_restricted(
- msg=error['message'], countries=self._GEO_COUNTRIES)
- elif origin == 'NoRedisValidData':
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- raise ExtractorError(
- 'Unable to download video %s: %s' % (video_id, error['message']),
- expected=True)
+ bundled = hasattr(sys, 'frozen')
- result = video_json['result']
+ for site in (353, 183):
+ content_data = (data % site).encode()
+ if site == 353:
+ if bundled:
+ continue
+ try:
+ from Cryptodome.Cipher import Blowfish
+ from Cryptodome.Hash import CMAC
+ pycryptodomex_found = True
+ except ImportError:
+ pycryptodomex_found = False
+ continue
- quality = qualities(self._KNOWN_FORMATS)
+ timestamp = (self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading timestamp JSON', data=json.dumps({
+ 'method': 'da.timestamp.get',
+ 'params': []
+ }).encode(), fatal=False) or {}).get('result')
+ if not timestamp:
+ continue
- formats = [{
- 'url': x['url'],
- 'format_id': x.get('content_format'),
- 'quality': quality(x.get('content_format')),
- } for x in result['files'] if x.get('url')]
+ query = {
+ 'ts': timestamp,
+ 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(),
+ }
+ else:
+ query = {}
- self._sort_formats(formats)
+ video_json = self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading video JSON', data=content_data, query=query)
+ error = video_json.get('error')
+ if error:
+ origin = error.get('origin')
+ message = error.get('message') or error.get('user_message')
+ extractor_msg = 'Unable to download video %s'
+ if origin == 'NotAllowedForLocation':
+ self.raise_geo_restricted(message, self._GEO_COUNTRIES)
+ elif origin == 'NoRedisValidData':
+ extractor_msg = 'Video %s does not exist'
+ elif site == 353:
+ continue
+ elif bundled:
+ raise ExtractorError(
+ 'This feature does not work from bundled exe. Run youtube-dl from sources.',
+ expected=True)
+ elif not pycryptodomex_found:
+ raise ExtractorError(
+ 'pycryptodomex not found. Please install it.',
+ expected=True)
+ elif message:
+ extractor_msg += ': ' + message
+ raise ExtractorError(extractor_msg % video_id, expected=True)
+ else:
+ break
+
+ result = video_json['result']
title = result['title']
- duration = int_or_none(result.get('duration'))
+ quality = qualities(self._KNOWN_FORMATS)
+
+ formats = []
+ for f in result.get('files', []):
+ f_url = f.get('url')
+ content_format = f.get('content_format')
+ if not f_url or '-MDRM-' in (content_format or '') or '-FPS-' in (content_format or ''):
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': content_format,
+ 'quality': quality(content_format),
+ 'filesize': int_or_none(f.get('size_in_bytes')),
+ })
+ self._sort_formats(formats)
+
compilation = result.get('compilation')
episode = title if compilation else None
@@ -154,7 +209,7 @@ class IviIE(InfoExtractor):
'episode_number': episode_number,
'thumbnails': thumbnails,
'description': description,
- 'duration': duration,
+ 'duration': int_or_none(result.get('duration')),
'formats': formats,
}
@@ -184,7 +239,7 @@ class IviCompilationIE(InfoExtractor):
self.url_result(
'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
for serie in re.findall(
- r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
+ r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py
index 595d7a5b7..490efa8fb 100644
--- a/youtube_dl/extractor/jamendo.py
+++ b/youtube_dl/extractor/jamendo.py
@@ -1,33 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
+import hashlib
+import random
-from ..compat import compat_urlparse
+from ..compat import compat_str
from .common import InfoExtractor
-from ..utils import parse_duration
-
-
-class JamendoBaseIE(InfoExtractor):
- def _extract_meta(self, webpage, fatal=True):
- title = self._og_search_title(
- webpage, default=None) or self._search_regex(
- r'<title>([^<]+)', webpage,
- 'title', default=None)
- if title:
- title = self._search_regex(
- r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
- if not title:
- title = self._html_search_meta(
- 'name', webpage, 'title', fatal=fatal)
- mobj = re.search(r'(.+) - (.+)', title or '')
- artist, second = mobj.groups() if mobj else [None] * 2
- return title, artist, second
-
-
-class JamendoIE(JamendoBaseIE):
- _VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
- _TEST = {
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+)
+
+
+class JamendoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ licensing\.jamendo\.com/[^/]+|
+ (?:www\.)?jamendo\.com
+ )
+ /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
+ '''
+ _TESTS = [{
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
'info_dict': {
@@ -38,18 +33,30 @@ class JamendoIE(JamendoBaseIE):
'artist': 'Maya Filipič',
'track': 'Stories from Emona I',
'duration': 210,
- 'thumbnail': r're:^https?://.*\.jpg'
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1217438117,
+ 'upload_date': '20080730',
}
- }
+ }, {
+ 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = self._VALID_URL_RE.match(url)
- track_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- title, artist, track = self._extract_meta(webpage)
+ track_id, display_id = self._VALID_URL_RE.match(url).groups()
+ webpage = self._download_webpage(
+ 'https://www.jamendo.com/track/' + track_id, track_id)
+ models = self._parse_json(self._html_search_regex(
+ r"data-bundled-models='([^']+)",
+ webpage, 'bundled models'), track_id)
+ track = models['track']['models'][0]
+ title = track_name = track['name']
+ get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+ artist = get_model('artist')
+ artist_name = artist.get('name')
+ if artist_name:
+ title = '%s - %s' % (artist_name, title)
+ album = get_model('album')
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@@ -65,31 +72,58 @@ class JamendoIE(JamendoBaseIE):
))]
self._sort_formats(formats)
- thumbnail = self._html_search_meta(
- 'image', webpage, 'thumbnail', fatal=False)
- duration = parse_duration(self._search_regex(
- r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
- webpage, 'duration', fatal=False))
+ urls = []
+ thumbnails = []
+ for _, covers in track.get('cover', {}).items():
+ for cover_id, cover_url in covers.items():
+ if not cover_url or cover_url in urls:
+ continue
+ urls.append(cover_url)
+ size = int_or_none(cover_id.lstrip('size'))
+ thumbnails.append({
+ 'id': cover_id,
+ 'url': cover_url,
+ 'width': size,
+ 'height': size,
+ })
+
+ tags = []
+ for tag in track.get('tags', []):
+ tag_name = tag.get('name')
+ if not tag_name:
+ continue
+ tags.append(tag_name)
+
+ stats = track.get('stats') or {}
return {
'id': track_id,
'display_id': display_id,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'title': title,
- 'duration': duration,
- 'artist': artist,
- 'track': track,
- 'formats': formats
+ 'description': track.get('description'),
+ 'duration': int_or_none(track.get('duration')),
+ 'artist': artist_name,
+ 'track': track_name,
+ 'album': album.get('name'),
+ 'formats': formats,
+ 'license': '-'.join(track.get('licenseCC', [])) or None,
+ 'timestamp': int_or_none(track.get('dateCreated')),
+ 'view_count': int_or_none(stats.get('listenedAll')),
+ 'like_count': int_or_none(stats.get('favorited')),
+ 'average_rating': int_or_none(stats.get('averageNote')),
+ 'tags': tags,
}
-class JamendoAlbumIE(JamendoBaseIE):
- _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
+class JamendoAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
'info_dict': {
'id': '121486',
- 'title': 'Shearer - Duck On Cover'
+ 'title': 'Duck On Cover',
+ 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
},
'playlist': [{
'md5': 'e1a2fcb42bda30dfac990212924149a8',
@@ -99,6 +133,8 @@ class JamendoAlbumIE(JamendoBaseIE):
'title': 'Shearer - Warmachine',
'artist': 'Shearer',
'track': 'Warmachine',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
}
}, {
'md5': '1f358d7b2f98edfe90fd55dac0799d50',
@@ -108,6 +144,8 @@ class JamendoAlbumIE(JamendoBaseIE):
'title': 'Shearer - Without Your Ghost',
'artist': 'Shearer',
'track': 'Without Your Ghost',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
}
}],
'params': {
@@ -115,24 +153,35 @@ class JamendoAlbumIE(JamendoBaseIE):
}
}
+ def _call_api(self, resource, resource_id):
+ path = '/api/%ss' % resource
+ rand = compat_str(random.random())
+ return self._download_json(
+ 'https://www.jamendo.com' + path, resource_id, query={
+ 'id[]': resource_id,
+ }, headers={
+ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+ })[0]
+
def _real_extract(self, url):
- mobj = self._VALID_URL_RE.match(url)
- album_id = mobj.group('id')
-
- webpage = self._download_webpage(url, mobj.group('display_id'))
-
- title, artist, album = self._extract_meta(webpage, fatal=False)
-
- entries = [{
- '_type': 'url_transparent',
- 'url': compat_urlparse.urljoin(url, m.group('path')),
- 'ie_key': JamendoIE.ie_key(),
- 'id': self._search_regex(
- r'/track/(\d+)', m.group('path'), 'track id', default=None),
- 'artist': artist,
- 'album': album,
- } for m in re.finditer(
- r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
- webpage)]
-
- return self.playlist_result(entries, album_id, title)
+ album_id = self._match_id(url)
+ album = self._call_api('album', album_id)
+ album_name = album.get('name')
+
+ entries = []
+ for track in album.get('tracks', []):
+ track_id = track.get('id')
+ if not track_id:
+ continue
+ track_id = compat_str(track_id)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': 'https://www.jamendo.com/track/' + track_id,
+ 'ie_key': JamendoIE.ie_key(),
+ 'id': track_id,
+ 'album': album_name,
+ })
+
+ return self.playlist_result(
+ entries, album_id, album_name,
+ clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py
index d9f8dbfd2..62b28e980 100644
--- a/youtube_dl/extractor/joj.py
+++ b/youtube_dl/extractor/joj.py
@@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
bitrates = self._parse_json(
self._search_regex(
- r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py
deleted file mode 100644
index 4b5f346d1..000000000
--- a/youtube_dl/extractor/jpopsukitv.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
-)
-
-
-class JpopsukiIE(InfoExtractor):
- IE_NAME = 'jpopsuki.tv'
- _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
-
- _TEST = {
- 'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
- 'md5': '88018c0c1a9b1387940e90ec9e7e198e',
- 'info_dict': {
- 'id': '00be659d23b0b40508169cdee4545771',
- 'ext': 'mp4',
- 'title': 'ayumi hamasaki - evolution',
- 'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
- 'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
- 'uploader': 'plama_chan',
- 'uploader_id': '404',
- 'upload_date': '20121101'
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
- r'<source src="(.*?)" type', webpage, 'video url')
-
- video_title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- uploader = self._html_search_regex(
- r'<li>from: <a href="/user/view/user/(.*?)/uid/',
- webpage, 'video uploader', fatal=False)
- uploader_id = self._html_search_regex(
- r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
- webpage, 'video uploader_id', fatal=False)
- upload_date = unified_strdate(self._html_search_regex(
- r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
- fatal=False))
- view_count_str = self._html_search_regex(
- r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
- fatal=False)
- comment_count_str = self._html_search_regex(
- r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
- fatal=False)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
- 'view_count': int_or_none(view_count_str),
- 'comment_count': int_or_none(comment_count_str),
- }
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
index 63d0dc998..c34b5f5e6 100644
--- a/youtube_dl/extractor/jwplatform.py
+++ b/youtube_dl/extractor/jwplatform.py
@@ -4,11 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import unsmuggle_url
class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://content\.jwplatform\.com/(?:feeds|players|jw6)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
- _TEST = {
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
'info_dict': {
@@ -19,7 +20,10 @@ class JWPlatformIE(InfoExtractor):
'upload_date': '20081127',
'timestamp': 1227796140,
}
- }
+ }, {
+ 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
+ 'only_matching': True,
+ }]
@staticmethod
def _extract_url(webpage):
@@ -29,10 +33,14 @@ class JWPlatformIE(InfoExtractor):
@staticmethod
def _extract_urls(webpage):
return re.findall(
- r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
+ r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
webpage)
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
video_id = self._match_id(url)
- json_data = self._download_json('http://content.jwplatform.com/feeds/%s.json' % video_id, video_id)
+ json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
return self._parse_jwplayer_data(json_data, video_id)
diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py
index 7fa140b0c..32935bb28 100644
--- a/youtube_dl/extractor/kakao.py
+++ b/youtube_dl/extractor/kakao.py
@@ -6,14 +6,15 @@ from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
+ strip_or_none,
unified_timestamp,
update_url_query,
)
class KakaoIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
- _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
+ _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
+ _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
_TESTS = [{
'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
@@ -36,7 +37,7 @@ class KakaoIE(InfoExtractor):
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
'uploader_id': 2653210,
- 'uploader': '쇼 음악중심',
+ 'uploader': '쇼! 음악중심',
'timestamp': 1485684628,
'upload_date': '20170129',
}
@@ -44,6 +45,8 @@ class KakaoIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
+ display_id = video_id[:-3] if video_id.endswith('@my') else video_id
+ api_base = self._API_BASE_TMPL % video_id
player_header = {
'Referer': update_url_query(
@@ -55,20 +58,23 @@ class KakaoIE(InfoExtractor):
})
}
- QUERY_COMMON = {
+ query = {
'player': 'monet_html5',
'referer': url,
'uuid': '',
'service': 'kakao_tv',
'section': '',
'dteType': 'PC',
+ 'fields': ','.join([
+ '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
+ 'description', 'channelId', 'createTime', 'duration', 'playCount',
+ 'likeCount', 'commentCount', 'tagList', 'channel', 'name',
+ 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
+ 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
}
- query = QUERY_COMMON.copy()
- query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
impress = self._download_json(
- '%s/%s/impress' % (self._API_BASE, video_id),
- video_id, 'Downloading video info',
+ api_base + 'impress', display_id, 'Downloading video info',
query=query, headers=player_header)
clip_link = impress['clipLink']
@@ -76,32 +82,22 @@ class KakaoIE(InfoExtractor):
title = clip.get('title') or clip_link.get('displayTitle')
- tid = impress.get('tid', '')
-
- query = QUERY_COMMON.copy()
- query.update({
- 'tid': tid,
- 'profile': 'HIGH',
- })
- raw = self._download_json(
- '%s/%s/raw' % (self._API_BASE, video_id),
- video_id, 'Downloading video formats info',
- query=query, headers=player_header)
+ query['tid'] = impress.get('tid', '')
formats = []
- for fmt in raw.get('outputList', []):
+ for fmt in clip.get('videoOutputList', []):
try:
profile_name = fmt['profile']
+ if profile_name == 'AUDIO':
+ continue
+ query.update({
+ 'profile': profile_name,
+ 'fields': '-*,url',
+ })
fmt_url_json = self._download_json(
- '%s/%s/raw/videolocation' % (self._API_BASE, video_id),
- video_id,
+ api_base + 'raw/videolocation', display_id,
'Downloading video URL for profile %s' % profile_name,
- query={
- 'service': 'kakao_tv',
- 'section': '',
- 'tid': tid,
- 'profile': profile_name
- }, headers=player_header, fatal=False)
+ query=query, headers=player_header, fatal=False)
if fmt_url_json is None:
continue
@@ -113,7 +109,8 @@ class KakaoIE(InfoExtractor):
'width': int_or_none(fmt.get('width')),
'height': int_or_none(fmt.get('height')),
'format_note': fmt.get('label'),
- 'filesize': int_or_none(fmt.get('filesize'))
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'tbr': int_or_none(fmt.get('kbps')),
})
except KeyError:
pass
@@ -134,9 +131,9 @@ class KakaoIE(InfoExtractor):
})
return {
- 'id': video_id,
+ 'id': display_id,
'title': title,
- 'description': clip.get('description'),
+ 'description': strip_or_none(clip.get('description')),
'uploader': clip_link.get('channel', {}).get('name'),
'uploader_id': clip_link.get('channelId'),
'thumbnails': thumbs,
@@ -146,4 +143,5 @@ class KakaoIE(InfoExtractor):
'like_count': int_or_none(clip.get('likeCount')),
'comment_count': int_or_none(clip.get('commentCount')),
'formats': formats,
+ 'tags': clip.get('tagList'),
}
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
index 04f68fce4..49d13460d 100644
--- a/youtube_dl/extractor/kaltura.py
+++ b/youtube_dl/extractor/kaltura.py
@@ -103,14 +103,24 @@ class KalturaIE(InfoExtractor):
{
'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
'only_matching': True,
+ },
+ {
+ # unavailable source format
+ 'url': 'kaltura:513551:1_66x4rg7o',
+ 'only_matching': True,
}
]
@staticmethod
def _extract_url(webpage):
+ urls = KalturaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
- mobj = (
- re.search(
+ finditer = (
+ re.finditer(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
@@ -118,8 +128,8 @@ class KalturaIE(InfoExtractor):
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
- """, webpage) or
- re.search(
+ """, webpage)
+ or re.finditer(
r'''(?xs)
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@@ -132,8 +142,8 @@ class KalturaIE(InfoExtractor):
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
)
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
- ''', webpage) or
- re.search(
+ ''', webpage)
+ or re.finditer(
r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
@@ -143,16 +153,21 @@ class KalturaIE(InfoExtractor):
(?P=q1)
''', webpage)
)
- if mobj:
+ urls = []
+ for mobj in finditer:
embed_info = mobj.groupdict()
+ for k, v in embed_info.items():
+ if v:
+ embed_info[k] = v.strip()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
- service_url = re.search(
- r'<script[^>]+src=["\']((?:https?:)?//.+?)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+ service_mobj = re.search(
+ r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
- if service_url:
- url = smuggle_url(url, {'service_url': service_url.group(1)})
- return url
+ if service_mobj:
+ url = smuggle_url(url, {'service_url': service_mobj.group('id')})
+ urls.append(url)
+ return urls
def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
params = actions[0]
@@ -192,6 +207,8 @@ class KalturaIE(InfoExtractor):
'entryId': video_id,
'service': 'baseentry',
'ks': '{1:result:ks}',
+ 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ 'responseProfile:type': 1,
},
{
'action': 'getbyentryid',
@@ -302,12 +319,17 @@ class KalturaIE(InfoExtractor):
f['fileExt'] = 'mp4'
video_url = sign_url(
'%s/flavorId/%s' % (data_url, f['id']))
+ format_id = '%(fileExt)s-%(bitrate)s' % f
+ # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
+ if f.get('isOriginal') is True and not self._is_valid_url(
+ video_url, entry_id, format_id):
+ continue
# audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
# -f mp4-56)
vcodec = 'none' if 'videoCodecId' not in f and f.get(
'frameRate') == 0 else f.get('videoCodecId')
formats.append({
- 'format_id': '%(fileExt)s-%(bitrate)s' % f,
+ 'format_id': format_id,
'ext': f.get('fileExt'),
'tbr': int_or_none(f['bitrate']),
'fps': int_or_none(f.get('frameRate')),
diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dl/extractor/karrierevideos.py
index f236a2f78..7b291e0a0 100644
--- a/youtube_dl/extractor/karrierevideos.py
+++ b/youtube_dl/extractor/karrierevideos.py
@@ -47,8 +47,8 @@ class KarriereVideosIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- title = (self._html_search_meta('title', webpage, default=None) or
- self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
+ title = (self._html_search_meta('title', webpage, default=None)
+ or self._search_regex(r'<h1 class="title">([^<]+)</h1>', webpage, 'video title'))
video_id = self._search_regex(
r'/config/video/(.+?)\.xml', webpage, 'video id')
diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py
deleted file mode 100644
index 94a03d277..000000000
--- a/youtube_dl/extractor/keek.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class KeekIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
- IE_NAME = 'keek'
- _TEST = {
- 'url': 'https://www.keek.com/keek/NODfbab',
- 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
- 'info_dict': {
- 'id': 'NODfbab',
- 'ext': 'mp4',
- 'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
- 'uploader': 'ytdl',
- 'uploader_id': 'eGT5bab',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- return {
- 'id': video_id,
- 'url': self._og_search_video_url(webpage),
- 'ext': 'mp4',
- 'title': self._og_search_description(webpage).strip(),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader': self._search_regex(
- r'data-username=(["\'])(?P<uploader>.+?)\1', webpage,
- 'uploader', fatal=False, group='uploader'),
- 'uploader_id': self._search_regex(
- r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage,
- 'uploader id', fatal=False, group='uploader_id'),
- }
diff --git a/youtube_dl/extractor/kinja.py b/youtube_dl/extractor/kinja.py
new file mode 100644
index 000000000..79e3026d2
--- /dev/null
+++ b/youtube_dl/extractor/kinja.py
@@ -0,0 +1,221 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class KinjaEmbedIE(InfoExtractor):
+ IE_NAME = 'kinja:embed'
+ _DOMAIN_REGEX = r'''(?:[^.]+\.)?
+ (?:
+ avclub|
+ clickhole|
+ deadspin|
+ gizmodo|
+ jalopnik|
+ jezebel|
+ kinja|
+ kotaku|
+ lifehacker|
+ splinternews|
+ the(?:inventory|onion|root|takeout)
+ )\.com'''
+ _COMMON_REGEX = r'''/
+ (?:
+ ajax/inset|
+ embed/video
+ )/iframe\?.*?\bid='''
+ _VALID_URL = r'''(?x)https?://%s%s
+ (?P<type>
+ fb|
+ imgur|
+ instagram|
+ jwp(?:layer)?-video|
+ kinjavideo|
+ mcp|
+ megaphone|
+ ooyala|
+ soundcloud(?:-playlist)?|
+ tumblr-post|
+ twitch-stream|
+ twitter|
+ ustream-channel|
+ vimeo|
+ vine|
+ youtube-(?:list|video)
+ )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _TESTS = [{
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
+ 'only_matching': True,
+ }]
+ _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
+ _PROVIDER_MAP = {
+ 'fb': ('facebook.com/video.php?v=', 'Facebook'),
+ 'imgur': ('imgur.com/', 'Imgur'),
+ 'instagram': ('instagram.com/p/', 'Instagram'),
+ 'jwplayer-video': _JWPLATFORM_PROVIDER,
+ 'jwp-video': _JWPLATFORM_PROVIDER,
+ 'megaphone': ('player.megaphone.fm/', 'Generic'),
+ 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
+ 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
+ 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
+ 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
+ 'twitch-stream': ('twitch.tv/', 'TwitchStream'),
+ 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
+ 'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
+ 'vimeo': ('vimeo.com/', 'Vimeo'),
+ 'vine': ('vine.co/v/', 'Vine'),
+ 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
+ 'youtube-video': ('youtube.com/embed/', 'Youtube'),
+ }
+
+ @staticmethod
+ def _extract_urls(webpage, url):
+ return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
+ r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_type, video_id = re.match(self._VALID_URL, url).groups()
+
+ provider = self._PROVIDER_MAP.get(video_type)
+ if provider:
+ video_id = compat_urllib_parse_unquote(video_id)
+ if video_type == 'tumblr-post':
+ video_id, blog = video_id.split('-', 1)
+ result_url = provider[0] % (blog, video_id)
+ elif video_type == 'youtube-list':
+ video_id, playlist_id = video_id.split('/')
+ result_url = provider[0] % (video_id, playlist_id)
+ else:
+ if video_type == 'ooyala':
+ video_id = video_id.split('/')[0]
+ result_url = provider[0] + video_id
+ return self.url_result('http://' + result_url, provider[1])
+
+ if video_type == 'kinjavideo':
+ data = self._download_json(
+ 'https://kinja.com/api/core/video/views/videoById',
+ video_id, query={'videoId': video_id})['data']
+ title = data['title']
+
+ formats = []
+ for k in ('signedPlaylist', 'streaming'):
+ m3u8_url = data.get(k + 'Url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnail = None
+ poster = data.get('poster') or {}
+ poster_id = poster.get('id')
+ if poster_id:
+ thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'formats': formats,
+ 'tags': data.get('tags'),
+ 'timestamp': int_or_none(try_get(
+ data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
+ 'thumbnail': thumbnail,
+ 'uploader': data.get('network'),
+ }
+ else:
+ video_data = self._download_json(
+ 'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
+ video_id)['videoMetadata']
+ iptc = video_data['photoVideoMetadataIPTC']
+ title = iptc['title']['en']
+ fmg = video_data.get('photoVideoMetadata_fmg') or {}
+ tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
+ data = self._download_json(
+ tvss_domain + '/api/v3/video-auth/url-signature-tokens',
+ video_id, query={'mcpids': video_id})['data'][0]
+ formats = []
+
+ rendition_url = data.get('renditionUrl')
+ if rendition_url:
+ formats = self._extract_m3u8_formats(
+ rendition_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ fallback_rendition_url = data.get('fallbackRenditionUrl')
+ if fallback_rendition_url:
+ formats.append({
+ 'format_id': 'fallback',
+ 'tbr': int_or_none(self._search_regex(
+ r'_(\d+)\.mp4', fallback_rendition_url,
+ 'bitrate', default=None)),
+ 'url': fallback_rendition_url,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
+ 'uploader': fmg.get('network'),
+ 'duration': int_or_none(iptc.get('fileDuration')),
+ 'formats': formats,
+ 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
+ 'timestamp': parse_iso8601(iptc.get('dateReleased')),
+ }
diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py
deleted file mode 100644
index 1fda45107..000000000
--- a/youtube_dl/extractor/kontrtube.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
-)
-
-
-class KontrTubeIE(InfoExtractor):
- IE_NAME = 'kontrtube'
- IE_DESC = 'KontrTube.ru - Труба зовёт'
- _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
-
- _TEST = {
- 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
- 'md5': '975a991a4926c9a85f383a736a2e6b80',
- 'info_dict': {
- 'id': '2678',
- 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
- 'ext': 'mp4',
- 'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
- 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
- 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
- 'duration': 270,
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(
- url, display_id, 'Downloading page')
-
- video_url = self._search_regex(
- r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
- thumbnail = self._search_regex(
- r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
- title = self._html_search_regex(
- r'(?s)<h2>(.+?)</h2>', webpage, 'title')
- description = self._html_search_meta(
- 'description', webpage, 'description')
-
- duration = self._search_regex(
- r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
- if duration:
- duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
-
- view_count = self._search_regex(
- r'Просмотров: <em>([^<]+)</em>',
- webpage, 'view count', fatal=False)
- if view_count:
- view_count = int_or_none(view_count.replace(' ', ''))
-
- comment_count = int_or_none(self._search_regex(
- r'Комментарии \((\d+)\)<', webpage, ' comment count', fatal=False))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': int_or_none(view_count),
- 'comment_count': int_or_none(comment_count),
- }
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py
index 63e10125e..cc5b2a1c1 100644
--- a/youtube_dl/extractor/kuwo.py
+++ b/youtube_dl/extractor/kuwo.py
@@ -215,7 +215,7 @@ class KuwoSingerIE(InfoExtractor):
'title': 'Ali',
},
'playlist_mincount': 95,
- 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
+ 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/ytdl-org/youtube-dl/jobs/78878540
}]
PAGE_SIZE = 15
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
index 6373268c4..c3b4ffa7e 100644
--- a/youtube_dl/extractor/la7.py
+++ b/youtube_dl/extractor/la7.py
@@ -20,7 +20,7 @@ class LA7IE(InfoExtractor):
'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
'info_dict': {
- 'id': 'inccool8-02-10-2015-163722',
+ 'id': '0_42j6wd36',
'ext': 'mp4',
'title': 'Inc.Cool8',
'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
@@ -57,7 +57,7 @@ class LA7IE(InfoExtractor):
return {
'_type': 'url_transparent',
'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
- 'service_url': 'http://kdam.iltrovatore.it',
+ 'service_url': 'http://nkdam.iltrovatore.it',
}),
'id': video_id,
'title': player_data['title'],
diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dl/extractor/laola1tv.py
index c7f813370..fa217365a 100644
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dl/extractor/laola1tv.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import json
+import re
from .common import InfoExtractor
from ..utils import (
@@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
def _extract_token_url(self, stream_access_url, video_id, data):
return self._download_json(
- stream_access_url, video_id, headers={
+ self._proto_relative_url(stream_access_url, 'https:'), video_id,
+ headers={
'Content-Type': 'application/json',
}, data=json.dumps(data).encode())['data']['stream-access'][0]
@@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
}
-class Laola1TvIE(Laola1TvEmbedIE):
+class Laola1TvBaseIE(Laola1TvEmbedIE):
+ def _extract_video(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if 'Dieser Livestream ist bereits beendet.' in webpage:
+ raise ExtractorError('This live stream has already finished.', expected=True)
+
+ conf = self._parse_json(self._search_regex(
+ r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
+ display_id,
+ transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
+ video_id = conf['videoid']
+
+ config = self._download_json(conf['configUrl'], video_id, query={
+ 'videoid': video_id,
+ 'partnerid': conf['partnerid'],
+ 'language': conf.get('language', ''),
+ 'portal': conf.get('portalid', ''),
+ })
+ error = config.get('error')
+ if error:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ video_data = config['video']
+ title = video_data['title']
+ is_live = video_data.get('isLivestream') and video_data.get('isLive')
+ meta = video_data.get('metaInformation')
+ sports = meta.get('sports')
+ categories = sports.split(',') if sports else []
+
+ token_url = self._extract_token_url(
+ video_data['streamAccess'], video_id,
+ video_data['abo']['required'])
+
+ formats = self._extract_formats(token_url, video_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('image'),
+ 'categories': categories,
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class Laola1TvIE(Laola1TvBaseIE):
IE_NAME = 'laola1tv'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
+
_TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': {
@@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
+ return self._extract_video(url)
- webpage = self._download_webpage(url, display_id)
- if 'Dieser Livestream ist bereits beendet.' in webpage:
- raise ExtractorError('This live stream has already finished.', expected=True)
-
- conf = self._parse_json(self._search_regex(
- r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
- display_id, js_to_json)
-
- video_id = conf['videoid']
-
- config = self._download_json(conf['configUrl'], video_id, query={
- 'videoid': video_id,
- 'partnerid': conf['partnerid'],
- 'language': conf.get('language', ''),
- 'portal': conf.get('portalid', ''),
- })
- error = config.get('error')
- if error:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
- video_data = config['video']
- title = video_data['title']
- is_live = video_data.get('isLivestream') and video_data.get('isLive')
- meta = video_data.get('metaInformation')
- sports = meta.get('sports')
- categories = sports.split(',') if sports else []
-
- token_url = self._extract_token_url(
- video_data['streamAccess'], video_id,
- video_data['abo']['required'])
+class EHFTVIE(Laola1TvBaseIE):
+ IE_NAME = 'ehftv'
+ _VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
- formats = self._extract_formats(token_url, video_id)
+ _TESTS = [{
+ 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
+ 'info_dict': {
+ 'id': '1166761',
+ 'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
+ 'ext': 'mp4',
+ 'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
+ 'is_live': False,
+ 'categories': ['Handball'],
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': self._live_title(title) if is_live else title,
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('image'),
- 'categories': categories,
- 'formats': formats,
- 'is_live': is_live,
- }
+ def _real_extract(self, url):
+ return self._extract_video(url)
class ITTFIE(InfoExtractor):
diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py
deleted file mode 100644
index 1435e090e..000000000
--- a/youtube_dl/extractor/learnr.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class LearnrIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
- 'md5': '3719fdf0a68397f49899e82c308a89de',
- 'info_dict': {
- 'id': '51624',
- 'ext': 'mp4',
- 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
- 'description': 'md5:b36dbfa92350176cdf12b4d388485503',
- 'uploader': 'LearnCode.academy',
- 'uploader_id': 'learncodeacademy',
- 'upload_date': '20131021',
- },
- 'add_ie': ['Youtube'],
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- return {
- '_type': 'url_transparent',
- 'url': self._search_regex(
- r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
- 'id': video_id,
- }
diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py
new file mode 100644
index 000000000..1b2dcef46
--- /dev/null
+++ b/youtube_dl/extractor/lecturio.py
@@ -0,0 +1,243 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LecturioBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
+ _LOGIN_URL = 'https://app.lecturio.com/en/login'
+ _NETRC_MACHINE = 'lecturio'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ # Sets some cookies
+ _, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(url_handle):
+ return self._LOGIN_URL not in url_handle.geturl()
+
+ # Already logged in
+ if is_logged(urlh):
+ return
+
+ login_form = {
+ 'signin[email]': username,
+ 'signin[password]': password,
+ 'signin[remember]': 'on',
+ }
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ # Logged in successfully
+ if is_logged(urlh):
+ return
+
+ errors = self._html_search_regex(
+ r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
+ 'errors', default=None)
+ if errors:
+ raise ExtractorError('Unable to login: %s' % errors, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class LecturioIE(LecturioBaseIE):
+ _VALID_URL = r'''(?x)
+ https://
+ (?:
+ app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+ (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
+ 'md5': '9a42cf1d8282a6311bf7211bbde26fde',
+ 'info_dict': {
+ 'id': '39634',
+ 'ext': 'mp4',
+ 'title': 'Important Concepts and Terms — Introduction to Microbiology',
+ },
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
+ 'only_matching': True,
+ }]
+
+ _CC_LANGS = {
+ 'Arabic': 'ar',
+ 'Bulgarian': 'bg',
+ 'German': 'de',
+ 'English': 'en',
+ 'Spanish': 'es',
+ 'Persian': 'fa',
+ 'French': 'fr',
+ 'Japanese': 'ja',
+ 'Polish': 'pl',
+ 'Pashto': 'ps',
+ 'Russian': 'ru',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ nt = mobj.group('nt') or mobj.group('nt_de')
+ lecture_id = mobj.group('id')
+ display_id = nt or lecture_id
+ api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
+ video = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ title = video['title'].strip()
+ if not lecture_id:
+ pid = video.get('productId') or video.get('uid')
+ if pid:
+ spid = pid.split('_')
+ if spid and len(spid) == 2:
+ lecture_id = spid[1]
+
+ formats = []
+ for format_ in video['content']['media']:
+ if not isinstance(format_, dict):
+ continue
+ file_ = format_.get('file')
+ if not file_:
+ continue
+ ext = determine_ext(file_)
+ if ext == 'smil':
+ # smil contains only broken RTMP formats anyway
+ continue
+ file_url = url_or_none(file_)
+ if not file_url:
+ continue
+ label = str_or_none(format_.get('label'))
+ filesize = int_or_none(format_.get('fileSize'))
+ f = {
+ 'url': file_url,
+ 'format_id': label,
+ 'filesize': float_or_none(filesize, invscale=1000)
+ }
+ if label:
+ mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
+ if mobj:
+ f.update({
+ 'format_id': mobj.group(2),
+ 'height': int(mobj.group(1)),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ automatic_captions = {}
+ captions = video.get('captions') or []
+ for cc in captions:
+ cc_url = cc.get('url')
+ if not cc_url:
+ continue
+ cc_label = cc.get('translatedCode')
+ lang = cc.get('languageCode') or self._search_regex(
+ r'/([a-z]{2})_', cc_url, 'lang',
+ default=cc_label.split()[0] if cc_label else 'en')
+ original_lang = self._search_regex(
+ r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
+ default=None)
+ sub_dict = (automatic_captions
+ if 'auto-translated' in cc_label or original_lang
+ else subtitles)
+ sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': lecture_id or nt,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class LecturioCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
+ 'info_dict': {
+ 'id': 'microbiology-introduction',
+ 'title': 'Microbiology: Introduction',
+ 'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
+ },
+ 'playlist_count': 45,
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://app.lecturio.com/#/course/c/6434',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ nt, course_id = re.match(self._VALID_URL, url).groups()
+ display_id = nt or course_id
+ api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
+ course = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ entries = []
+ for lecture in course.get('lectures', []):
+ lecture_id = str_or_none(lecture.get('id'))
+ lecture_url = lecture.get('url')
+ if lecture_url:
+ lecture_url = urljoin(url, lecture_url)
+ else:
+ lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+ return self.playlist_result(
+ entries, display_id, course.get('title'),
+ clean_html(course.get('description')))
+
+
+class LecturioDeCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+ _TEST = {
+ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
+ webpage):
+ lecture_url = urljoin(url, mobj.group('url'))
+ lecture_id = mobj.group('id')
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+ title = self._search_regex(
+ r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
+
+ return self.playlist_result(entries, display_id, title)
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dl/extractor/leeco.py
index 8dd1ce0d0..7dc0ad794 100644
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dl/extractor/leeco.py
@@ -326,7 +326,7 @@ class LetvCloudIE(InfoExtractor):
elif play_json.get('code'):
raise ExtractorError('Letv cloud returned error %d' % play_json['code'], expected=True)
else:
- raise ExtractorError('Letv cloud returned an unknwon error')
+ raise ExtractorError('Letv cloud returned an unknown error')
def b64decode(s):
return compat_b64decode(s).decode('utf-8')
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
index b312e77f1..1e3c19dfd 100644
--- a/youtube_dl/extractor/lego.py
+++ b/youtube_dl/extractor/lego.py
@@ -2,23 +2,24 @@
from __future__ import unicode_literals
import re
+import uuid
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
from ..utils import (
- unescapeHTML,
- parse_duration,
- get_element_by_class,
+ ExtractorError,
+ int_or_none,
+ qualities,
)
class LEGOIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
+ _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
_TESTS = [{
'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
'md5': 'f34468f176cfd76488767fc162c405fa',
'info_dict': {
- 'id': '55492d823b1b4d5e985787fa8c2973b1',
+ 'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
'ext': 'mp4',
'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
@@ -26,103 +27,123 @@ class LEGOIE(InfoExtractor):
}, {
# geo-restricted but the contentUrl contain a valid url
'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
- 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
+ 'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
'info_dict': {
- 'id': '13bdc2299ab24d9685701a915b3d71e7',
+ 'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
'ext': 'mp4',
- 'title': 'Aflevering 20 - Helden van het koninkrijk',
+ 'title': 'Aflevering 20: Helden van het koninkrijk',
'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+ 'age_limit': 5,
},
}, {
- # special characters in title
- 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
+ # with subtitle
+ 'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
'info_dict': {
- 'id': '9685ee9d12e84ff38e84b4e3d0db533d',
+ 'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
'ext': 'mp4',
- 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
- 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
+ 'title': 'De kleine puppy',
+ 'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
+ 'age_limit': 1,
+ 'subtitles': {
+ 'nl': [{
+ 'ext': 'srt',
+ 'url': r're:^https://.+\.srt$',
+ }],
+ },
},
'params': {
'skip_download': True,
},
}]
- _BITRATES = [256, 512, 1024, 1536, 2560]
+ _QUALITIES = {
+ 'Lowest': (64, 180, 320),
+ 'Low': (64, 270, 480),
+ 'Medium': (96, 360, 640),
+ 'High': (128, 540, 960),
+ 'Highest': (128, 720, 1280),
+ }
def _real_extract(self, url):
locale, video_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, video_id)
- title = get_element_by_class('video-header', webpage).strip()
- progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
- streaming_base = 'http://legoprod-f.akamaihd.net/'
- content_url = self._html_search_meta('contentUrl', webpage)
- path = self._search_regex(
- r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
- content_url, 'video path', default=None)
- if not path:
- player_url = self._proto_relative_url(self._search_regex(
- r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
- webpage, 'player url', default=None))
- if not player_url:
- base_url = self._proto_relative_url(self._search_regex(
- r'data-baseurl="([^"]+)"', webpage, 'base url',
- default='http://www.lego.com/%s/mediaplayer/video/' % locale))
- player_url = base_url + video_id
- player_webpage = self._download_webpage(player_url, video_id)
- video_data = self._parse_json(unescapeHTML(self._search_regex(
- r"video='([^']+)'", player_webpage, 'video data')), video_id)
- progressive_base = self._search_regex(
- r'data-video-progressive-url="([^"]+)"',
- player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
- streaming_base = self._search_regex(
- r'data-video-streaming-url="([^"]+)"',
- player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
- item_id = video_data['ItemId']
+ countries = [locale.split('-')[1].upper()]
+ self._initialize_geo_bypass({
+ 'countries': countries,
+ })
- net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
- base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
- path = '/'.join([net_storage_path, base_path])
- streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES))
+ try:
+ item = self._download_json(
+ # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
+ 'https://services.slingshot.lego.com/mediaplayer/v2',
+ video_id, query={
+ 'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
+ }, headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
+ self.raise_geo_restricted(countries=countries)
+ raise
- formats = self._extract_akamai_formats(
- '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
- m3u8_formats = list(filter(
- lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
- formats))
- if len(m3u8_formats) == len(self._BITRATES):
- self._sort_formats(m3u8_formats)
- for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
- progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
- mp4_f = m3u8_format.copy()
- mp4_f.update({
- 'url': progressive_base_url + 'mp4',
- 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
- 'protocol': 'http',
- })
- web_f = {
- 'url': progressive_base_url + 'webm',
- 'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
- 'width': m3u8_format['width'],
- 'height': m3u8_format['height'],
- 'tbr': m3u8_format.get('tbr'),
- 'ext': 'webm',
+ video = item['Video']
+ video_id = video['Id']
+ title = video['Title']
+
+ q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
+ formats = []
+ for video_source in item.get('VideoFormats', []):
+ video_source_url = video_source.get('Url')
+ if not video_source_url:
+ continue
+ video_source_format = video_source.get('Format')
+ if video_source_format == 'F4M':
+ formats.extend(self._extract_f4m_formats(
+ video_source_url, video_id,
+ f4m_id=video_source_format, fatal=False))
+ elif video_source_format == 'M3U8':
+ formats.extend(self._extract_m3u8_formats(
+ video_source_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=video_source_format, fatal=False))
+ else:
+ video_source_quality = video_source.get('Quality')
+ format_id = []
+ for v in (video_source_format, video_source_quality):
+ if v:
+ format_id.append(v)
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'quality': q(video_source_quality),
+ 'url': video_source_url,
}
- formats.extend([web_f, mp4_f])
- else:
- for bitrate in self._BITRATES:
- for ext in ('web', 'mp4'):
- formats.append({
- 'format_id': '%s-%s' % (ext, bitrate),
- 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
- 'tbr': bitrate,
- 'ext': ext,
- })
+ quality = self._QUALITIES.get(video_source_quality)
+ if quality:
+ f.update({
+ 'abr': quality[0],
+ 'height': quality[1],
+ 'width': quality[2],
+ })
+ formats.append(f)
self._sort_formats(formats)
+ subtitles = {}
+ sub_file_id = video.get('SubFileId')
+ if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
+ net_storage_path = video.get('NetstoragePath')
+ invariant_id = video.get('InvariantId')
+ video_file_id = video.get('VideoFileId')
+ video_version = video.get('VideoVersion')
+ if net_storage_path and invariant_id and video_file_id and video_version:
+ subtitles.setdefault(locale[:2], []).append({
+ 'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
+ })
+
return {
'id': video_id,
'title': title,
- 'description': self._html_search_meta('description', webpage),
- 'thumbnail': self._html_search_meta('thumbnail', webpage),
- 'duration': parse_duration(self._html_search_meta('duration', webpage)),
+ 'description': video.get('Description'),
+ 'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
+ 'duration': int_or_none(video.get('Length')),
'formats': formats,
+ 'subtitles': subtitles,
+ 'age_limit': int_or_none(video.get('AgeFrom')),
+ 'season': video.get('SeasonTitle'),
+ 'season_number': int_or_none(video.get('Season')) or None,
+ 'episode_number': int_or_none(video.get('Episode')) or None,
}
diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dl/extractor/libraryofcongress.py
index 40295a30b..03f205144 100644
--- a/youtube_dl/extractor/libraryofcongress.py
+++ b/youtube_dl/extractor/libraryofcongress.py
@@ -16,16 +16,15 @@ from ..utils import (
class LibraryOfCongressIE(InfoExtractor):
IE_NAME = 'loc'
IE_DESC = 'Library of Congress'
- _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
_TESTS = [{
# embedded via <div class="media-player"
'url': 'http://loc.gov/item/90716351/',
- 'md5': '353917ff7f0255aa6d4b80a034833de8',
+ 'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
'info_dict': {
'id': '90716351',
'ext': 'mp4',
'title': "Pa's trip to Mars",
- 'thumbnail': r're:^https?://.*\.jpg$',
'duration': 0,
'view_count': int,
},
@@ -57,6 +56,12 @@ class LibraryOfCongressIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.loc.gov/item/ihas.200197114/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -67,12 +72,13 @@ class LibraryOfCongressIE(InfoExtractor):
(r'id=(["\'])media-player-(?P<id>.+?)\1',
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
- r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
+ r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
+ r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
webpage, 'media id', group='id')
data = self._download_json(
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
- video_id)['mediaObject']
+ media_id)['mediaObject']
derivative = data['derivatives'][0]
media_url = derivative['derivativeUrl']
@@ -89,25 +95,29 @@ class LibraryOfCongressIE(InfoExtractor):
if ext not in ('mp4', 'mp3'):
media_url += '.mp4' if is_video else '.mp3'
- if 'vod/mp4:' in media_url:
- formats = [{
- 'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
+ formats = []
+ if '/vod/mp4:' in media_url:
+ formats.append({
+ 'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
'format_id': 'hls',
'ext': 'mp4',
'protocol': 'm3u8_native',
'quality': 1,
- }]
- elif 'vod/mp3:' in media_url:
- formats = [{
- 'url': media_url.replace('vod/mp3:', ''),
- 'vcodec': 'none',
- }]
+ })
+ http_format = {
+ 'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
+ 'format_id': 'http',
+ 'quality': 1,
+ }
+ if not is_video:
+ http_format['vcodec'] = 'none'
+ formats.append(http_format)
download_urls = set()
for m in re.finditer(
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?:&nbsp;|\s+)\((?P<size>.+?)\))?\s*<', webpage):
format_id = m.group('id').lower()
- if format_id == 'gif':
+ if format_id in ('gif', 'jpeg'):
continue
download_url = m.group('url')
if download_url in download_urls:
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dl/extractor/libsyn.py
index f7311f483..2cf444258 100644
--- a/youtube_dl/extractor/libsyn.py
+++ b/youtube_dl/extractor/libsyn.py
@@ -1,12 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
+ clean_html,
+ get_element_by_class,
parse_duration,
+ strip_or_none,
unified_strdate,
)
@@ -21,7 +23,9 @@ class LibsynIE(InfoExtractor):
'id': '6385796',
'ext': 'mp3',
'title': "Champion Minded - Developing a Growth Mindset",
- 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
+ # description fetched using another request:
+ # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
+ # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
'upload_date': '20180320',
'thumbnail': 're:^https?://.*',
},
@@ -38,22 +42,36 @@ class LibsynIE(InfoExtractor):
}]
def _real_extract(self, url):
- m = re.match(self._VALID_URL, url)
- video_id = m.group('id')
- url = m.group('mainurl')
+ url, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, video_id)
- podcast_title = self._search_regex(
- r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
- if podcast_title:
- podcast_title = podcast_title.strip()
- episode_title = self._search_regex(
- r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title')
- if episode_title:
- episode_title = episode_title.strip()
+ data = self._parse_json(self._search_regex(
+ r'var\s+playlistItem\s*=\s*({.+?});',
+ webpage, 'JSON data block'), video_id)
+
+ episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
+ if not episode_title:
+ episode_title = self._search_regex(
+ [r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
+ webpage, 'episode title')
+ episode_title = episode_title.strip()
+
+ podcast_title = strip_or_none(clean_html(self._search_regex(
+ r'<h3>([^<]+)</h3>', webpage, 'podcast title',
+ default=None) or get_element_by_class('podcast-title', webpage)))
title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title
+ formats = []
+ for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
+ f_url = data.get(k)
+ if not f_url:
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+
description = self._html_search_regex(
r'<p\s+id="info_text_body">(.+?)</p>', webpage,
'description', default=None)
@@ -61,27 +79,15 @@ class LibsynIE(InfoExtractor):
# Strip non-breaking and normal spaces
description = description.replace('\u00A0', ' ').strip()
release_date = unified_strdate(self._search_regex(
- r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))
-
- data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
- data = json.loads(data_json)
-
- formats = [{
- 'url': data['media_url'],
- 'format_id': 'main',
- }, {
- 'url': data['media_url_libsyn'],
- 'format_id': 'libsyn',
- }]
- thumbnail = data.get('thumbnail_url')
- duration = parse_duration(data.get('duration'))
+ r'<div class="release_date">Released: ([^<]+)<',
+ webpage, 'release date', default=None) or data.get('release_date'))
return {
'id': video_id,
'title': title,
'description': description,
- 'thumbnail': thumbnail,
+ 'thumbnail': data.get('thumbnail_url'),
'upload_date': release_date,
- 'duration': duration,
+ 'duration': parse_duration(data.get('duration')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
index 729d8de50..39f74d282 100644
--- a/youtube_dl/extractor/limelight.py
+++ b/youtube_dl/extractor/limelight.py
@@ -18,7 +18,6 @@ from ..utils import (
class LimelightBaseIE(InfoExtractor):
_PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
- _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
@classmethod
def _extract_urls(cls, webpage, source_url):
@@ -70,7 +69,8 @@ class LimelightBaseIE(InfoExtractor):
try:
return self._download_json(
self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
- item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
+ item_id, 'Downloading PlaylistService %s JSON' % method,
+ fatal=fatal, headers=headers)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
@@ -79,22 +79,22 @@ class LimelightBaseIE(InfoExtractor):
raise ExtractorError(error, expected=True)
raise
- def _call_api(self, organization_id, item_id, method):
- return self._download_json(
- self._API_URL % (organization_id, self._API_PATH, item_id, method),
- item_id, 'Downloading API %s JSON' % method)
-
- def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
+ def _extract(self, item_id, pc_method, mobile_method, referer=None):
pc = self._call_playlist_service(item_id, pc_method, referer=referer)
- metadata = self._call_api(pc['orgId'], item_id, meta_method)
- mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
- return pc, mobile, metadata
+ mobile = self._call_playlist_service(
+ item_id, mobile_method, fatal=False, referer=referer)
+ return pc, mobile
+
+ def _extract_info(self, pc, mobile, i, referer):
+ get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
+ pc_item = get_item(pc, 'playlistItems')
+ mobile_item = get_item(mobile, 'mediaList')
+ video_id = pc_item.get('mediaId') or mobile_item['mediaId']
+ title = pc_item.get('title') or mobile_item['title']
- def _extract_info(self, streams, mobile_urls, properties):
- video_id = properties['media_id']
formats = []
urls = []
- for stream in streams:
+ for stream in pc_item.get('streams', []):
stream_url = stream.get('url')
if not stream_url or stream.get('drmProtected') or stream_url in urls:
continue
@@ -155,7 +155,7 @@ class LimelightBaseIE(InfoExtractor):
})
formats.append(fmt)
- for mobile_url in mobile_urls:
+ for mobile_url in mobile_item.get('mobileUrls', []):
media_url = mobile_url.get('mobileUrl')
format_id = mobile_url.get('targetMediaPlatform')
if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
@@ -179,54 +179,34 @@ class LimelightBaseIE(InfoExtractor):
self._sort_formats(formats)
- title = properties['title']
- description = properties.get('description')
- timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
- duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
- filesize = int_or_none(properties.get('total_storage_in_bytes'))
- categories = [properties.get('category')]
- tags = properties.get('tags', [])
- thumbnails = [{
- 'url': thumbnail['url'],
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
-
subtitles = {}
- for caption in properties.get('captions', []):
- lang = caption.get('language_code')
- subtitles_url = caption.get('url')
- if lang and subtitles_url:
- subtitles.setdefault(lang, []).append({
- 'url': subtitles_url,
- })
- closed_captions_url = properties.get('closed_captions_url')
- if closed_captions_url:
- subtitles.setdefault('en', []).append({
- 'url': closed_captions_url,
- 'ext': 'ttml',
- })
+ for flag in mobile_item.get('flags') or []:
+ if flag == 'ClosedCaptions':
+ closed_captions = self._call_playlist_service(
+ video_id, 'getClosedCaptionsDetailsByMediaId',
+ False, referer) or []
+ for cc in closed_captions:
+ cc_url = cc.get('webvttFileUrl')
+ if not cc_url:
+ continue
+ lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ break
+
+ get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
return {
'id': video_id,
'title': title,
- 'description': description,
+ 'description': get_meta('description'),
'formats': formats,
- 'timestamp': timestamp,
- 'duration': duration,
- 'filesize': filesize,
- 'categories': categories,
- 'tags': tags,
- 'thumbnails': thumbnails,
+ 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
+ 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
'subtitles': subtitles,
}
- def _extract_info_helper(self, pc, mobile, i, metadata):
- return self._extract_info(
- try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
- try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
- metadata)
-
class LimelightMediaIE(LimelightBaseIE):
IE_NAME = 'limelight'
@@ -251,8 +231,6 @@ class LimelightMediaIE(LimelightBaseIE):
'description': 'md5:8005b944181778e313d95c1237ddb640',
'thumbnail': r're:^https?://.*\.jpeg$',
'duration': 144.23,
- 'timestamp': 1244136834,
- 'upload_date': '20090604',
},
'params': {
# m3u8 download
@@ -268,30 +246,29 @@ class LimelightMediaIE(LimelightBaseIE):
'title': '3Play Media Overview Video',
'thumbnail': r're:^https?://.*\.jpeg$',
'duration': 78.101,
- 'timestamp': 1338929955,
- 'upload_date': '20120605',
- 'subtitles': 'mincount:9',
+ # TODO: extract all languages that were accessible via API
+ # 'subtitles': 'mincount:9',
+ 'subtitles': 'mincount:1',
},
}, {
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
'only_matching': True,
}]
_PLAYLIST_SERVICE_PATH = 'media'
- _API_PATH = 'media'
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
video_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
self._initialize_geo_bypass({
'countries': smuggled_data.get('geo_countries'),
})
- pc, mobile, metadata = self._extract(
+ pc, mobile = self._extract(
video_id, 'getPlaylistByMediaId',
- 'getMobilePlaylistByMediaId', 'properties',
- smuggled_data.get('source_url'))
+ 'getMobilePlaylistByMediaId', source_url)
- return self._extract_info_helper(pc, mobile, 0, metadata)
+ return self._extract_info(pc, mobile, 0, source_url)
class LimelightChannelIE(LimelightBaseIE):
@@ -313,6 +290,7 @@ class LimelightChannelIE(LimelightBaseIE):
'info_dict': {
'id': 'ab6a524c379342f9b23642917020c082',
'title': 'Javascript Sample Code',
+ 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
},
'playlist_mincount': 3,
}, {
@@ -320,22 +298,23 @@ class LimelightChannelIE(LimelightBaseIE):
'only_matching': True,
}]
_PLAYLIST_SERVICE_PATH = 'channel'
- _API_PATH = 'channels'
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
channel_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
- pc, mobile, medias = self._extract(
+ pc, mobile = self._extract(
channel_id, 'getPlaylistByChannelId',
'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
- 'media', smuggled_data.get('source_url'))
+ source_url)
entries = [
- self._extract_info_helper(pc, mobile, i, medias['media_list'][i])
- for i in range(len(medias['media_list']))]
+ self._extract_info(pc, mobile, i, source_url)
+ for i in range(len(pc['playlistItems']))]
- return self.playlist_result(entries, channel_id, pc['title'])
+ return self.playlist_result(
+ entries, channel_id, pc.get('title'), mobile.get('description'))
class LimelightChannelListIE(LimelightBaseIE):
@@ -368,10 +347,12 @@ class LimelightChannelListIE(LimelightBaseIE):
def _real_extract(self, url):
channel_list_id = self._match_id(url)
- channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
+ channel_list = self._call_playlist_service(
+ channel_list_id, 'getMobileChannelListById')
entries = [
self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
for channel in channel_list['channelList']]
- return self.playlist_result(entries, channel_list_id, channel_list['title'])
+ return self.playlist_result(
+ entries, channel_list_id, channel_list['title'])
diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dl/extractor/linkedin.py
new file mode 100644
index 000000000..26fc703d1
--- /dev/null
+++ b/youtube_dl/extractor/linkedin.py
@@ -0,0 +1,182 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinkedInLearningBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'linkedin'
+ _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning'
+
+ def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
+ query = {
+ 'courseSlug': course_slug,
+ 'fields': fields,
+ 'q': 'slugs',
+ }
+ sub = ''
+ if video_slug:
+ query.update({
+ 'videoSlug': video_slug,
+ 'resolution': '_%s' % resolution,
+ })
+ sub = ' %dp' % resolution
+ api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
+ return self._download_json(
+ api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
+ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
+ }, query=query)['elements'][0]
+
+ def _get_urn_id(self, video_data):
+ urn = video_data.get('urn')
+ if urn:
+ mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
+ if mobj:
+ return mobj.group(1)
+
+ def _get_video_id(self, video_data, course_slug, video_slug):
+ return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug)
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+ action_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
+ default='https://www.linkedin.com/uas/login-submit', group='url'))
+ data = self._hidden_inputs(login_page)
+ data.update({
+ 'session_key': email,
+ 'session_password': password,
+ })
+ login_submit_page = self._download_webpage(
+ action_url, None, 'Logging in',
+ data=urlencode_postdata(data))
+ error = self._search_regex(
+ r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
+ login_submit_page, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+
+class LinkedInLearningIE(LinkedInLearningBaseIE):
+ IE_NAME = 'linkedin:learning'
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
+ 'md5': 'a1d74422ff0d5e66a792deb996693167',
+ 'info_dict': {
+ 'id': '90426',
+ 'ext': 'mp4',
+ 'title': 'Welcome',
+ 'timestamp': 1430396150.82,
+ 'upload_date': '20150430',
+ },
+ }
+
+ def _real_extract(self, url):
+ course_slug, video_slug = re.match(self._VALID_URL, url).groups()
+
+ video_data = None
+ formats = []
+ for width, height in ((640, 360), (960, 540), (1280, 720)):
+ video_data = self._call_api(
+ course_slug, 'selectedVideo', video_slug, height)['selectedVideo']
+
+ video_url_data = video_data.get('url') or {}
+ progressive_url = video_url_data.get('progressiveUrl')
+ if progressive_url:
+ formats.append({
+ 'format_id': 'progressive-%dp' % height,
+ 'url': progressive_url,
+ 'height': height,
+ 'width': width,
+ 'source_preference': 1,
+ })
+
+ title = video_data['title']
+
+ audio_url = video_data.get('audio', {}).get('progressiveUrl')
+ if audio_url:
+ formats.append({
+ 'abr': 64,
+ 'ext': 'm4a',
+ 'format_id': 'audio',
+ 'url': audio_url,
+ 'vcodec': 'none',
+ })
+
+ streaming_url = video_url_data.get('streamingUrl')
+ if streaming_url:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_slug, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))
+
+ return {
+ 'id': self._get_video_id(video_data, course_slug, video_slug),
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('defaultThumbnail'),
+ 'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
+ 'duration': int_or_none(video_data.get('durationInSeconds')),
+ }
+
+
+class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
+ IE_NAME = 'linkedin:learning:course'
+ _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
+ _TEST = {
+ 'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
+ 'info_dict': {
+ 'id': 'programming-foundations-fundamentals',
+ 'title': 'Programming Foundations: Fundamentals',
+ 'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
+ },
+ 'playlist_mincount': 61,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_slug = self._match_id(url)
+ course_data = self._call_api(course_slug, 'chapters,description,title')
+
+ entries = []
+ for chapter_number, chapter in enumerate(course_data.get('chapters', []), 1):
+ chapter_title = chapter.get('title')
+ chapter_id = self._get_urn_id(chapter)
+ for video in chapter.get('videos', []):
+ video_slug = video.get('slug')
+ if not video_slug:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'id': self._get_video_id(video, course_slug, video_slug),
+ 'title': video.get('title'),
+ 'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_number,
+ 'chapter_id': chapter_id,
+ 'ie_key': LinkedInLearningIE.ie_key(),
+ })
+
+ return self.playlist_result(
+ entries, course_slug,
+ course_data.get('title'),
+ course_data.get('description'))
diff --git a/youtube_dl/extractor/linuxacademy.py b/youtube_dl/extractor/linuxacademy.py
new file mode 100644
index 000000000..23ca965d9
--- /dev/null
+++ b/youtube_dl/extractor/linuxacademy.py
@@ -0,0 +1,173 @@
+from __future__ import unicode_literals
+
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_HTTPError,
+)
+from ..utils import (
+ ExtractorError,
+ orderedSet,
+ unescapeHTML,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinuxAcademyIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?linuxacademy\.com/cp/
+ (?:
+ courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
+ modules/view/id/(?P<course_id>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
+ 'info_dict': {
+ 'id': '1498-2',
+ 'ext': 'mp4',
+ 'title': "Introduction to the Practitioner's Brief",
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Linux Academy account credentials',
+ }, {
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://linuxacademy.com/cp/modules/view/id/154',
+ 'info_dict': {
+ 'id': '154',
+ 'title': 'AWS Certified Cloud Practitioner',
+ 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
+ },
+ 'playlist_count': 41,
+ 'skip': 'Requires Linux Academy account credentials',
+ }]
+
+ _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
+ _ORIGIN_URL = 'https://linuxacademy.com'
+ _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
+ _NETRC_MACHINE = 'linuxacademy'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def random_string():
+ return ''.join([
+ random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~')
+ for _ in range(32)])
+
+ webpage, urlh = self._download_webpage_handle(
+ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
+ 'client_id': self._CLIENT_ID,
+ 'response_type': 'token id_token',
+ 'redirect_uri': self._ORIGIN_URL,
+ 'scope': 'openid email user_impersonation profile',
+ 'audience': self._ORIGIN_URL,
+ 'state': random_string(),
+ 'nonce': random_string(),
+ })
+
+ login_data = self._parse_json(
+ self._search_regex(
+ r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'login info', group='value'), None,
+ transform_source=lambda x: compat_b64decode(x).decode('utf-8')
+ )['extraParams']
+
+ login_data.update({
+ 'client_id': self._CLIENT_ID,
+ 'redirect_uri': self._ORIGIN_URL,
+ 'tenant': 'lacausers',
+ 'connection': 'Username-Password-Authentication',
+ 'username': username,
+ 'password': password,
+ 'sso': 'true',
+ })
+
+ login_state_url = urlh.geturl()
+
+ try:
+ login_page = self._download_webpage(
+ 'https://login.linuxacademy.com/usernamepassword/login', None,
+ 'Downloading login page', data=json.dumps(login_data).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read(), None)
+ message = error.get('description') or error['code']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+ raise
+
+ callback_page, urlh = self._download_webpage_handle(
+ 'https://login.linuxacademy.com/login/callback', None,
+ 'Downloading callback page',
+ data=urlencode_postdata(self._hidden_inputs(login_page)),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+
+ access_token = self._search_regex(
+ r'access_token=([^=&]+)', urlh.geturl(),
+ 'access token')
+
+ self._download_webpage(
+ 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
+ % access_token, None, 'Downloading token validation page')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
+ item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
+
+ webpage = self._download_webpage(url, item_id)
+
+ # course path
+ if course_id:
+ entries = [
+ self.url_result(
+ urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
+ for lesson_url in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
+ webpage))]
+ title = unescapeHTML(self._html_search_regex(
+ (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
+ r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'title', default=None, group='value'))
+ description = unescapeHTML(self._html_search_regex(
+ r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'description', default=None, group='value'))
+ return self.playlist_result(entries, course_id, title, description)
+
+ # single video path
+ info = self._extract_jwplayer_data(
+ webpage, item_id, require_title=False, m3u8_id='hls')
+ title = self._search_regex(
+ (r'>Lecture\s*:\s*(?P<value>[^<]+)',
+ r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ info.update({
+ 'id': item_id,
+ 'title': title,
+ })
+ return info
diff --git a/youtube_dl/extractor/livejournal.py b/youtube_dl/extractor/livejournal.py
new file mode 100644
index 000000000..3a9f4553f
--- /dev/null
+++ b/youtube_dl/extractor/livejournal.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import int_or_none
+
+
+class LiveJournalIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^.]+\.)?livejournal\.com/video/album/\d+.+?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://andrei-bt.livejournal.com/video/album/407/?mode=view&id=51272',
+ 'md5': 'adaf018388572ced8a6f301ace49d4b2',
+ 'info_dict': {
+ 'id': '1263729',
+ 'ext': 'mp4',
+ 'title': 'Истребители против БПЛА',
+ 'upload_date': '20190624',
+ 'timestamp': 1561406715,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ record = self._parse_json(self._search_regex(
+ r'Site\.page\s*=\s*({.+?});', webpage,
+ 'page data'), video_id)['video']['record']
+ storage_id = compat_str(record['storageid'])
+ title = record.get('name')
+ if title:
+ # remove filename extension(.mp4, .mov, etc...)
+ title = title.rsplit('.', 1)[0]
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': record.get('thumbnail'),
+ 'timestamp': int_or_none(record.get('timecreate')),
+ 'url': 'eagleplatform:vc.videos.livejournal.com:' + storage_id,
+ 'ie_key': 'EaglePlatform',
+ }
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py
index 26671753c..4ac437c8b 100644
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dl/extractor/liveleak.py
@@ -44,7 +44,7 @@ class LiveLeakIE(InfoExtractor):
},
'skip': 'Video is dead',
}, {
- # Covers https://github.com/rg3/youtube-dl/pull/5983
+ # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
# Multiple resolutions
'url': 'http://www.liveleak.com/view?i=801_1409392012',
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
@@ -57,7 +57,7 @@ class LiveLeakIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
- # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
+ # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
'url': 'http://m.liveleak.com/view?i=763_1473349649',
'add_ie': ['Youtube'],
'info_dict': {
@@ -82,12 +82,16 @@ class LiveLeakIE(InfoExtractor):
}, {
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
'only_matching': True,
+ }, {
+ # No original video
+ 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
+ 'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
- r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[if]=[\w_]+[^"]+)"',
+ r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
webpage)
def _real_extract(self, url):
@@ -120,13 +124,29 @@ class LiveLeakIE(InfoExtractor):
}
for idx, info_dict in enumerate(entries):
+ formats = []
for a_format in info_dict['formats']:
if not a_format.get('height'):
a_format['height'] = int_or_none(self._search_regex(
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
default=None))
-
- self._sort_formats(info_dict['formats'])
+ formats.append(a_format)
+
+ # Removing '.*.mp4' gives the raw video, which is essentially
+ # the same video without the LiveLeak logo at the top (see
+ # https://github.com/ytdl-org/youtube-dl/pull/4768)
+ orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
+ if a_format['url'] != orig_url:
+ format_id = a_format.get('format_id')
+ format_id = 'original' + ('-' + format_id if format_id else '')
+ if self._is_valid_url(orig_url, video_id, format_id):
+ formats.append({
+ 'format_id': format_id,
+ 'url': orig_url,
+ 'preference': 1,
+ })
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
# Don't append entry ID for one-video pages to keep backward compatibility
if len(entries) > 1:
@@ -146,7 +166,7 @@ class LiveLeakIE(InfoExtractor):
class LiveLeakEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[if])=(?P<id>[\w_]+)'
+ _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
# See generic.py for actual test cases
_TESTS = [{
@@ -158,15 +178,14 @@ class LiveLeakEmbedIE(InfoExtractor):
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- kind, video_id = mobj.group('kind', 'id')
+ kind, video_id = re.match(self._VALID_URL, url).groups()
if kind == 'f':
webpage = self._download_webpage(url, video_id)
liveleak_url = self._search_regex(
- r'logourl\s*:\s*(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
+ r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
webpage, 'LiveLeak URL', group='url')
- elif kind == 'i':
- liveleak_url = 'http://www.liveleak.com/view?i=%s' % video_id
+ else:
+ liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index c4776bbf3..e55b1a202 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -363,7 +363,4 @@ class LivestreamShortenerIE(InfoExtractor):
id = mobj.group('id')
webpage = self._download_webpage(url, id)
- return {
- '_type': 'url',
- 'url': self._og_search_url(webpage),
- }
+ return self.url_result(self._og_search_url(webpage))
diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py
index cfec0d3d0..3e71852aa 100644
--- a/youtube_dl/extractor/lnkgo.py
+++ b/youtube_dl/extractor/lnkgo.py
@@ -5,24 +5,27 @@ import re
from .common import InfoExtractor
from ..utils import (
+ clean_html,
+ compat_str,
int_or_none,
- unified_strdate,
+ parse_iso8601,
)
class LnkGoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
+ _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
_TESTS = [{
- 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
+ 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
'info_dict': {
- 'id': '46712',
+ 'id': '10809',
'ext': 'mp4',
- 'title': 'Yra kaip yra',
- 'upload_date': '20150107',
- 'description': 'md5:d82a5e36b775b7048617f263a0e3475e',
- 'age_limit': 7,
- 'duration': 3019,
- 'thumbnail': r're:^https?://.*\.jpg$'
+ 'title': "Put'ka: Trys Klausimai",
+ 'upload_date': '20161216',
+ 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
+ 'age_limit': 18,
+ 'duration': 117,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1481904000,
},
'params': {
'skip_download': True, # HLS download
@@ -30,20 +33,21 @@ class LnkGoIE(InfoExtractor):
}, {
'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
'info_dict': {
- 'id': '47289',
+ 'id': '10467',
'ext': 'mp4',
'title': 'Nėrdas: Kompiuterio Valymas',
'upload_date': '20150113',
'description': 'md5:7352d113a242a808676ff17e69db6a69',
'age_limit': 18,
'duration': 346,
- 'thumbnail': r're:^https?://.*\.jpg$'
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421164800,
},
'params': {
'skip_download': True, # HLS download
},
}, {
- 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
+ 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
'only_matching': True,
}]
_AGE_LIMITS = {
@@ -51,66 +55,34 @@ class LnkGoIE(InfoExtractor):
'N-14': 14,
'S': 18,
}
+ _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, display_id, 'Downloading player webpage')
-
- video_id = self._search_regex(
- r'data-ep="([^"]+)"', webpage, 'video ID')
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- upload_date = unified_strdate(self._search_regex(
- r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
-
- thumbnail_w = int_or_none(
- self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
- thumbnail_h = int_or_none(
- self._og_search_property('image:height', webpage, 'thumbnail height', fatal=False))
- thumbnail = {
- 'url': self._og_search_thumbnail(webpage),
- }
- if thumbnail_w and thumbnail_h:
- thumbnail.update({
- 'width': thumbnail_w,
- 'height': thumbnail_h,
- })
-
- config = self._parse_json(self._search_regex(
- r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
-
- if config.get('pGeo'):
- self.report_warning(
- 'This content might not be available in your country due to copyright reasons')
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
- formats = [{
- 'format_id': 'hls',
- 'ext': 'mp4',
- 'url': config['EpisodeVideoLink_HLS'],
- }]
-
- m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
- if m:
- formats.append({
- 'format_id': 'rtmp',
- 'ext': 'flv',
- 'url': m.group('url'),
- 'play_path': m.group('play_path'),
- 'page_url': url,
- })
+ video_info = self._download_json(
+ 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'),
+ display_id)['videoConfig']['videoInfo']
+ video_id = compat_str(video_info['id'])
+ title = video_info['title']
+ prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
+ formats = self._extract_m3u8_formats(
+ self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
+ video_id, 'mp4', 'm3u8_native')
self._sort_formats(formats)
+ poster_image = video_info.get('posterImage')
+
return {
'id': video_id,
'display_id': display_id,
'title': title,
'formats': formats,
- 'thumbnails': [thumbnail],
- 'duration': int_or_none(config.get('VideoTime')),
- 'description': description,
- 'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
- 'upload_date': upload_date,
+ 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None,
+ 'duration': int_or_none(video_info.get('duration')),
+ 'description': clean_html(video_info.get('htmlDescription')),
+ 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
+ 'timestamp': parse_iso8601(video_info.get('airDate')),
+ 'view_count': int_or_none(video_info.get('viewsCount')),
}
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 4ba61cd8a..b3d8653d0 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -15,7 +15,7 @@ from ..utils import (
class LyndaBaseIE(InfoExtractor):
- _SIGNIN_URL = 'https://www.lynda.com/signin'
+ _SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
_USER_URL = 'https://www.lynda.com/signin/user'
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
@@ -117,6 +117,10 @@ class LyndaIE(LyndaBaseIE):
}, {
'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
'only_matching': True,
+ }, {
+ # Status="NotFound", Message="Transcript not found"
+ 'url': 'https://www.lynda.com/ASP-NET-tutorials/What-you-should-know/5034180/2811512-4.html',
+ 'only_matching': True,
}]
def _raise_unavailable(self, video_id):
@@ -247,12 +251,17 @@ class LyndaIE(LyndaBaseIE):
def _get_subtitles(self, video_id):
url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
- subs = self._download_json(url, None, False)
+ subs = self._download_webpage(
+ url, video_id, 'Downloading subtitles JSON', fatal=False)
+ if not subs or 'Status="NotFound"' in subs:
+ return {}
+ subs = self._parse_json(subs, video_id, fatal=False)
+ if not subs:
+ return {}
fixed_subs = self._fix_subtitles(subs)
if fixed_subs:
return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
- else:
- return {}
+ return {}
class LyndaCourseIE(LyndaBaseIE):
diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py
deleted file mode 100644
index 43db9929c..000000000
--- a/youtube_dl/extractor/macgamestore.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class MacGameStoreIE(InfoExtractor):
- IE_NAME = 'macgamestore'
- IE_DESC = 'MacGameStore trailers'
- _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
- 'md5': '8649b8ea684b6666b4c5be736ecddc61',
- 'info_dict': {
- 'id': '2450',
- 'ext': 'm4v',
- 'title': 'Crow',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- url, video_id, 'Downloading trailer page')
-
- if '>Missing Media<' in webpage:
- raise ExtractorError(
- 'Trailer %s does not exist' % video_id, expected=True)
-
- video_title = self._html_search_regex(
- r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
-
- video_url = self._html_search_regex(
- r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
- webpage, 'video URL')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': video_title
- }
diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py
index 6b0e64b7f..65cc474db 100644
--- a/youtube_dl/extractor/mailru.py
+++ b/youtube_dl/extractor/mailru.py
@@ -20,10 +20,10 @@ class MailRuIE(InfoExtractor):
IE_DESC = 'Видео@Mail.Ru'
_VALID_URL = r'''(?x)
https?://
- (?:(?:www|m)\.)?my\.mail\.ru/
+ (?:(?:www|m)\.)?my\.mail\.ru/+
(?:
video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
- (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
+ (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
(?:video/embed|\+/video/meta)/(?P<metaid>\d+)
)
'''
@@ -85,6 +85,14 @@ class MailRuIE(InfoExtractor):
{
'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
}
]
@@ -120,6 +128,12 @@ class MailRuIE(InfoExtractor):
'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
video_id, 'Downloading video JSON')
+ headers = {}
+
+ video_key = self._get_cookies('https://my.mail.ru').get('video_key')
+ if video_key:
+ headers['Cookie'] = 'video_key=%s' % video_key.value
+
formats = []
for f in video_data['videos']:
video_url = f.get('url')
@@ -132,6 +146,7 @@ class MailRuIE(InfoExtractor):
'url': video_url,
'format_id': format_id,
'height': height,
+ 'http_headers': headers,
})
self._sort_formats(formats)
@@ -237,7 +252,7 @@ class MailRuMusicSearchBaseIE(InfoExtractor):
class MailRuMusicIE(MailRuMusicSearchBaseIE):
IE_NAME = 'mailru:music'
IE_DESC = 'Музыка@Mail.Ru'
- _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)'
_TESTS = [{
'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
'md5': '0f8c22ef8c5d665b13ac709e63025610',
@@ -273,7 +288,7 @@ class MailRuMusicIE(MailRuMusicSearchBaseIE):
class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
IE_NAME = 'mailru:music:search'
IE_DESC = 'Музыка@Mail.Ru'
- _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://my.mail.ru/music/search/black%20shadow',
'info_dict': {
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
deleted file mode 100644
index 8eda69cfc..000000000
--- a/youtube_dl/extractor/makertv.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class MakerTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
- _TEST = {
- 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
- 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
- 'info_dict': {
- 'id': 'Fh3QgymL9gsc',
- 'ext': 'mp4',
- 'title': 'Maze Runner: The Scorch Trials Official Movie Review',
- 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
- 'upload_date': '20150918',
- 'timestamp': 1442549540,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': 'jwplatform:%s' % jwplatform_id,
- 'ie_key': 'JWPlatform',
- }
diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py
new file mode 100644
index 000000000..6f4fd927f
--- /dev/null
+++ b/youtube_dl/extractor/malltv.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import merge_dicts
+
+
+class MallTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'md5': '1c4a37f080e1f3023103a7b43458e518',
+ 'info_dict': {
+ 'id': 't0zzt0',
+ 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'ext': 'mp4',
+ 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
+ 'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
+ 'duration': 216,
+ 'timestamp': 1538870400,
+ 'upload_date': '20181007',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, display_id, headers=self.geo_verification_headers())
+
+ SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
+ video_id = self._search_regex(
+ SOURCE_RE, webpage, 'video id', group='id')
+
+ media = self._parse_html5_media_entries(
+ url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
+ m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts(media, info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage, default=None) or display_id,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ })
diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py
index 482175a34..acee370e9 100644
--- a/youtube_dl/extractor/mangomolo.py
+++ b/youtube_dl/extractor/mangomolo.py
@@ -10,18 +10,21 @@ from ..utils import int_or_none
class MangomoloBaseIE(InfoExtractor):
+ _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+
def _get_real_id(self, page_id):
return page_id
def _real_extract(self, url):
page_id = self._get_real_id(self._match_id(url))
- webpage = self._download_webpage(url, page_id)
+ webpage = self._download_webpage(
+ 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
hidden_inputs = self._hidden_inputs(webpage)
m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
format_url = self._html_search_regex(
[
- r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
+ r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
r'<a[^>]+href="(rtsp://[^"]+)"'
], webpage, 'format url')
formats = self._extract_wowza_formats(
@@ -39,14 +42,16 @@ class MangomoloBaseIE(InfoExtractor):
class MangomoloVideoIE(MangomoloBaseIE):
- IE_NAME = 'mangomolo:video'
- _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
+ _TYPE = 'video'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
_IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE):
- IE_NAME = 'mangomolo:live'
- _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+ _TYPE = 'live'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
_IS_LIVE = True
def _get_real_id(self, page_id):
diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dl/extractor/manyvids.py
index b94b3c2ab..e8d7163e4 100644
--- a/youtube_dl/extractor/manyvids.py
+++ b/youtube_dl/extractor/manyvids.py
@@ -2,12 +2,18 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ str_to_int,
+ urlencode_postdata,
+)
class ManyVidsIE(InfoExtractor):
_VALID_URL = r'(?i)https?://(?:www\.)?manyvids\.com/video/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
+ # preview video
'url': 'https://www.manyvids.com/Video/133957/everthing-about-me/',
'md5': '03f11bb21c52dd12a05be21a5c7dcc97',
'info_dict': {
@@ -17,7 +23,18 @@ class ManyVidsIE(InfoExtractor):
'view_count': int,
'like_count': int,
},
- }
+ }, {
+ # full video
+ 'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
+ 'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
+ 'info_dict': {
+ 'id': '935718',
+ 'ext': 'mp4',
+ 'title': 'MY FACE REVEAL',
+ 'view_count': int,
+ 'like_count': int,
+ },
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -28,12 +45,41 @@ class ManyVidsIE(InfoExtractor):
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'video URL', group='url')
- title = '%s (Preview)' % self._html_search_regex(
- r'<h2[^>]+class="m-a-0"[^>]*>([^<]+)', webpage, 'title')
+ title = self._html_search_regex(
+ (r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
+ r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
+ webpage, 'title', default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', fatal=True)
+
+ if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
+ title += ' (Preview)'
+
+ mv_token = self._search_regex(
+ r'data-mvtoken=(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'mv token', default=None, group='value')
+
+ if mv_token:
+ # Sets some cookies
+ self._download_webpage(
+ 'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
+ video_id, fatal=False, data=urlencode_postdata({
+ 'mvtoken': mv_token,
+ 'vid': video_id,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest'
+ })
+
+ if determine_ext(video_url) == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ else:
+ formats = [{'url': video_url}]
like_count = int_or_none(self._search_regex(
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
- view_count = int_or_none(self._html_search_regex(
+ view_count = str_to_int(self._html_search_regex(
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
'view count', default=None))
@@ -42,7 +88,5 @@ class ManyVidsIE(InfoExtractor):
'title': title,
'view_count': view_count,
'like_count': like_count,
- 'formats': [{
- 'url': video_url,
- }],
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py
index 57f97409d..933df1495 100644
--- a/youtube_dl/extractor/mediaset.py
+++ b/youtube_dl/extractor/mediaset.py
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re
from .theplatform import ThePlatformBaseIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
ExtractorError,
int_or_none,
@@ -22,7 +26,7 @@ class MediasetIE(ThePlatformBaseIE):
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
player/index\.html\?.*?\bprogramGuid=
)
- )(?P<id>[0-9A-Z]{16})
+ )(?P<id>[0-9A-Z]{16,})
'''
_TESTS = [{
# full episode
@@ -57,7 +61,6 @@ class MediasetIE(ThePlatformBaseIE):
'uploader': 'Canale 5',
'uploader_id': 'C5',
},
- 'expected_warnings': ['HTTP Error 403: Forbidden'],
}, {
# clip
'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
@@ -73,15 +76,53 @@ class MediasetIE(ThePlatformBaseIE):
}, {
'url': 'mediaset:FAFU000000665924',
'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
+ 'only_matching': True,
}]
@staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
- webpage)]
+ def _extract_urls(ie, webpage):
+ def _qs(url):
+ return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+ def _program_guid(qs):
+ return qs.get('programGuid', [None])[0]
+
+ entries = []
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
+ webpage):
+ embed_url = mobj.group('url')
+ embed_qs = _qs(embed_url)
+ program_guid = _program_guid(embed_qs)
+ if program_guid:
+ entries.append(embed_url)
+ continue
+ video_id = embed_qs.get('id', [None])[0]
+ if not video_id:
+ continue
+ urlh = ie._request_webpage(
+ embed_url, video_id, note='Following embed URL redirect')
+ embed_url = urlh.geturl()
+ program_guid = _program_guid(_qs(embed_url))
+ if program_guid:
+ entries.append(embed_url)
+ return entries
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ for video in smil.findall(self._xpath_ns('.//video', namespace)):
+ video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
+ return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
def _real_extract(self, url):
guid = self._match_id(url)
@@ -92,14 +133,15 @@ class MediasetIE(ThePlatformBaseIE):
subtitles = {}
first_e = None
for asset_type in ('SD', 'HD'):
- for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
+ # TODO: fixup ISM+none manifest URLs
+ for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
try:
tp_formats, tp_subtitles = self._extract_theplatform_smil(
update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
'mbr': 'true',
'formats': f,
'assetTypes': asset_type,
- }), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
+ }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
except ExtractorError as e:
if not first_e:
first_e = e
diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py
index 84876b883..d6eb15740 100644
--- a/youtube_dl/extractor/mediasite.py
+++ b/youtube_dl/extractor/mediasite.py
@@ -13,6 +13,8 @@ from ..utils import (
ExtractorError,
float_or_none,
mimetype2ext,
+ str_or_none,
+ try_get,
unescapeHTML,
unsmuggle_url,
url_or_none,
@@ -20,8 +22,11 @@ from ..utils import (
)
+_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
+
+
class MediasiteIE(InfoExtractor):
- _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
_TESTS = [
{
'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
@@ -84,6 +89,19 @@ class MediasiteIE(InfoExtractor):
'timestamp': 1333983600,
'duration': 7794,
}
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
+ 'only_matching': True,
+ },
+ {
+ # dashed id
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
+ 'only_matching': True,
}
]
@@ -101,7 +119,7 @@ class MediasiteIE(InfoExtractor):
return [
unescapeHTML(mobj.group('url'))
for mobj in re.finditer(
- r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/[0-9a-f]{32,34}(?:\?.*?)?)\1',
+ r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
webpage)]
def _real_extract(self, url):
@@ -111,7 +129,7 @@ class MediasiteIE(InfoExtractor):
query = mobj.group('query')
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
- redirect_url = compat_str(urlh.geturl())
+ redirect_url = urlh.geturl()
# XXX: might have also extracted UrlReferrer and QueryString from the html
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
@@ -213,3 +231,136 @@ class MediasiteIE(InfoExtractor):
'formats': formats,
'thumbnails': thumbnails,
}
+
+
+class MediasiteCatalogIE(InfoExtractor):
+ _VALID_URL = r'''(?xi)
+ (?P<url>https?://[^/]+/Mediasite)
+ /Catalog/Full/
+ (?P<catalog_id>{0})
+ (?:
+ /(?P<current_folder_id>{0})
+ /(?P<root_dynamic_folder_id>{0})
+ )?
+ '''.format(_ID_RE)
+ _TESTS = [{
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',
+ 'info_dict': {
+ 'id': '631f9e48530d454381549f955d08c75e21',
+ 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',
+ },
+ 'playlist_count': 6,
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with CurrentFolderId and RootDynamicFolderId
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'info_dict': {
+ 'id': '9518c4a6c5cf4993b21cbd53e828a92521',
+ 'title': 'IUSM Family and Friends Sessions',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',
+ 'only_matching': True,
+ }, {
+ # no AntiForgeryToken
+ 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'only_matching': True,
+ }, {
+ # dashed id
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_id = mobj.group('catalog_id')
+ current_folder_id = mobj.group('current_folder_id') or catalog_id
+ root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')
+
+ webpage = self._download_webpage(url, catalog_id)
+
+ # AntiForgeryToken is optional (e.g. [1])
+ # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21
+ anti_forgery_token = self._search_regex(
+ r'AntiForgeryToken\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery token', default=None, group='value')
+ if anti_forgery_token:
+ anti_forgery_header = self._search_regex(
+ r'AntiForgeryHeaderName\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery header name',
+ default='X-SOFO-AntiForgeryHeader', group='value')
+
+ data = {
+ 'IsViewPage': True,
+ 'IsNewFolder': True,
+ 'AuthTicket': None,
+ 'CatalogId': catalog_id,
+ 'CurrentFolderId': current_folder_id,
+ 'RootDynamicFolderId': root_dynamic_folder_id,
+ 'ItemsPerPage': 1000,
+ 'PageIndex': 0,
+ 'PermissionMask': 'Execute',
+ 'CatalogSearchType': 'SearchInFolder',
+ 'SortBy': 'Date',
+ 'SortDirection': 'Descending',
+ 'StartDate': None,
+ 'EndDate': None,
+ 'StatusFilterList': None,
+ 'PreviewKey': None,
+ 'Tags': [],
+ }
+
+ headers = {
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ if anti_forgery_token:
+ headers[anti_forgery_header] = anti_forgery_token
+
+ catalog = self._download_json(
+ '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url,
+ catalog_id, data=json.dumps(data).encode(), headers=headers)
+
+ entries = []
+ for video in catalog['PresentationDetailsList']:
+ if not isinstance(video, dict):
+ continue
+ video_id = str_or_none(video.get('Id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ '%s/Play/%s' % (mediasite_url, video_id),
+ ie=MediasiteIE.ie_key(), video_id=video_id))
+
+ title = try_get(
+ catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
+
+ return self.playlist_result(entries, catalog_id, title,)
+
+
+class MediasiteNamedCatalogIE(InfoExtractor):
+ _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_name = mobj.group('catalog_name')
+
+ webpage = self._download_webpage(url, catalog_name)
+
+ catalog_id = self._search_regex(
+ r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id')
+
+ return self.url_result(
+ '%s/Catalog/Full/%s' % (mediasite_url, catalog_id),
+ ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index 28f59f63c..9e92416d1 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -1,12 +1,13 @@
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
+ compat_urllib_parse,
compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
)
from ..utils import (
determine_ext,
@@ -144,7 +145,7 @@ class MetacafeIE(InfoExtractor):
headers = {
# Disable family filter
- 'Cookie': 'user=%s; ' % compat_urllib_parse_urlencode({'ffilter': False})
+ 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
}
# AnyClip videos require the flashversion cookie so that we get the link
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py
index d53d96aae..71fc3ec56 100644
--- a/youtube_dl/extractor/mgtv.py
+++ b/youtube_dl/extractor/mgtv.py
@@ -1,22 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
+import base64
+import time
+import uuid
+
from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import int_or_none
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
+ _GEO_COUNTRIES = ['CN']
_TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
- 'md5': 'b1ffc0fc163152acf6beaa81832c9ee7',
'info_dict': {
'id': '3116640',
'ext': 'mp4',
- 'title': '我是歌手第四季双年巅峰会:韩红李玟“双王”领军对抗',
+ 'title': '我是歌手 第四季',
'description': '我是歌手第四季双年巅峰会',
'duration': 7461,
'thumbnail': r're:^https?://.*\.jpg$',
@@ -28,16 +38,30 @@ class MGTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- api_data = self._download_json(
- 'http://pcweb.api.mgtv.com/player/video', video_id,
- query={'video_id': video_id},
- headers=self.geo_verification_headers())['data']
+ try:
+ api_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/video', video_id, query={
+ 'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
+ 'video_id': video_id,
+ }, headers=self.geo_verification_headers())['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)
+ if error.get('code') == 40005:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError(error['msg'], expected=True)
+ raise
info = api_data['info']
title = info['title'].strip()
- stream_domain = api_data['stream_domain'][0]
+ stream_data = self._download_json(
+ 'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
+ 'pm2': api_data['atc']['pm2'],
+ 'video_id': video_id,
+ }, headers=self.geo_verification_headers())['data']
+ stream_domain = stream_data['stream_domain'][0]
formats = []
- for idx, stream in enumerate(api_data['stream']):
+ for idx, stream in enumerate(stream_data['stream']):
stream_path = stream.get('url')
if not stream_path:
continue
@@ -47,7 +71,7 @@ class MGTVIE(InfoExtractor):
format_url = format_data.get('info')
if not format_url:
continue
- tbr = int_or_none(self._search_regex(
+ tbr = int_or_none(stream.get('filebitrate') or self._search_regex(
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({
'format_id': compat_str(tbr or idx),
@@ -55,6 +79,10 @@ class MGTVIE(InfoExtractor):
'ext': 'mp4',
'tbr': tbr,
'protocol': 'm3u8_native',
+ 'http_headers': {
+ 'Referer': url,
+ },
+ 'format_note': stream.get('name'),
})
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py
deleted file mode 100644
index dccc54249..000000000
--- a/youtube_dl/extractor/minhateca.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_filesize,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class MinhatecaIE(InfoExtractor):
- _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
- _TEST = {
- 'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
- 'info_dict': {
- 'id': '125848331',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'filesize_approx': 1530000,
- 'duration': 9,
- 'view_count': int,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- token = self._html_search_regex(
- r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
- webpage, 'request token')
- token_data = [
- ('fileId', video_id),
- ('__RequestVerificationToken', token),
- ]
- req = sanitized_Request(
- 'http://minhateca.com.br/action/License/Download',
- data=urlencode_postdata(token_data))
- req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- data = self._download_json(
- req, video_id, note='Downloading metadata')
-
- video_url = data['redirectUrl']
- title_str = self._html_search_regex(
- r'<h1.*?>(.*?)</h1>', webpage, 'title')
- title, _, ext = title_str.rpartition('.')
- filesize_approx = parse_filesize(self._html_search_regex(
- r'<p class="fileSize">(.*?)</p>',
- webpage, 'file size approximation', fatal=False))
- duration = parse_duration(self._html_search_regex(
- r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
- webpage, 'duration', fatal=False))
- view_count = int_or_none(self._html_search_regex(
- r'<p class="downloadsCounter">([0-9]+)</p>',
- webpage, 'view count', fatal=False))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'ext': ext,
- 'filesize_approx': filesize_approx,
- 'duration': duration,
- 'view_count': view_count,
- 'thumbnail': self._og_search_thumbnail(webpage),
- }
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index 1aea78d11..e1506a745 100644
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -65,30 +65,6 @@ class TechTVMITIE(InfoExtractor):
}
-class MITIE(TechTVMITIE):
- IE_NAME = 'video.mit.edu'
- _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
-
- _TEST = {
- 'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
- 'md5': '7db01d5ccc1895fc5010e9c9e13648da',
- 'info_dict': {
- 'id': '21783',
- 'ext': 'mp4',
- 'title': 'The Government is Profiling You',
- 'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
- },
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_title = mobj.group('title')
- webpage = self._download_webpage(url, page_title)
- embed_url = self._search_regex(
- r'<iframe .*?src="(.+?)"', webpage, 'embed url')
- return self.url_result(embed_url)
-
-
class OCWMITIE(InfoExtractor):
IE_NAME = 'ocw.mit.edu'
_VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
index 40f214a87..ad9da9612 100644
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -4,8 +4,8 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ parse_iso8601,
smuggle_url,
- parse_duration,
)
@@ -18,16 +18,18 @@ class MiTeleIE(InfoExtractor):
'info_dict': {
'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
'ext': 'mp4',
- 'title': 'Tor, la web invisible',
- 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+ 'title': 'Diario de La redacción Programa 144',
+ 'description': 'md5:07c35a7b11abb05876a6a79185b58d27',
'series': 'Diario de',
- 'season': 'La redacción',
+ 'season': 'Season 14',
'season_number': 14,
- 'season_id': 'diario_de_t14_11981',
- 'episode': 'Programa 144',
+ 'episode': 'Tor, la web invisible',
'episode_number': 3,
'thumbnail': r're:(?i)^https?://.*\.jpg$',
'duration': 2913,
+ 'age_limit': 16,
+ 'timestamp': 1471209401,
+ 'upload_date': '20160814',
},
'add_ie': ['Ooyala'],
}, {
@@ -39,13 +41,15 @@ class MiTeleIE(InfoExtractor):
'title': 'Cuarto Milenio Temporada 6 Programa 226',
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio',
- 'season': 'Temporada 6',
+ 'season': 'Season 6',
'season_number': 6,
- 'season_id': 'cuarto_milenio_t06_12715',
- 'episode': 'Programa 226',
+ 'episode': 'Episode 24',
'episode_number': 24,
'thumbnail': r're:(?i)^https?://.*\.jpg$',
'duration': 7313,
+ 'age_limit': 12,
+ 'timestamp': 1471209021,
+ 'upload_date': '20160814',
},
'params': {
'skip_download': True,
@@ -54,67 +58,36 @@ class MiTeleIE(InfoExtractor):
}, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True,
+ }, {
+ 'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- paths = self._download_json(
- 'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
- video_id, 'Downloading paths JSON')
-
- ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
- base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
- full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
- source = self._download_json(
- '%s://%s%s%s/docs/%s' % (
- ooyala_s.get('protocol', 'https'), base_url, full_path,
- ooyala_s.get('provider_id', '104951'), video_id),
- video_id, 'Downloading data JSON', query={
- 'include_titles': 'Series,Season',
- 'product_name': ooyala_s.get('product_name', 'test'),
- 'format': 'full',
- })['hits']['hits'][0]['_source']
-
- embedCode = source['offers'][0]['embed_codes'][0]
- titles = source['localizable_titles'][0]
-
- title = titles.get('title_medium') or titles['title_long']
-
- description = titles.get('summary_long') or titles.get('summary_medium')
-
- def get(key1, key2):
- value1 = source.get(key1)
- if not value1 or not isinstance(value1, list):
- return
- if not isinstance(value1[0], dict):
- return
- return value1[0].get(key2)
-
- series = get('localizable_titles_series', 'title_medium')
-
- season = get('localizable_titles_season', 'title_medium')
- season_number = int_or_none(source.get('season_number'))
- season_id = source.get('season_id')
-
- episode = titles.get('title_sort_name')
- episode_number = int_or_none(source.get('episode_number'))
-
- duration = parse_duration(get('videos', 'duration'))
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ pre_player = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
+ webpage, 'Pre Player'), display_id)['prePlayer']
+ title = pre_player['title']
+ video = pre_player['video']
+ video_id = video['dataMediaId']
+ content = pre_player.get('content') or {}
+ info = content.get('info') or {}
return {
'_type': 'url_transparent',
# for some reason only HLS is supported
- 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}),
+ 'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
'id': video_id,
'title': title,
- 'description': description,
- 'series': series,
- 'season': season,
- 'season_number': season_number,
- 'season_id': season_id,
- 'episode': episode,
- 'episode_number': episode_number,
- 'duration': duration,
- 'thumbnail': get('images', 'url'),
+ 'description': info.get('synopsis'),
+ 'series': content.get('title'),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode': content.get('subtitle'),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'duration': int_or_none(info.get('duration')),
+ 'thumbnail': video.get('dataPoster'),
+ 'age_limit': int_or_none(info.get('rating')),
+ 'timestamp': parse_iso8601(pre_player.get('publishedTime')),
}
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index b7bccb504..9759560f1 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -1,6 +1,5 @@
from __future__ import unicode_literals
-import functools
import itertools
import re
@@ -11,28 +10,37 @@ from ..compat import (
compat_ord,
compat_str,
compat_urllib_parse_unquote,
- compat_urlparse,
compat_zip
)
from ..utils import (
- clean_html,
- ExtractorError,
int_or_none,
- OnDemandPagedList,
- str_to_int,
+ parse_iso8601,
+ strip_or_none,
try_get,
- urljoin,
)
-class MixcloudIE(InfoExtractor):
+class MixcloudBaseIE(InfoExtractor):
+ def _call_api(self, object_type, object_fields, display_id, username, slug=None):
+ lookup_key = object_type + 'Lookup'
+ return self._download_json(
+ 'https://www.mixcloud.com/graphql', display_id, query={
+ 'query': '''{
+ %s(lookup: {username: "%s"%s}) {
+ %s
+ }
+}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
+ })['data'][lookup_key]
+
+
+class MixcloudIE(MixcloudBaseIE):
_VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
IE_NAME = 'mixcloud'
_TESTS = [{
'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
'info_dict': {
- 'id': 'dholbach-cryptkeeper',
+ 'id': 'dholbach_cryptkeeper',
'ext': 'm4a',
'title': 'Cryptkeeper',
'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
@@ -40,11 +48,13 @@ class MixcloudIE(InfoExtractor):
'uploader_id': 'dholbach',
'thumbnail': r're:https?://.*\.jpg',
'view_count': int,
+ 'timestamp': 1321359578,
+ 'upload_date': '20111115',
},
}, {
'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
'info_dict': {
- 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
+ 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
'ext': 'mp3',
'title': 'Caribou 7 inch Vinyl Mix & Chat',
'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
@@ -52,11 +62,14 @@ class MixcloudIE(InfoExtractor):
'uploader_id': 'gillespeterson',
'thumbnail': 're:https?://.*',
'view_count': int,
+ 'timestamp': 1422987057,
+ 'upload_date': '20150203',
},
}, {
'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
'only_matching': True,
}]
+ _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
@staticmethod
def _decrypt_xor_cipher(key, ciphertext):
@@ -66,170 +79,193 @@ class MixcloudIE(InfoExtractor):
for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- uploader = mobj.group(1)
- cloudcast_name = mobj.group(2)
- track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
+ username, slug = re.match(self._VALID_URL, url).groups()
+ username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
+ track_id = '%s_%s' % (username, slug)
+
+ cloudcast = self._call_api('cloudcast', '''audioLength
+ comments(first: 100) {
+ edges {
+ node {
+ comment
+ created
+ user {
+ displayName
+ username
+ }
+ }
+ }
+ totalCount
+ }
+ description
+ favorites {
+ totalCount
+ }
+ featuringArtistList
+ isExclusive
+ name
+ owner {
+ displayName
+ url
+ username
+ }
+ picture(width: 1024, height: 1024) {
+ url
+ }
+ plays
+ publishDate
+ reposts {
+ totalCount
+ }
+ streamInfo {
+ dashUrl
+ hlsUrl
+ url
+ }
+ tags {
+ tag {
+ name
+ }
+ }''', track_id, username, slug)
- webpage = self._download_webpage(url, track_id)
+ title = cloudcast['name']
- # Legacy path
- encrypted_play_info = self._search_regex(
- r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
+ stream_info = cloudcast['streamInfo']
+ formats = []
- if encrypted_play_info is not None:
- # Decode
- encrypted_play_info = compat_b64decode(encrypted_play_info)
- else:
- # New path
- full_info_json = self._parse_json(self._html_search_regex(
- r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
- webpage, 'play info'), 'play info')
- for item in full_info_json:
- item_data = try_get(
- item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
- dict)
- if try_get(item_data, lambda x: x['streamInfo']['url']):
- info_json = item_data
- break
- else:
- raise ExtractorError('Failed to extract matching stream info')
-
- message = self._html_search_regex(
- r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
- webpage, 'error message', default=None)
-
- js_url = self._search_regex(
- r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
- webpage, 'js url')
- js = self._download_webpage(js_url, track_id, 'Downloading JS')
- # Known plaintext attack
- if encrypted_play_info:
- kps = ['{"stream_url":']
- kpa_target = encrypted_play_info
- else:
- kps = ['https://', 'http://']
- kpa_target = compat_b64decode(info_json['streamInfo']['url'])
- for kp in kps:
- partial_key = self._decrypt_xor_cipher(kpa_target, kp)
- for quote in ["'", '"']:
- key = self._search_regex(
- r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
- js, 'encryption key', default=None)
- if key is not None:
- break
+ for url_key in ('url', 'hlsUrl', 'dashUrl'):
+ format_url = stream_info.get(url_key)
+ if not format_url:
+ continue
+ decrypted = self._decrypt_xor_cipher(
+ self._DECRYPTION_KEY, compat_b64decode(format_url))
+ if url_key == 'hlsUrl':
+ formats.extend(self._extract_m3u8_formats(
+ decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif url_key == 'dashUrl':
+ formats.extend(self._extract_mpd_formats(
+ decrypted, track_id, mpd_id='dash', fatal=False))
else:
+ formats.append({
+ 'format_id': 'http',
+ 'url': decrypted,
+ 'downloader_options': {
+ # Mixcloud starts throttling at >~5M
+ 'http_chunk_size': 5242880,
+ },
+ })
+
+ if not formats and cloudcast.get('isExclusive'):
+ self.raise_login_required()
+
+ self._sort_formats(formats)
+
+ comments = []
+ for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
+ node = edge.get('node') or {}
+ text = strip_or_none(node.get('comment'))
+ if not text:
continue
- break
- else:
- raise ExtractorError('Failed to extract encryption key')
-
- if encrypted_play_info is not None:
- play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
- if message and 'stream_url' not in play_info:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
- song_url = play_info['stream_url']
- formats = [{
- 'format_id': 'normal',
- 'url': song_url
- }]
-
- title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
- thumbnail = self._proto_relative_url(self._html_search_regex(
- r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
- uploader = self._html_search_regex(
- r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
- uploader_id = self._search_regex(
- r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
- description = self._og_search_description(webpage)
- view_count = str_to_int(self._search_regex(
- [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
- r'/listeners/?">([0-9,.]+)</a>',
- r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
- webpage, 'play count', default=None))
+ user = node.get('user') or {}
+ comments.append({
+ 'author': user.get('displayName'),
+ 'author_id': user.get('username'),
+ 'text': text,
+ 'timestamp': parse_iso8601(node.get('created')),
+ })
- else:
- title = info_json['name']
- thumbnail = urljoin(
- 'https://thumbnailer.mixcloud.com/unsafe/600x600/',
- try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
- uploader = try_get(info_json, lambda x: x['owner']['displayName'])
- uploader_id = try_get(info_json, lambda x: x['owner']['username'])
- description = try_get(info_json, lambda x: x['description'])
- view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
-
- stream_info = info_json['streamInfo']
- formats = []
-
- for url_key in ('url', 'hlsUrl', 'dashUrl'):
- format_url = stream_info.get(url_key)
- if not format_url:
- continue
- decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
- if not decrypted:
- continue
- if url_key == 'hlsUrl':
- formats.extend(self._extract_m3u8_formats(
- decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif url_key == 'dashUrl':
- formats.extend(self._extract_mpd_formats(
- decrypted, track_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'format_id': 'http',
- 'url': decrypted,
- 'downloader_options': {
- # Mixcloud starts throttling at >~5M
- 'http_chunk_size': 5242880,
- },
- })
- self._sort_formats(formats)
+ tags = []
+ for t in cloudcast.get('tags'):
+ tag = try_get(t, lambda x: x['tag']['name'], compat_str)
+            if tag:
+ tags.append(tag)
+
+ get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
+
+ owner = cloudcast.get('owner') or {}
return {
'id': track_id,
'title': title,
'formats': formats,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'view_count': view_count,
+ 'description': cloudcast.get('description'),
+ 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
+ 'uploader': owner.get('displayName'),
+ 'timestamp': parse_iso8601(cloudcast.get('publishDate')),
+ 'uploader_id': owner.get('username'),
+ 'uploader_url': owner.get('url'),
+ 'duration': int_or_none(cloudcast.get('audioLength')),
+ 'view_count': int_or_none(cloudcast.get('plays')),
+ 'like_count': get_count('favorites'),
+ 'repost_count': get_count('reposts'),
+ 'comment_count': get_count('comments'),
+ 'comments': comments,
+ 'tags': tags,
+ 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
}
-class MixcloudPlaylistBaseIE(InfoExtractor):
- _PAGE_SIZE = 24
+class MixcloudPlaylistBaseIE(MixcloudBaseIE):
+ def _get_cloudcast(self, node):
+ return node
- def _find_urls_in_page(self, page):
- for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
- yield self.url_result(
- compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
- MixcloudIE.ie_key())
+ def _get_playlist_title(self, title, slug):
+ return title
+
+ def _real_extract(self, url):
+ username, slug = re.match(self._VALID_URL, url).groups()
+ username = compat_urllib_parse_unquote(username)
+ if not slug:
+ slug = 'uploads'
+ else:
+ slug = compat_urllib_parse_unquote(slug)
+ playlist_id = '%s_%s' % (username, slug)
- def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
- real_page_number = real_page_number or current_page + 1
- return self._download_webpage(
- 'https://www.mixcloud.com/%s/' % path, video_id,
- note='Download %s (page %d)' % (page_name, current_page + 1),
- errnote='Unable to download %s' % page_name,
- query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
- headers={'X-Requested-With': 'XMLHttpRequest'})
+ is_playlist_type = self._ROOT_TYPE == 'playlist'
+ playlist_type = 'items' if is_playlist_type else slug
+ list_filter = ''
- def _tracks_page_func(self, page, video_id, page_name, current_page):
- resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
+ has_next_page = True
+ entries = []
+ while has_next_page:
+ playlist = self._call_api(
+ self._ROOT_TYPE, '''%s
+ %s
+ %s(first: 100%s) {
+ edges {
+ node {
+ %s
+ }
+ }
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
+ playlist_id, username, slug if is_playlist_type else None)
+
+ items = playlist.get(playlist_type) or {}
+ for edge in items.get('edges', []):
+ cloudcast = self._get_cloudcast(edge.get('node') or {})
+ cloudcast_url = cloudcast.get('url')
+ if not cloudcast_url:
+ continue
+ entries.append(self.url_result(
+ cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
- for item in self._find_urls_in_page(resp):
- yield item
+ page_info = items['pageInfo']
+ has_next_page = page_info['hasNextPage']
+ list_filter = ', after: "%s"' % page_info['endCursor']
- def _get_user_description(self, page_content):
- return self._html_search_regex(
- r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
- page_content, 'user description', fatal=False)
+ return self.playlist_result(
+ entries, playlist_id,
+ self._get_playlist_title(playlist[self._TITLE_KEY], slug),
+ playlist.get(self._DESCRIPTION_KEY))
class MixcloudUserIE(MixcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
IE_NAME = 'mixcloud:user'
_TESTS = [{
@@ -237,68 +273,58 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
},
- 'playlist_mincount': 11,
+ 'playlist_mincount': 36,
}, {
'url': 'http://www.mixcloud.com/dholbach/uploads/',
'info_dict': {
'id': 'dholbach_uploads',
'title': 'Daniel Holbach (uploads)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
},
- 'playlist_mincount': 11,
+ 'playlist_mincount': 36,
}, {
'url': 'http://www.mixcloud.com/dholbach/favorites/',
'info_dict': {
'id': 'dholbach_favorites',
'title': 'Daniel Holbach (favorites)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
- },
- 'params': {
- 'playlist_items': '1-100',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
},
- 'playlist_mincount': 100,
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 396,
}, {
'url': 'http://www.mixcloud.com/dholbach/listens/',
'info_dict': {
'id': 'dholbach_listens',
'title': 'Daniel Holbach (listens)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
},
- 'params': {
- 'playlist_items': '1-100',
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 1623,
+ 'skip': 'Large list',
+ }, {
+ 'url': 'https://www.mixcloud.com/FirstEar/stream/',
+ 'info_dict': {
+ 'id': 'FirstEar_stream',
+ 'title': 'First Ear (stream)',
+ 'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
},
- 'playlist_mincount': 100,
+ 'playlist_mincount': 271,
}]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user_id = mobj.group('user')
- list_type = mobj.group('type')
-
- # if only a profile URL was supplied, default to download all uploads
- if list_type is None:
- list_type = 'uploads'
-
- video_id = '%s_%s' % (user_id, list_type)
+ _TITLE_KEY = 'displayName'
+ _DESCRIPTION_KEY = 'biog'
+ _ROOT_TYPE = 'user'
+ _NODE_TEMPLATE = '''slug
+ url'''
- profile = self._download_webpage(
- 'https://www.mixcloud.com/%s/' % user_id, video_id,
- note='Downloading user profile',
- errnote='Unable to download user profile')
-
- username = self._og_search_title(profile)
- description = self._get_user_description(profile)
-
- entries = OnDemandPagedList(
- functools.partial(
- self._tracks_page_func,
- '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
- self._PAGE_SIZE)
-
- return self.playlist_result(
- entries, video_id, '%s (%s)' % (username, list_type), description)
+ def _get_playlist_title(self, title, slug):
+ return '%s (%s)' % (title, slug)
class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
@@ -306,87 +332,20 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
IE_NAME = 'mixcloud:playlist'
_TESTS = [{
- 'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
- 'info_dict': {
- 'id': 'RedBullThre3style_tokyo-finalists-2015',
- 'title': 'National Champions 2015',
- 'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
- },
- 'playlist_mincount': 16,
- }, {
'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user_id = mobj.group('user')
- playlist_id = mobj.group('playlist')
- video_id = '%s_%s' % (user_id, playlist_id)
-
- webpage = self._download_webpage(
- url, user_id,
- note='Downloading playlist page',
- errnote='Unable to download playlist page')
-
- title = self._html_search_regex(
- r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
- webpage, 'playlist title',
- default=None) or self._og_search_title(webpage, fatal=False)
- description = self._get_user_description(webpage)
-
- entries = OnDemandPagedList(
- functools.partial(
- self._tracks_page_func,
- '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
- self._PAGE_SIZE)
-
- return self.playlist_result(entries, video_id, title, description)
-
-
-class MixcloudStreamIE(MixcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
- IE_NAME = 'mixcloud:stream'
-
- _TEST = {
- 'url': 'https://www.mixcloud.com/FirstEar/stream/',
'info_dict': {
- 'id': 'FirstEar',
- 'title': 'First Ear',
- 'description': 'Curators of good music\nfirstearmusic.com',
+ 'id': 'maxvibes_jazzcat-on-ness-radio',
+ 'title': 'Ness Radio sessions',
},
- 'playlist_mincount': 192,
- }
-
- def _real_extract(self, url):
- user_id = self._match_id(url)
-
- webpage = self._download_webpage(url, user_id)
-
- entries = []
- prev_page_url = None
-
- def _handle_page(page):
- entries.extend(self._find_urls_in_page(page))
- return self._search_regex(
- r'm-next-page-url="([^"]+)"', page,
- 'next page URL', default=None)
-
- next_page_url = _handle_page(webpage)
-
- for idx in itertools.count(0):
- if not next_page_url or prev_page_url == next_page_url:
- break
-
- prev_page_url = next_page_url
- current_page = int(self._search_regex(
- r'\?page=(\d+)', next_page_url, 'next page number'))
-
- next_page_url = _handle_page(self._fetch_tracks_page(
- '%s/stream' % user_id, user_id, 'stream', idx,
- real_page_number=current_page))
-
- username = self._og_search_title(webpage)
- description = self._get_user_description(webpage)
-
- return self.playlist_result(entries, user_id, username, description)
+ 'playlist_mincount': 59,
+ }]
+ _TITLE_KEY = 'name'
+ _DESCRIPTION_KEY = 'description'
+ _ROOT_TYPE = 'playlist'
+ _NODE_TEMPLATE = '''cloudcast {
+ slug
+ url
+ }'''
+
+ def _get_cloudcast(self, node):
+ return node.get('cloudcast') or {}
diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dl/extractor/moevideo.py
index 44bcc4982..eb9b4ce7c 100644
--- a/youtube_dl/extractor/moevideo.py
+++ b/youtube_dl/extractor/moevideo.py
@@ -1,15 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
from .common import InfoExtractor
from ..utils import (
- ExtractorError,
+ clean_html,
int_or_none,
- sanitized_Request,
- urlencode_postdata,
)
@@ -17,8 +14,8 @@ class MoeVideoIE(InfoExtractor):
IE_DESC = 'LetitBit video services: moevideo.net, playreplay.net and videochart.net'
_VALID_URL = r'''(?x)
https?://(?P<host>(?:www\.)?
- (?:(?:moevideo|playreplay|videochart)\.net))/
- (?:video|framevideo)/(?P<id>[0-9]+\.[0-9A-Za-z]+)'''
+ (?:(?:moevideo|playreplay|videochart)\.net|thesame\.tv))/
+ (?:video|framevideo|embed)/(?P<id>[0-9a-z]+\.[0-9A-Za-z]+)'''
_API_URL = 'http://api.letitbit.net/'
_API_KEY = 'tVL0gjqo5'
_TESTS = [
@@ -57,58 +54,26 @@ class MoeVideoIE(InfoExtractor):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ host, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
- 'http://%s/video/%s' % (mobj.group('host'), video_id),
+ 'http://%s/video/%s' % (host, video_id),
video_id, 'Downloading webpage')
title = self._og_search_title(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- description = self._og_search_description(webpage)
- r = [
- self._API_KEY,
- [
- 'preview/flv_link',
- {
- 'uid': video_id,
- },
- ],
- ]
- r_json = json.dumps(r)
- post = urlencode_postdata({'r': r_json})
- req = sanitized_Request(self._API_URL, post)
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
- response = self._download_json(req, video_id)
- if response['status'] != 'OK':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, response['data']),
- expected=True
- )
- item = response['data'][0]
- video_url = item['link']
- duration = int_or_none(item['length'])
- width = int_or_none(item['width'])
- height = int_or_none(item['height'])
- filesize = int_or_none(item['convert_size'])
-
- formats = [{
- 'format_id': 'sd',
- 'http_headers': {'Range': 'bytes=0-'}, # Required to download
- 'url': video_url,
- 'width': width,
- 'height': height,
- 'filesize': filesize,
- }]
+ embed_webpage = self._download_webpage(
+ 'http://%s/embed/%s' % (host, video_id),
+ video_id, 'Downloading embed webpage')
+ video = self._parse_json(self._search_regex(
+ r'mvplayer\("#player"\s*,\s*({.+})',
+ embed_webpage, 'mvplayer'), video_id)['video']
return {
'id': video_id,
'title': title,
- 'thumbnail': thumbnail,
- 'description': description,
- 'duration': duration,
- 'formats': formats,
+ 'thumbnail': video.get('poster') or self._og_search_thumbnail(webpage),
+ 'description': clean_html(self._og_search_description(webpage)),
+ 'duration': int_or_none(self._og_search_property('video:duration', webpage)),
+ 'url': video['ourUrl'],
}
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py
index 1c652813a..5234cac02 100644
--- a/youtube_dl/extractor/mofosex.py
+++ b/youtube_dl/extractor/mofosex.py
@@ -1,5 +1,8 @@
from __future__ import unicode_literals
+import re
+
+from .common import InfoExtractor
from ..utils import (
int_or_none,
str_to_int,
@@ -54,3 +57,23 @@ class MofosexIE(KeezMoviesIE):
})
return info
+
+
+class MofosexEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
+ ie=MofosexIE.ie_key(), video_id=video_id)
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
index d4bd273b6..b1615b4d8 100644
--- a/youtube_dl/extractor/motherless.py
+++ b/youtube_dl/extractor/motherless.py
@@ -26,7 +26,7 @@ class MotherlessIE(InfoExtractor):
'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
'upload_date': '20100913',
'uploader_id': 'famouslyfuckedup',
- 'thumbnail': r're:http://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
}, {
@@ -40,7 +40,7 @@ class MotherlessIE(InfoExtractor):
'game', 'hairy'],
'upload_date': '20140622',
'uploader_id': 'Sulivana7x',
- 'thumbnail': r're:http://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
},
'skip': '404',
@@ -54,7 +54,7 @@ class MotherlessIE(InfoExtractor):
'categories': ['superheroine heroine superher'],
'upload_date': '20140827',
'uploader_id': 'shade0230',
- 'thumbnail': r're:http://.*\.jpg',
+ 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
}, {
@@ -76,22 +76,24 @@ class MotherlessIE(InfoExtractor):
raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
title = self._html_search_regex(
- r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
+ (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
+ r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
video_url = (self._html_search_regex(
(r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
- webpage, 'video URL', default=None, group='url') or
- 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+ webpage, 'video URL', default=None, group='url')
+ or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
age_limit = self._rta_search(webpage)
view_count = str_to_int(self._html_search_regex(
- r'<strong>Views</strong>\s+([^<]+)<',
+ (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
webpage, 'view count', fatal=False))
like_count = str_to_int(self._html_search_regex(
- r'<strong>Favorited</strong>\s+([^<]+)<',
+ (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
webpage, 'like count', fatal=False))
upload_date = self._html_search_regex(
- r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
+ (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
+ r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
if 'Ago' in upload_date:
days = int(re.search(r'([0-9]+)', upload_date).group(1))
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py
index 650731fdc..e59b0b7b0 100644
--- a/youtube_dl/extractor/msn.py
+++ b/youtube_dl/extractor/msn.py
@@ -14,21 +14,28 @@ from ..utils import (
class MSNIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
+ _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
_TESTS = [{
- 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE',
- 'md5': '8442f66c116cbab1ff7098f986983458',
+ 'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d',
+ 'md5': '087548191d273c5c55d05028f8d2cbcd',
'info_dict': {
- 'id': 'BBqQYNE',
- 'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message',
+ 'id': 'BBPxU6d',
+ 'display_id': '7-ways-to-get-rid-of-chest-congestion',
'ext': 'mp4',
- 'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message',
- 'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25',
- 'duration': 104,
- 'uploader': 'CBS Entertainment',
- 'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v',
+ 'title': 'Seven ways to get rid of chest congestion',
+ 'description': '7 Ways to Get Rid of Chest Congestion',
+ 'duration': 88,
+ 'uploader': 'Health',
+ 'uploader_id': 'BBPrMqa',
},
}, {
+ # Article, multiple Dailymotion Embeds
+ 'url': 'https://www.msn.com/en-in/money/sports/hottest-football-wags-greatest-footballers-turned-managers-and-more/ar-BBpc7Nl',
+ 'info_dict': {
+ 'id': 'BBpc7Nl',
+ },
+ 'playlist_mincount': 4,
+ }, {
'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
'only_matching': True,
}, {
@@ -41,75 +48,124 @@ class MSNIE(InfoExtractor):
}, {
'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
'only_matching': True,
+ }, {
+ # Vidible(AOL) Embed
+ 'url': 'https://www.msn.com/en-us/money/other/jupiter-is-about-to-come-so-close-you-can-see-its-moons-with-binoculars/vi-AACqsHR',
+ 'only_matching': True,
+ }, {
+ # Dailymotion Embed
+ 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
+ 'only_matching': True,
+ }, {
+ # YouTube Embed
+ 'url': 'https://www.msn.com/en-in/money/news/meet-vikram-%E2%80%94-chandrayaan-2s-lander/vi-AAGUr0v',
+ 'only_matching': True,
+ }, {
+ # NBCSports Embed
+ 'url': 'https://www.msn.com/en-us/money/football_nfl/week-13-preview-redskins-vs-panthers/vi-BBXsCDb',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, display_id = mobj.group('id', 'display_id')
+ display_id, page_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
- video = self._parse_json(
- self._search_regex(
- r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
- webpage, 'video data', default='{}', group='data'),
- display_id, transform_source=unescapeHTML)
+ entries = []
+ for _, metadata in re.findall(r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', webpage):
+ video = self._parse_json(unescapeHTML(metadata), display_id)
+
+ provider_id = video.get('providerId')
+ player_name = video.get('playerName')
+ if player_name and provider_id:
+ entry = None
+ if player_name == 'AOL':
+ if provider_id.startswith('http'):
+ provider_id = self._search_regex(
+ r'https?://delivery\.vidible\.tv/video/redirect/([0-9a-f]{24})',
+ provider_id, 'vidible id')
+ entry = self.url_result(
+ 'aol-video:' + provider_id, 'Aol', provider_id)
+ elif player_name == 'Dailymotion':
+ entry = self.url_result(
+ 'https://www.dailymotion.com/video/' + provider_id,
+ 'Dailymotion', provider_id)
+ elif player_name == 'YouTube':
+ entry = self.url_result(
+ provider_id, 'Youtube', provider_id)
+ elif player_name == 'NBCSports':
+ entry = self.url_result(
+ 'http://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/' + provider_id,
+ 'NBCSportsVPlayer', provider_id)
+ if entry:
+ entries.append(entry)
+ continue
+
+ video_id = video['uuid']
+ title = video['title']
+
+ formats = []
+ for file_ in video.get('videoFiles', []):
+ format_url = file_.get('url')
+ if not format_url:
+ continue
+ if 'format=m3u8-aapl' in format_url:
+ # m3u8_native should not be used here until
+ # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif 'format=mpd-time-csf' in format_url:
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, 'dash', fatal=False))
+ elif '.ism' in format_url:
+ if format_url.endswith('.ism'):
+ format_url += '/manifest'
+ formats.extend(self._extract_ism_formats(
+ format_url, display_id, 'mss', fatal=False))
+ else:
+ format_id = file_.get('formatCode')
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'width': int_or_none(file_.get('width')),
+ 'height': int_or_none(file_.get('height')),
+ 'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
+ 'preference': 1 if format_id == '1001' else None,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for file_ in video.get('files', []):
+ format_url = file_.get('url')
+ format_code = file_.get('formatCode')
+ if not format_url or not format_code:
+ continue
+ if compat_str(format_code) == '3100':
+ subtitles.setdefault(file_.get('culture', 'en'), []).append({
+ 'ext': determine_ext(format_url, 'ttml'),
+ 'url': format_url,
+ })
- if not video:
+ entries.append({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('headlineImage', {}).get('url'),
+ 'duration': int_or_none(video.get('durationSecs')),
+ 'uploader': video.get('sourceFriendly'),
+ 'uploader_id': video.get('providerId'),
+ 'creator': video.get('creator'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+
+ if not entries:
error = unescapeHTML(self._search_regex(
r'data-error=(["\'])(?P<error>.+?)\1',
webpage, 'error', group='error'))
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
- title = video['title']
-
- formats = []
- for file_ in video.get('videoFiles', []):
- format_url = file_.get('url')
- if not format_url:
- continue
- if 'm3u8' in format_url:
- # m3u8_native should not be used here until
- # https://github.com/rg3/youtube-dl/issues/9913 is fixed
- m3u8_formats = self._extract_m3u8_formats(
- format_url, display_id, 'mp4',
- m3u8_id='hls', fatal=False)
- formats.extend(m3u8_formats)
- elif determine_ext(format_url) == 'ism':
- formats.extend(self._extract_ism_formats(
- format_url + '/Manifest', display_id, 'mss', fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'ext': 'mp4',
- 'format_id': 'http',
- 'width': int_or_none(file_.get('width')),
- 'height': int_or_none(file_.get('height')),
- })
- self._sort_formats(formats)
-
- subtitles = {}
- for file_ in video.get('files', []):
- format_url = file_.get('url')
- format_code = file_.get('formatCode')
- if not format_url or not format_code:
- continue
- if compat_str(format_code) == '3100':
- subtitles.setdefault(file_.get('culture', 'en'), []).append({
- 'ext': determine_ext(format_url, 'ttml'),
- 'url': format_url,
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video.get('description'),
- 'thumbnail': video.get('headlineImage', {}).get('url'),
- 'duration': int_or_none(video.get('durationSecs')),
- 'uploader': video.get('sourceFriendly'),
- 'uploader_id': video.get('providerId'),
- 'creator': video.get('creator'),
- 'subtitles': subtitles,
- 'formats': formats,
- }
+ return self.playlist_result(entries, page_id)
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 7a3b57abd..fedd5f46b 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -1,3 +1,4 @@
+# coding: utf-8
from __future__ import unicode_literals
import re
@@ -349,33 +350,29 @@ class MTVIE(MTVServicesInfoExtractor):
}]
-class MTV81IE(InfoExtractor):
- IE_NAME = 'mtv81'
- _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)'
+class MTVJapanIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvjapan'
+ _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'
_TEST = {
- 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/',
- 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+ 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
'info_dict': {
- 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+ 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
'ext': 'mp4',
- 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
- 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
- 'timestamp': 1468846800,
- 'upload_date': '20160718',
+ 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
+ },
+ 'params': {
+ 'skip_download': True,
},
}
+ _GEO_COUNTRIES = ['JP']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
- def _extract_mgid(self, webpage):
- return self._search_regex(
- r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage,
- 'mgid', group='id')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- mgid = self._extract_mgid(webpage)
- return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtvjapan.com',
+ 'mgid': uri,
+ }
class MTVVideoIE(MTVServicesInfoExtractor):
@@ -425,14 +422,14 @@ class MTVVideoIE(MTVServicesInfoExtractor):
class MTVDEIE(MTVServicesInfoExtractor):
IE_NAME = 'mtv.de'
- _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
_TESTS = [{
- 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
+ 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
'info_dict': {
- 'id': 'music_video-a50bc5f0b3aa4b3190aa',
- 'ext': 'flv',
- 'title': 'MusicVideo_cro-traum',
- 'description': 'Cro - Traum',
+ 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
+ 'ext': 'mp4',
+ 'title': 'Traum',
+ 'description': 'Traum',
},
'params': {
# rtmp download
@@ -441,11 +438,12 @@ class MTVDEIE(MTVServicesInfoExtractor):
'skip': 'Blocked at Travis CI',
}, {
# mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
- 'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
+ 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
'info_dict': {
- 'id': 'local_playlist-f5ae778b9832cc837189',
- 'ext': 'flv',
- 'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
+ 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Teen Mom 2',
+ 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
},
'params': {
# rtmp download
@@ -453,7 +451,7 @@ class MTVDEIE(MTVServicesInfoExtractor):
},
'skip': 'Blocked at Travis CI',
}, {
- 'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
+ 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
'info_dict': {
'id': 'local_playlist-4e760566473c4c8c5344',
'ext': 'mp4',
@@ -466,25 +464,11 @@ class MTVDEIE(MTVServicesInfoExtractor):
},
'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
}]
+ _GEO_COUNTRIES = ['DE']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- playlist = self._parse_json(
- self._search_regex(
- r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
- video_id)
-
- def _mrss_url(item):
- return item['mrss'] + item.get('mrssvars', '')
-
- # news pages contain single video in playlist with different id
- if len(playlist) == 1:
- return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
-
- for item in playlist:
- item_id = item.get('id')
- if item_id and compat_str(item_id) == video_id:
- return self._get_videos_info_from_url(_mrss_url(item), video_id)
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.de',
+ 'mgid': uri,
+ }
diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py
deleted file mode 100644
index 1854d59a5..000000000
--- a/youtube_dl/extractor/musicplayon.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- int_or_none,
- js_to_json,
- mimetype2ext,
-)
-
-
-class MusicPlayOnIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://en.musicplayon.com/play?v=433377',
- 'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
- 'info_dict': {
- 'id': '433377',
- 'ext': 'mp4',
- 'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
- 'description': 'Rick Ross Interview On Chelsea Lately',
- 'duration': 342,
- 'uploader': 'ultrafish',
- },
- }, {
- 'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
- 'only_matching': True,
- }]
-
- _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = self._URL_TEMPLATE % video_id
-
- page = self._download_webpage(url, video_id)
-
- title = self._og_search_title(page)
- description = self._og_search_description(page)
- thumbnail = self._og_search_thumbnail(page)
- duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
- view_count = self._og_search_property('count', page, fatal=False)
- uploader = self._html_search_regex(
- r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
-
- sources = self._parse_json(
- self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'),
- video_id, transform_source=js_to_json)
- formats = [{
- 'url': compat_urlparse.urljoin(url, source['src']),
- 'ext': mimetype2ext(source.get('type')),
- 'format_note': source.get('data-res'),
- } for source in sources]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': int_or_none(duration),
- 'view_count': int_or_none(view_count),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 2afe535b5..db7ebc94c 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -1,73 +1,56 @@
+# coding: utf-8
from __future__ import unicode_literals
-import os.path
+
+import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
-)
+from ..compat import compat_str
from ..utils import (
- ExtractorError,
+ int_or_none,
+ parse_duration,
+ xpath_text,
)
class MySpassIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*'
+ _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
_TEST = {
'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
'md5': '0b49f4844a068f8b33f4b7c88405862b',
'info_dict': {
'id': '11741',
'ext': 'mp4',
- 'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
- 'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
+ 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
+ 'title': '17.02.2013 - Die Highlights, Teil 2',
},
}
def _real_extract(self, url):
- META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
+ video_id = self._match_id(url)
- # video id is the last path element of the URL
- # usually there is a trailing slash, so also try the second but last
- url_path = compat_urllib_parse_urlparse(url).path
- url_parent_path, video_id = os.path.split(url_path)
- if not video_id:
- _, video_id = os.path.split(url_parent_path)
-
- # get metadata
- metadata_url = META_DATA_URL_TEMPLATE % video_id
metadata = self._download_xml(
- metadata_url, video_id, transform_source=lambda s: s.strip())
-
- # extract values from metadata
- url_flv_el = metadata.find('url_flv')
- if url_flv_el is None:
- raise ExtractorError('Unable to extract download url')
- video_url = url_flv_el.text
- title_el = metadata.find('title')
- if title_el is None:
- raise ExtractorError('Unable to extract title')
- title = title_el.text
- format_id_el = metadata.find('format_id')
- if format_id_el is None:
- format = 'mp4'
- else:
- format = format_id_el.text
- description_el = metadata.find('description')
- if description_el is not None:
- description = description_el.text
- else:
- description = None
- imagePreview_el = metadata.find('imagePreview')
- if imagePreview_el is not None:
- thumbnail = imagePreview_el.text
- else:
- thumbnail = None
+ 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
+ video_id)
+
+ title = xpath_text(metadata, 'title', fatal=True)
+ video_url = xpath_text(metadata, 'url_flv', 'download url', True)
+ video_id_int = int(video_id)
+ for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
+ group_int = int(group)
+ if group_int > video_id_int:
+ video_url = video_url.replace(
+ group, compat_str(group_int // video_id_int))
return {
'id': video_id,
'url': video_url,
'title': title,
- 'format': format,
- 'thumbnail': thumbnail,
- 'description': description,
+ 'thumbnail': xpath_text(metadata, 'imagePreview'),
+ 'description': xpath_text(metadata, 'description'),
+ 'duration': parse_duration(xpath_text(metadata, 'duration')),
+ 'series': xpath_text(metadata, 'format'),
+ 'season_number': int_or_none(xpath_text(metadata, 'season')),
+ 'season_id': xpath_text(metadata, 'season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(xpath_text(metadata, 'episode')),
}
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py
index 4d2ee6408..ee12e2b47 100644
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dl/extractor/nationalgeographic.py
@@ -1,15 +1,10 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from .adobepass import AdobePassIE
-from .theplatform import ThePlatformIE
+from .fox import FOXIE
from ..utils import (
smuggle_url,
url_basename,
- update_url_query,
- get_element_by_class,
)
@@ -66,130 +61,22 @@ class NationalGeographicVideoIE(InfoExtractor):
}
-class NationalGeographicIE(ThePlatformIE, AdobePassIE):
- IE_NAME = 'natgeo'
- _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:(?:(?:wild/)?[^/]+/)?(?:videos|episodes)|u)/(?P<id>[^/?]+)'
-
- _TESTS = [
- {
- 'url': 'http://channel.nationalgeographic.com/u/kdi9Ld0PN2molUUIMSBGxoeDhD729KRjQcnxtetilWPMevo8ZwUBIDuPR0Q3D2LVaTsk0MPRkRWDB8ZhqWVeyoxfsZZm36yRp1j-zPfsHEyI_EgAeFY/',
- 'md5': '518c9aa655686cf81493af5cc21e2a04',
- 'info_dict': {
- 'id': 'vKInpacll2pC',
- 'ext': 'mp4',
- 'title': 'Uncovering a Universal Knowledge',
- 'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
- 'timestamp': 1458680907,
- 'upload_date': '20160322',
- 'uploader': 'NEWA-FNG-NGTV',
- },
- 'add_ie': ['ThePlatform'],
+class NationalGeographicTVIE(FOXIE):
+ _VALID_URL = r'https?://(?:www\.)?nationalgeographic\.com/tv/watch/(?P<id>[\da-fA-F]+)'
+ _TESTS = [{
+ 'url': 'https://www.nationalgeographic.com/tv/watch/6a875e6e734b479beda26438c9f21138/',
+ 'info_dict': {
+ 'id': '6a875e6e734b479beda26438c9f21138',
+ 'ext': 'mp4',
+ 'title': 'Why Nat Geo? Valley of the Boom',
+ 'description': 'The lives of prominent figures in the tech world, including their friendships, rivalries, victories and failures.',
+ 'timestamp': 1542662458,
+ 'upload_date': '20181119',
+ 'age_limit': 14,
},
- {
- 'url': 'http://channel.nationalgeographic.com/u/kdvOstqYaBY-vSBPyYgAZRUL4sWUJ5XUUPEhc7ISyBHqoIO4_dzfY3K6EjHIC0hmFXoQ7Cpzm6RkET7S3oMlm6CFnrQwSUwo/',
- 'md5': 'c4912f656b4cbe58f3e000c489360989',
- 'info_dict': {
- 'id': 'Pok5lWCkiEFA',
- 'ext': 'mp4',
- 'title': 'The Stunning Red Bird of Paradise',
- 'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
- 'timestamp': 1459362152,
- 'upload_date': '20160330',
- 'uploader': 'NEWA-FNG-NGTV',
- },
- 'add_ie': ['ThePlatform'],
- },
- {
- 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
- 'only_matching': True,
- },
- {
- 'url': 'http://channel.nationalgeographic.com/videos/treasures-rediscovered/',
- 'only_matching': True,
- },
- {
- 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
- 'only_matching': True,
+ 'params': {
+ 'skip_download': True,
},
- {
- 'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- release_url = self._search_regex(
- r'video_auth_playlist_url\s*=\s*"([^"]+)"',
- webpage, 'release url')
- theplatform_path = self._search_regex(r'https?://link\.theplatform\.com/s/([^?]+)', release_url, 'theplatform path')
- video_id = theplatform_path.split('/')[-1]
- query = {
- 'mbr': 'true',
- }
- is_auth = self._search_regex(r'video_is_auth\s*=\s*"([^"]+)"', webpage, 'is auth', fatal=False)
- if is_auth == 'auth':
- auth_resource_id = self._search_regex(
- r"video_auth_resourceId\s*=\s*'([^']+)'",
- webpage, 'auth resource id')
- query['auth'] = self._extract_mvpd_auth(url, video_id, 'natgeo', auth_resource_id)
-
- formats = []
- subtitles = {}
- for key, value in (('switch', 'http'), ('manifest', 'm3u')):
- tp_query = query.copy()
- tp_query.update({
- key: value,
- })
- tp_formats, tp_subtitles = self._extract_theplatform_smil(
- update_url_query(release_url, tp_query), video_id, 'Downloading %s SMIL data' % value)
- formats.extend(tp_formats)
- subtitles = self._merge_subtitles(subtitles, tp_subtitles)
- self._sort_formats(formats)
-
- info = self._extract_theplatform_metadata(theplatform_path, display_id)
- info.update({
- 'id': video_id,
- 'formats': formats,
- 'subtitles': subtitles,
- 'display_id': display_id,
- })
- return info
-
-
-class NationalGeographicEpisodeGuideIE(InfoExtractor):
- IE_NAME = 'natgeo:episodeguide'
- _VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
- _TESTS = [
- {
- 'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
- 'info_dict': {
- 'id': 'the-story-of-god-with-morgan-freeman-season-1',
- 'title': 'The Story of God with Morgan Freeman - Season 1',
- },
- 'playlist_mincount': 6,
- },
- {
- 'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
- 'info_dict': {
- 'id': 'underworld-inc-season-2',
- 'title': 'Underworld, Inc. - Season 2',
- },
- 'playlist_mincount': 7,
- },
- ]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- show = get_element_by_class('show', webpage)
- selected_season = self._search_regex(
- r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
- webpage, 'selected season')
- entries = [
- self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic')
- for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)]
- return self.playlist_result(
- entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')),
- '%s - %s' % (show, selected_season))
+ }]
+ _HOME_PAGE_URL = 'https://www.nationalgeographic.com/tv/'
+ _API_KEY = '238bb0a0c2aba67922c48709ce0c06fd'
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index bb3d94413..61fc59126 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,68 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
+ clean_html,
+ dict_get,
ExtractorError,
int_or_none,
+ parse_duration,
+ try_get,
update_url_query,
)
-class NaverIE(InfoExtractor):
- _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
+class NaverBaseIE(InfoExtractor):
+ _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
- _TESTS = [{
- 'url': 'http://tv.naver.com/v/81652',
- 'info_dict': {
- 'id': '81652',
- 'ext': 'mp4',
- 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
- 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
- 'upload_date': '20130903',
- },
- }, {
- 'url': 'http://tv.naver.com/v/395837',
- 'md5': '638ed4c12012c458fefcddfd01f173cd',
- 'info_dict': {
- 'id': '395837',
- 'ext': 'mp4',
- 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
- 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
- 'upload_date': '20150519',
- },
- 'skip': 'Georestricted',
- }, {
- 'url': 'http://tvcast.naver.com/v/81652',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- vid = self._search_regex(
- r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'video id', fatal=None, group='value')
- in_key = self._search_regex(
- r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'key', default=None, group='value')
-
- if not vid or not in_key:
- error = self._html_search_regex(
- r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError(error, expected=True)
- raise ExtractorError('couldn\'t extract vid and key')
+ def _extract_video_info(self, video_id, vid, key):
video_data = self._download_json(
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
video_id, query={
- 'key': in_key,
+ 'key': key,
})
meta = video_data['meta']
title = meta['subject']
formats = []
+ get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []
def extract_formats(streams, stream_type, query={}):
for stream in streams:
@@ -73,7 +38,7 @@ class NaverIE(InfoExtractor):
encoding_option = stream.get('encodingOption', {})
bitrate = stream.get('bitrate', {})
formats.append({
- 'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')),
+ 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
'url': stream_url,
'width': int_or_none(encoding_option.get('width')),
'height': int_or_none(encoding_option.get('height')),
@@ -83,7 +48,7 @@ class NaverIE(InfoExtractor):
'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
})
- extract_formats(video_data.get('videos', {}).get('list', []), 'H264')
+ extract_formats(get_list('video'), 'H264')
for stream_set in video_data.get('streams', []):
query = {}
for param in stream_set.get('keys', []):
@@ -101,28 +66,101 @@ class NaverIE(InfoExtractor):
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
self._sort_formats(formats)
+ replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
+
+ def get_subs(caption_url):
+ if re.search(self._CAPTION_EXT_RE, caption_url):
+ return [{
+ 'url': replace_ext(caption_url, 'ttml'),
+ }, {
+ 'url': replace_ext(caption_url, 'vtt'),
+ }]
+ else:
+ return [{'url': caption_url}]
+
+ automatic_captions = {}
subtitles = {}
- for caption in video_data.get('captions', {}).get('list', []):
+ for caption in get_list('caption'):
caption_url = caption.get('source')
if not caption_url:
continue
- subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({
- 'url': caption_url,
- })
+ sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
+ sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url))
- upload_date = self._search_regex(
- r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})',
- webpage, 'upload date', fatal=False)
- if upload_date:
- upload_date = upload_date.replace('.', '')
+ user = meta.get('user', {})
return {
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
- 'description': self._og_search_description(webpage),
- 'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage),
+ 'automatic_captions': automatic_captions,
+ 'thumbnail': try_get(meta, lambda x: x['cover']['source']),
'view_count': int_or_none(meta.get('count')),
- 'upload_date': upload_date,
+ 'uploader_id': user.get('id'),
+ 'uploader': user.get('name'),
+ 'uploader_url': user.get('url'),
}
+
+
+class NaverIE(NaverBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'http://tv.naver.com/v/81652',
+ 'info_dict': {
+ 'id': '81652',
+ 'ext': 'mp4',
+ 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ 'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ 'timestamp': 1378200754,
+ 'upload_date': '20130903',
+ 'uploader': '메가스터디, 합격불변의 법칙',
+ 'uploader_id': 'megastudy',
+ },
+ }, {
+ 'url': 'http://tv.naver.com/v/395837',
+ 'md5': '8a38e35354d26a17f73f4e90094febd3',
+ 'info_dict': {
+ 'id': '395837',
+ 'ext': 'mp4',
+ 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
+ 'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
+ 'timestamp': 1432030253,
+ 'upload_date': '20150519',
+ 'uploader': '4가지쇼 시즌2',
+ 'uploader_id': 'wrappinguser29',
+ },
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ content = self._download_json(
+ 'https://tv.naver.com/api/json/v/' + video_id,
+ video_id, headers=self.geo_verification_headers())
+ player_info_json = content.get('playerInfoJson') or {}
+ current_clip = player_info_json.get('currentClip') or {}
+
+ vid = current_clip.get('videoId')
+ in_key = current_clip.get('inKey')
+
+ if not vid or not in_key:
+ player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
+ if player_auth == 'notCountry':
+ self.raise_geo_restricted(countries=['KR'])
+ elif player_auth == 'notLogin':
+ self.raise_login_required()
+ raise ExtractorError('couldn\'t extract vid and key')
+ info = self._extract_video_info(video_id, vid, in_key)
+ info.update({
+ 'description': clean_html(current_clip.get('description')),
+ 'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
+ 'duration': parse_duration(current_clip.get('displayPlayTime')),
+ 'like_count': int_or_none(current_clip.get('recommendPoint')),
+ 'age_limit': 19 if current_clip.get('adult') else None,
+ })
+ return info
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 765c46fd2..6f3cb3003 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -9,12 +9,13 @@ from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_unquote
from ..utils import (
- find_xpath_attr,
+ int_or_none,
+ js_to_json,
+ parse_duration,
smuggle_url,
try_get,
- unescapeHTML,
+ unified_timestamp,
update_url_query,
- int_or_none,
)
@@ -86,28 +87,61 @@ class NBCIE(AdobePassIE):
def _real_extract(self, url):
permalink, video_id = re.match(self._VALID_URL, url).groups()
permalink = 'http' + compat_urllib_parse_unquote(permalink)
- response = self._download_json(
- 'https://api.nbc.com/v3/videos', video_id, query={
- 'filter[permalink]': permalink,
- 'fields[videos]': 'description,entitlement,episodeNumber,guid,keywords,seasonNumber,title,vChipRating',
- 'fields[shows]': 'shortTitle',
- 'include': 'show.shortTitle',
- })
- video_data = response['data'][0]['attributes']
+ video_data = self._download_json(
+ 'https://friendship.nbc.co/v2/graphql', video_id, query={
+ 'query': '''query bonanzaPage(
+ $app: NBCUBrands! = nbc
+ $name: String!
+ $oneApp: Boolean
+ $platform: SupportedPlatforms! = web
+ $type: EntityPageType! = VIDEO
+ $userId: String!
+) {
+ bonanzaPage(
+ app: $app
+ name: $name
+ oneApp: $oneApp
+ platform: $platform
+ type: $type
+ userId: $userId
+ ) {
+ metadata {
+ ... on VideoPageData {
+ description
+ episodeNumber
+ keywords
+ locked
+ mpxAccountId
+ mpxGuid
+ rating
+ resourceId
+ seasonNumber
+ secondaryTitle
+ seriesShortTitle
+ }
+ }
+ }
+}''',
+ 'variables': json.dumps({
+ 'name': permalink,
+ 'oneApp': True,
+ 'userId': '0',
+ }),
+ })['data']['bonanzaPage']['metadata']
query = {
'mbr': 'true',
'manifest': 'm3u',
}
- video_id = video_data['guid']
- title = video_data['title']
- if video_data.get('entitlement') == 'auth':
+ video_id = video_data['mpxGuid']
+ title = video_data['secondaryTitle']
+ if video_data.get('locked'):
resource = self._get_mvpd_resource(
- 'nbcentertainment', title, video_id,
- video_data.get('vChipRating'))
+ video_data.get('resourceId') or 'nbcentertainment',
+ title, video_id, video_data.get('rating'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
query), {'force_smil_url': True})
return {
'_type': 'url_transparent',
@@ -119,7 +153,7 @@ class NBCIE(AdobePassIE):
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode_number': int_or_none(video_data.get('episodeNumber')),
'episode': title,
- 'series': try_get(response, lambda x: x['included'][0]['attributes']['shortTitle']),
+ 'series': video_data.get('seriesShortTitle'),
'ie_key': 'ThePlatform',
}
@@ -269,31 +303,17 @@ class CSNNEIE(InfoExtractor):
class NBCNewsIE(ThePlatformIE):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
- (?:video/.+?/(?P<id>\d+)|
- ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
- '''
+ _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
_TESTS = [
{
- 'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
- 'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
- 'info_dict': {
- 'id': '52753292',
- 'ext': 'flv',
- 'title': 'Crew emerges after four-month Mars food study',
- 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
- },
- },
- {
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
- 'md5': 'af1adfa51312291a017720403826bb64',
+ 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
'info_dict': {
- 'id': 'p_tweet_snow_140529',
+ 'id': '269389891880',
'ext': 'mp4',
'title': 'How Twitter Reacted To The Snowden Interview',
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
- 'uploader': 'NBCU-NEWS',
'timestamp': 1401363060,
'upload_date': '20140529',
},
@@ -311,55 +331,51 @@ class NBCNewsIE(ThePlatformIE):
},
{
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
- 'md5': '73135a2e0ef819107bbb55a5a9b2a802',
+ 'md5': '8eb831eca25bfa7d25ddd83e85946548',
'info_dict': {
- 'id': 'nn_netcast_150204',
+ 'id': '394064451844',
'ext': 'mp4',
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
'timestamp': 1423104900,
- 'uploader': 'NBCU-NEWS',
'upload_date': '20150205',
},
},
{
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
- 'md5': 'a49e173825e5fcd15c13fc297fced39d',
+ 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
'info_dict': {
- 'id': 'x_lon_vwhorn_150922',
+ 'id': 'n431456',
'ext': 'mp4',
- 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
- 'description': 'md5:c8be487b2d80ff0594c005add88d8351',
+ 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
+ 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
'upload_date': '20150922',
'timestamp': 1442917800,
- 'uploader': 'NBCU-NEWS',
},
},
{
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
'md5': '118d7ca3f0bea6534f119c68ef539f71',
'info_dict': {
- 'id': 'tdy_al_space_160420',
+ 'id': '669831235788',
'ext': 'mp4',
'title': 'See the aurora borealis from space in stunning new NASA video',
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
'upload_date': '20160420',
'timestamp': 1461152093,
- 'uploader': 'NBCU-NEWS',
},
},
{
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
'info_dict': {
- 'id': 'n_hayes_Aimm_140801_272214',
+ 'id': '314487875924',
'ext': 'mp4',
'title': 'The chaotic GOP immigration vote',
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1406937606,
'upload_date': '20140802',
- 'uploader': 'NBCU-NEWS',
},
},
{
@@ -374,60 +390,63 @@ class NBCNewsIE(ThePlatformIE):
]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- if video_id is not None:
- all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
- info = all_info.find('video')
-
- return {
- 'id': video_id,
- 'title': info.find('headline').text,
- 'ext': 'flv',
- 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
- 'description': info.find('caption').text,
- 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
- }
- else:
- # "feature" and "nightly-news" pages use theplatform.com
- video_id = mobj.group('mpx_id')
- webpage = self._download_webpage(url, video_id)
-
- filter_param = 'byId'
- bootstrap_json = self._search_regex(
- [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
- r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
- webpage, 'bootstrap json', default=None)
- if bootstrap_json:
- bootstrap = self._parse_json(
- bootstrap_json, video_id, transform_source=unescapeHTML)
-
- info = None
- if 'results' in bootstrap:
- info = bootstrap['results'][0]['video']
- elif 'video' in bootstrap:
- info = bootstrap['video']
- elif 'msnbcVideoInfo' in bootstrap:
- info = bootstrap['msnbcVideoInfo']['meta']
- elif 'msnbcThePlatform' in bootstrap:
- info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
- else:
- info = bootstrap
-
- if 'guid' in info:
- video_id = info['guid']
- filter_param = 'byGuid'
- elif 'mpxId' in info:
- video_id = info['mpxId']
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
- 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
- 'ie_key': 'ThePlatformFeed',
- }
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*({.+});', webpage,
+ 'bootstrap json'), video_id, js_to_json)
+ video_data = try_get(data, lambda x: x['video']['current'], dict)
+ if not video_data:
+ video_data = data['article']['content'][0]['primaryMedia']['video']
+ title = video_data['headline']['primary']
+
+ formats = []
+ for va in video_data.get('videoAssets', []):
+ public_url = va.get('publicUrl')
+ if not public_url:
+ continue
+ if '://link.theplatform.com/' in public_url:
+ public_url = update_url_query(public_url, {'format': 'redirect'})
+ format_id = va.get('format')
+ if format_id == 'M3U':
+ formats.extend(self._extract_m3u8_formats(
+ public_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ continue
+ tbr = int_or_none(va.get('bitrate'), 1000)
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': public_url,
+ 'width': int_or_none(va.get('width')),
+ 'height': int_or_none(va.get('height')),
+ 'tbr': tbr,
+ 'ext': 'mp4',
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ closed_captioning = video_data.get('closedCaptioning')
+ if closed_captioning:
+ for cc_url in closed_captioning.values():
+ if not cc_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': try_get(video_data, lambda x: x['description']['primary']),
+ 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'timestamp': unified_timestamp(video_data.get('datePublished')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
class NBCOlympicsIE(InfoExtractor):
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
index aec2ea133..2447c812e 100644
--- a/youtube_dl/extractor/ndr.py
+++ b/youtube_dl/extractor/ndr.py
@@ -7,8 +7,11 @@ from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
+ merge_dicts,
parse_iso8601,
qualities,
+ try_get,
+ urljoin,
)
@@ -85,21 +88,25 @@ class NDRIE(NDRBaseIE):
def _extract_embed(self, webpage, display_id):
embed_url = self._html_search_meta(
- 'embedURL', webpage, 'embed URL', fatal=True)
+ 'embedURL', webpage, 'embed URL',
+ default=None) or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url')
description = self._search_regex(
r'<p[^>]+itemprop="description">([^<]+)</p>',
webpage, 'description', default=None) or self._og_search_description(webpage)
timestamp = parse_iso8601(
self._search_regex(
r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
- webpage, 'upload date', fatal=False))
- return {
+ webpage, 'upload date', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
'_type': 'url_transparent',
'url': embed_url,
'display_id': display_id,
'description': description,
'timestamp': timestamp,
- }
+ }, info)
class NJoyIE(NDRBaseIE):
@@ -220,11 +227,17 @@ class NDREmbedBaseIE(InfoExtractor):
upload_date = ppjson.get('config', {}).get('publicationDate')
duration = int_or_none(config.get('duration'))
- thumbnails = [{
- 'id': thumbnail.get('quality') or thumbnail_id,
- 'url': thumbnail['src'],
- 'preference': quality_key(thumbnail.get('quality')),
- } for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')]
+ thumbnails = []
+ poster = try_get(config, lambda x: x['poster'], dict) or {}
+ for thumbnail_id, thumbnail in poster.items():
+ thumbnail_url = urljoin(url, thumbnail.get('src'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail.get('quality') or thumbnail_id,
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail.get('quality')),
+ })
return {
'id': video_id,
diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py
index ddec89f2c..bc3eb9160 100644
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dl/extractor/ndtv.py
@@ -84,8 +84,8 @@ class NDTVIE(InfoExtractor):
# '__title' does not contain extra words such as sub-site name, "Video" etc.
title = compat_urllib_parse_unquote_plus(
- self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None) or
- self._og_search_title(webpage))
+ self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
+ or self._og_search_title(webpage))
filename = self._search_regex(
r"(?:__)?filename\s*[:=]\s*'([^']+)'", webpage, 'video filename')
diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py
index e3f35f1d8..dab4aec44 100644
--- a/youtube_dl/extractor/newstube.py
+++ b/youtube_dl/extractor/newstube.py
@@ -1,12 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
+import base64
+import hashlib
from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
from ..utils import (
- ExtractorError,
+ bytes_to_intlist,
int_or_none,
+ intlist_to_bytes,
+ parse_codecs,
+ parse_duration,
)
@@ -14,7 +19,7 @@ class NewstubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)'
_TEST = {
'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym',
- 'md5': '801eef0c2a9f4089fa04e4fe3533abdc',
+ 'md5': '9d10320ad473444352f72f746ccb8b8c',
'info_dict': {
'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6',
'ext': 'mp4',
@@ -25,84 +30,45 @@ class NewstubeIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
- page = self._download_webpage(url, video_id, 'Downloading page')
+ page = self._download_webpage(url, video_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], page, fatal=True)
video_guid = self._html_search_regex(
- r'<meta property="og:video:url" content="https?://(?:www\.)?newstube\.ru/freshplayer\.swf\?guid=(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ r'<meta\s+property="og:video(?::(?:(?:secure_)?url|iframe))?"\s+content="https?://(?:www\.)?newstube\.ru/embed/(?P<guid>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
page, 'video GUID')
- player = self._download_xml(
- 'http://p.newstube.ru/v2/player.asmx/GetAutoPlayInfo6?state=&url=%s&sessionId=&id=%s&placement=profile&location=n2' % (url, video_guid),
- video_guid, 'Downloading player XML')
-
- def ns(s):
- return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'}
-
- error_message = player.find(ns('./ErrorMessage'))
- if error_message is not None:
- raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True)
-
- session_id = player.find(ns('./SessionId')).text
- media_info = player.find(ns('./Medias/MediaInfo'))
- title = media_info.find(ns('./Name')).text
- description = self._og_search_description(page)
- thumbnail = media_info.find(ns('./KeyFrame')).text
- duration = int(media_info.find(ns('./Duration')).text) / 1000.0
+ enc_data = base64.b64decode(self._download_webpage(
+ 'https://www.newstube.ru/embed/api/player/getsources2',
+ video_guid, query={
+ 'guid': video_guid,
+ 'ff': 3,
+ }))
+ key = hashlib.pbkdf2_hmac(
+ 'sha1', video_guid.replace('-', '').encode(), enc_data[:16], 1)[:16]
+ dec_data = aes_cbc_decrypt(
+ bytes_to_intlist(enc_data[32:]), bytes_to_intlist(key),
+ bytes_to_intlist(enc_data[16:32]))
+ sources = self._parse_json(intlist_to_bytes(dec_data[:-dec_data[-1]]), video_guid)
formats = []
-
- for stream_info in media_info.findall(ns('./Streams/StreamInfo')):
- media_location = stream_info.find(ns('./MediaLocation'))
- if media_location is None:
+ for source in sources:
+ source_url = source.get('Src')
+ if not source_url:
continue
-
- server = media_location.find(ns('./Server')).text
- app = media_location.find(ns('./App')).text
- media_id = stream_info.find(ns('./Id')).text
- name = stream_info.find(ns('./Name')).text
- width = int(stream_info.find(ns('./Width')).text)
- height = int(stream_info.find(ns('./Height')).text)
-
- formats.append({
- 'url': 'rtmp://%s/%s' % (server, app),
- 'app': app,
- 'play_path': '01/%s' % video_guid.upper(),
- 'rtmp_conn': ['S:%s' % session_id, 'S:%s' % media_id, 'S:n2'],
- 'page_url': url,
- 'ext': 'flv',
- 'format_id': 'rtmp' + ('-%s' % name if name else ''),
- 'width': width,
+ height = int_or_none(source.get('Height'))
+ f = {
+ 'format_id': 'http' + ('-%dp' % height if height else ''),
+ 'url': source_url,
+ 'width': int_or_none(source.get('Width')),
'height': height,
- })
-
- sources_data = self._download_json(
- 'http://www.newstube.ru/player2/getsources?guid=%s' % video_guid,
- video_guid, fatal=False)
- if sources_data:
- for source in sources_data.get('Sources', []):
- source_url = source.get('Src')
- if not source_url:
- continue
- height = int_or_none(source.get('Height'))
- f = {
- 'format_id': 'http' + ('-%dp' % height if height else ''),
- 'url': source_url,
- 'width': int_or_none(source.get('Width')),
- 'height': height,
- }
- source_type = source.get('Type')
- if source_type:
- mobj = re.search(r'codecs="([^,]+),\s*([^"]+)"', source_type)
- if mobj:
- vcodec, acodec = mobj.groups()
- f.update({
- 'vcodec': vcodec,
- 'acodec': acodec,
- })
- formats.append(f)
+ }
+ source_type = source.get('Type')
+ if source_type:
+ f.update(parse_codecs(self._search_regex(
+ r'codecs="([^"]+)"', source_type, 'codecs', fatal=False)))
+ formats.append(f)
self._check_formats(formats, video_guid)
self._sort_formats(formats)
@@ -110,8 +76,8 @@ class NewstubeIE(InfoExtractor):
return {
'id': video_guid,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
+ 'description': self._html_search_meta(['description', 'og:description'], page),
+ 'thumbnail': self._html_search_meta(['og:image:secure_url', 'og:image', 'twitter:image'], page),
+ 'duration': parse_duration(self._html_search_meta('duration', page)),
'formats': formats,
}
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py
index 680f03aad..7bd1290bf 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dl/extractor/nextmedia.py
@@ -180,8 +180,8 @@ class AppleDailyIE(NextMediaIE):
_URL_PATTERN = r'\{url: \'(.+)\'\}'
def _fetch_title(self, page):
- return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None) or
- self._html_search_meta('description', page, 'news title'))
+ return (self._html_search_regex(r'<h1 id="h1">([^<>]+)</h1>', page, 'news title', default=None)
+ or self._html_search_meta('description', page, 'news title'))
def _fetch_thumbnail(self, page):
return self._html_search_regex(r"setInitialImage\(\'([^']+)'\)", page, 'video thumbnail', fatal=False)
diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py
index 82d526c22..586c1b7eb 100644
--- a/youtube_dl/extractor/nexx.py
+++ b/youtube_dl/extractor/nexx.py
@@ -108,7 +108,7 @@ class NexxIE(InfoExtractor):
@staticmethod
def _extract_domain_id(webpage):
mobj = re.search(
- r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
+ r'<script\b[^>]+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P<id>\d+)',
webpage)
return mobj.group('id') if mobj else None
@@ -123,7 +123,7 @@ class NexxIE(InfoExtractor):
domain_id = NexxIE._extract_domain_id(webpage)
if domain_id:
for video_id in re.findall(
- r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
+ r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)',
webpage):
entries.append(
'https://api.nexx.cloud/v3/%s/videos/byid/%s'
@@ -295,13 +295,23 @@ class NexxIE(InfoExtractor):
video = None
+ def find_video(result):
+ if isinstance(result, dict):
+ return result
+ elif isinstance(result, list):
+ vid = int(video_id)
+ for v in result:
+ if try_get(v, lambda x: x['general']['ID'], int) == vid:
+ return v
+ return None
+
response = self._download_json(
'https://arc.nexx.cloud/api/video/%s.json' % video_id,
video_id, fatal=False)
if response and isinstance(response, dict):
result = response.get('result')
- if result and isinstance(result, dict):
- video = result
+ if result:
+ video = find_video(result)
# not all videos work via arc, e.g. nexx:741:1269984
if not video:
@@ -348,7 +358,7 @@ class NexxIE(InfoExtractor):
request_token = hashlib.md5(
''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
- video = self._call_api(
+ result = self._call_api(
domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
'addInteractionOptions': '1',
@@ -363,6 +373,7 @@ class NexxIE(InfoExtractor):
'X-Request-CID': cid,
'X-Request-Token': request_token,
})
+ video = find_video(result)
general = video['general']
title = general['title']
@@ -399,8 +410,8 @@ class NexxIE(InfoExtractor):
class NexxEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
- _TEST = {
+ _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
'md5': '16746bfc28c42049492385c989b26c4a',
'info_dict': {
@@ -409,7 +420,6 @@ class NexxEmbedIE(InfoExtractor):
'title': 'Nervenkitzel Achterbahn',
'alt_title': 'Karussellbauer in Deutschland',
'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
- 'release_year': 2005,
'creator': 'SPIEGEL TV',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 2761,
@@ -420,7 +430,10 @@ class NexxEmbedIE(InfoExtractor):
'format': 'bestvideo',
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7',
+ 'only_matching': True,
+ }]
@staticmethod
def _extract_urls(webpage):
diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py
deleted file mode 100644
index adcc636bc..000000000
--- a/youtube_dl/extractor/nfb.py
+++ /dev/null
@@ -1,112 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- determine_ext,
- int_or_none,
- qualities,
- urlencode_postdata,
- xpath_text,
-)
-
-
-class NFBIE(InfoExtractor):
- IE_NAME = 'nfb'
- IE_DESC = 'National Film Board of Canada'
- _VALID_URL = r'https?://(?:www\.)?(?:nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)'
-
- _TEST = {
- 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny',
- 'info_dict': {
- 'id': 'qallunaat_why_white_people_are_funny',
- 'ext': 'flv',
- 'title': 'Qallunaat! Why White People Are Funny ',
- 'description': 'md5:6b8e32dde3abf91e58857b174916620c',
- 'duration': 3128,
- 'creator': 'Mark Sandiford',
- 'uploader': 'Mark Sandiford',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- config = self._download_xml(
- 'https://www.nfb.ca/film/%s/player_config' % video_id,
- video_id, 'Downloading player config XML',
- data=urlencode_postdata({'getConfig': 'true'}),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'X-NFB-Referer': 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf'
- })
-
- title, description, thumbnail, duration, uploader, author = [None] * 6
- thumbnails, formats = [[]] * 2
- subtitles = {}
-
- for media in config.findall('./player/stream/media'):
- if media.get('type') == 'posterImage':
- quality_key = qualities(('low', 'high'))
- thumbnails = []
- for asset in media.findall('assets/asset'):
- asset_url = xpath_text(asset, 'default/url', default=None)
- if not asset_url:
- continue
- quality = asset.get('quality')
- thumbnails.append({
- 'url': asset_url,
- 'id': quality,
- 'preference': quality_key(quality),
- })
- elif media.get('type') == 'video':
- title = xpath_text(media, 'title', fatal=True)
- for asset in media.findall('assets/asset'):
- quality = asset.get('quality')
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]$', quality or '', 'height', default=None))
- for node in asset:
- streamer = xpath_text(node, 'streamerURI', default=None)
- if not streamer:
- continue
- play_path = xpath_text(node, 'url', default=None)
- if not play_path:
- continue
- formats.append({
- 'url': streamer,
- 'app': streamer.split('/', 3)[3],
- 'play_path': play_path,
- 'rtmp_live': False,
- 'ext': 'flv',
- 'format_id': '%s-%s' % (node.tag, quality) if quality else node.tag,
- 'height': height,
- })
- self._sort_formats(formats)
- description = clean_html(xpath_text(media, 'description'))
- uploader = xpath_text(media, 'author')
- duration = int_or_none(media.get('duration'))
- for subtitle in media.findall('./subtitles/subtitle'):
- subtitle_url = xpath_text(subtitle, 'url', default=None)
- if not subtitle_url:
- continue
- lang = xpath_text(subtitle, 'lang', default='en')
- subtitles.setdefault(lang, []).append({
- 'url': subtitle_url,
- 'ext': (subtitle.get('format') or determine_ext(subtitle_url)).lower(),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'creator': uploader,
- 'uploader': uploader,
- 'formats': formats,
- 'subtitles': subtitles,
- }
diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py
index 5c8cd76dc..de6a707c4 100644
--- a/youtube_dl/extractor/nhk.py
+++ b/youtube_dl/extractor/nhk.py
@@ -1,51 +1,93 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from ..utils import ExtractorError
class NhkVodIE(InfoExtractor):
- _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P<id>[^/]+/[^/?#&]+)'
- _TEST = {
- # Videos available only for a limited period of time. Visit
- # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples.
- 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815',
+ _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
+ # Content available only for a limited period of time. Visit
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
+ _TESTS = [{
+ # clip
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+ 'md5': '256a1be14f48d960a7e61e2532d95ec3',
'info_dict': {
- 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5',
- 'ext': 'flv',
- 'title': 'TOKYO FASHION EXPRESS - The Kimono as Global Fashion',
- 'description': 'md5:db338ee6ce8204f415b754782f819824',
- 'series': 'TOKYO FASHION EXPRESS',
- 'episode': 'The Kimono as Global Fashion',
+ 'id': 'a95j5iza',
+ 'ext': 'mp4',
+ 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+ 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+ 'timestamp': 1565965194,
+ 'upload_date': '20190816',
},
- 'skip': 'Videos available only for a limited period of time',
- }
- _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k'
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
+ 'only_matching': True,
+ }]
+ _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
def _real_extract(self, url):
- video_id = self._match_id(url)
+ lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
+ if episode_id.isdigit():
+ episode_id = episode_id[:4] + '-' + episode_id[4:]
- data = self._download_json(self._API_URL, video_id)
+ is_video = m_type == 'video'
+ episode = self._download_json(
+ self._API_URL_TEMPLATE % (
+ 'v' if is_video else 'r',
+ 'clip' if episode_id[:4] == '9999' else 'esd',
+ episode_id, lang, '/all' if is_video else ''),
+ episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
+ title = episode.get('sub_title_clean') or episode['sub_title']
- try:
- episode = next(
- e for e in data['data']['episodes']
- if e.get('url') and video_id in e['url'])
- except StopIteration:
- raise ExtractorError('Unable to find episode')
+ def get_clean_field(key):
+ return episode.get(key + '_clean') or episode.get(key)
- embed_code = episode['vod_id']
+ series = get_clean_field('title')
- title = episode.get('sub_title_clean') or episode['sub_title']
- description = episode.get('description_clean') or episode.get('description')
- series = episode.get('title_clean') or episode.get('title')
+ thumbnails = []
+ for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
+ img_path = episode.get('image' + s)
+ if not img_path:
+ continue
+ thumbnails.append({
+ 'id': '%dp' % h,
+ 'height': h,
+ 'width': w,
+ 'url': 'https://www3.nhk.or.jp' + img_path,
+ })
- return {
- '_type': 'url_transparent',
- 'ie_key': 'Ooyala',
- 'url': 'ooyala:%s' % embed_code,
+ info = {
+ 'id': episode_id + '-' + lang,
'title': '%s - %s' % (series, title) if series and title else title,
- 'description': description,
+ 'description': get_clean_field('description'),
+ 'thumbnails': thumbnails,
'series': series,
'episode': title,
}
+ if is_video:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'Piksel',
+ 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
+ })
+ else:
+ audio = episode['audio']
+ audio_path = audio['audio']
+ info['formats'] = self._extract_m3u8_formats(
+ 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+ episode_id, 'm4a', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in info['formats']:
+ f['language'] = lang
+ return info
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
index cf440f713..eddfe1f37 100644
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -108,7 +108,7 @@ class NHLIE(NHLBaseIE):
'timestamp': 1454544904,
},
}, {
- # Some m3u8 URLs are invalid (https://github.com/rg3/youtube-dl/issues/10713)
+ # Some m3u8 URLs are invalid (https://github.com/ytdl-org/youtube-dl/issues/10713)
'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
'md5': '50b2bb47f405121484dda3ccbea25459',
'info_dict': {
diff --git a/youtube_dl/extractor/nick.py b/youtube_dl/extractor/nick.py
index 5e34d776b..2e8b302ac 100644
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dl/extractor/nick.py
@@ -85,7 +85,8 @@ class NickBrIE(MTVServicesInfoExtractor):
https?://
(?:
(?P<domain>(?:www\.)?nickjr|mundonick\.uol)\.com\.br|
- (?:www\.)?nickjr\.[a-z]{2}
+ (?:www\.)?nickjr\.[a-z]{2}|
+ (?:www\.)?nickelodeonjunior\.fr
)
/(?:programas/)?[^/]+/videos/(?:episodios/)?(?P<id>[^/?\#.]+)
'''
@@ -101,6 +102,9 @@ class NickBrIE(MTVServicesInfoExtractor):
}, {
'url': 'http://www.nickjr.de/blaze-und-die-monster-maschinen/videos/f6caaf8f-e4e8-4cc1-b489-9380d6dcd059/',
'only_matching': True,
+ }, {
+ 'url': 'http://www.nickelodeonjunior.fr/paw-patrol-la-pat-patrouille/videos/episode-401-entier-paw-patrol/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 76b412ff1..eb07ca776 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -369,14 +369,14 @@ class NiconicoIE(InfoExtractor):
video_detail = watch_api_data.get('videoDetail', {})
thumbnail = (
- get_video_info(['thumbnail_url', 'thumbnailURL']) or
- self._html_search_meta('image', webpage, 'thumbnail', default=None) or
- video_detail.get('thumbnail'))
+ get_video_info(['thumbnail_url', 'thumbnailURL'])
+ or self._html_search_meta('image', webpage, 'thumbnail', default=None)
+ or video_detail.get('thumbnail'))
description = get_video_info('description')
- timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
- unified_timestamp(get_video_info('postedDateTime')))
+ timestamp = (parse_iso8601(get_video_info('first_retrieve'))
+ or unified_timestamp(get_video_info('postedDateTime')))
if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match:
@@ -395,9 +395,9 @@ class NiconicoIE(InfoExtractor):
view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount')
- comment_count = (int_or_none(get_video_info('comment_num')) or
- video_detail.get('commentCount') or
- try_get(api_data, lambda x: x['thread']['commentCount']))
+ comment_count = (int_or_none(get_video_info('comment_num'))
+ or video_detail.get('commentCount')
+ or try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count:
match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -406,11 +406,11 @@ class NiconicoIE(InfoExtractor):
comment_count = int_or_none(match.replace(',', ''))
duration = (parse_duration(
- get_video_info('length') or
- self._html_search_meta(
- 'video:duration', webpage, 'video duration', default=None)) or
- video_detail.get('length') or
- get_video_info('duration'))
+ get_video_info('length')
+ or self._html_search_meta(
+ 'video:duration', webpage, 'video duration', default=None))
+ or video_detail.get('length')
+ or get_video_info('duration'))
webpage_url = get_video_info('watch_url') or url
diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py
index f32f530f7..6157dc7c1 100644
--- a/youtube_dl/extractor/ninenow.py
+++ b/youtube_dl/extractor/ninenow.py
@@ -45,7 +45,11 @@ class NineNowIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
page_data = self._parse_json(self._search_regex(
r'window\.__data\s*=\s*({.*?});', webpage,
- 'page data'), display_id)
+ 'page data', default='{}'), display_id, fatal=False)
+ if not page_data:
+ page_data = self._parse_json(self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*JSON\.parse\s*\(\s*(".+?")\s*\)\s*;',
+ webpage, 'page data'), display_id), display_id)
for kind in ('episode', 'clip'):
current_key = page_data.get(kind, {}).get(
diff --git a/youtube_dl/extractor/nintendo.py b/youtube_dl/extractor/nintendo.py
index 4b4e66b05..ff8f70ba6 100644
--- a/youtube_dl/extractor/nintendo.py
+++ b/youtube_dl/extractor/nintendo.py
@@ -5,13 +5,12 @@ import re
from .common import InfoExtractor
from .ooyala import OoyalaIE
-from ..utils import unescapeHTML
class NintendoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:games/detail|nintendo-direct)/(?P<id>[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj',
+ 'url': 'https://www.nintendo.com/games/detail/duck-hunt-wii-u/',
'info_dict': {
'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
'ext': 'flv',
@@ -28,7 +27,19 @@ class NintendoIE(InfoExtractor):
'id': 'tokyo-mirage-sessions-fe-wii-u',
'title': 'Tokyo Mirage Sessions ♯FE',
},
- 'playlist_count': 3,
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
+ 'info_dict': {
+ 'id': 'J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V',
+ 'ext': 'mp4',
+ 'title': 'Switch_ROS_ND0904-H264.mov',
+ 'duration': 2324.758,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
}]
def _real_extract(self, url):
@@ -39,8 +50,11 @@ class NintendoIE(InfoExtractor):
entries = [
OoyalaIE._build_url_result(m.group('code'))
for m in re.finditer(
- r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2',
- webpage)]
+ r'data-(?:video-id|directVideoId)=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage)]
+
+ title = self._html_search_regex(
+ r'(?s)<(?:span|div)[^>]+class="(?:title|wrapper)"[^>]*>.*?<h1>(.+?)</h1>',
+ webpage, 'title', fatal=False)
return self.playlist_result(
- entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False)))
+ entries, page_id, title)
diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dl/extractor/njpwworld.py
index febef097a..025c5d249 100644
--- a/youtube_dl/extractor/njpwworld.py
+++ b/youtube_dl/extractor/njpwworld.py
@@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
'skip': 'Requires login',
}
+ _LOGIN_URL = 'https://front.njpwworld.com/auth/login'
+
def _real_initialize(self):
self._login()
@@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor):
if not username:
return True
+ # Setup session (will set necessary cookies)
+ self._request_webpage(
+ 'https://njpwworld.com/', None, note='Setting up session')
+
webpage, urlh = self._download_webpage_handle(
- 'https://njpwworld.com/auth/login', None,
+ self._LOGIN_URL, None,
note='Logging in', errnote='Unable to login',
data=urlencode_postdata({'login_id': username, 'pw': password}),
- headers={'Referer': 'https://njpwworld.com/auth'})
+ headers={'Referer': 'https://front.njpwworld.com/auth'})
# /auth/login will return 302 for successful logins
- if urlh.geturl() == 'https://njpwworld.com/auth/login':
+ if urlh.geturl() == self._LOGIN_URL:
self.report_warning('unable to login')
return False
diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py
index 58b371ed7..30df905af 100644
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dl/extractor/noco.py
@@ -115,7 +115,7 @@ class NocoIE(InfoExtractor):
# Timestamp adjustment offset between server time and local time
# must be calculated in order to use timestamps closest to server's
- # in all API requests (see https://github.com/rg3/youtube-dl/issues/7864)
+ # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(
diff --git a/youtube_dl/extractor/nonktube.py b/youtube_dl/extractor/nonktube.py
index 63e58aae2..ca1424e06 100644
--- a/youtube_dl/extractor/nonktube.py
+++ b/youtube_dl/extractor/nonktube.py
@@ -25,9 +25,14 @@ class NonkTubeIE(NuevoBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- info = self._extract_nuevo(
- 'https://www.nonktube.com/media/nuevo/econfig.php?key=%s'
- % video_id, video_id)
+ webpage = self._download_webpage(url, video_id)
- info['age_limit'] = 18
+ title = self._og_search_title(webpage)
+ info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+
+ info.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': 18,
+ })
return info
diff --git a/youtube_dl/extractor/noovo.py b/youtube_dl/extractor/noovo.py
index 974de3c3e..b40770d07 100644
--- a/youtube_dl/extractor/noovo.py
+++ b/youtube_dl/extractor/noovo.py
@@ -57,7 +57,8 @@ class NoovoIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- bc_url = BrightcoveNewIE._extract_url(self, webpage)
+ brightcove_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
data = self._parse_json(
self._search_regex(
@@ -89,7 +90,10 @@ class NoovoIE(InfoExtractor):
return {
'_type': 'url_transparent',
'ie_key': BrightcoveNewIE.ie_key(),
- 'url': smuggle_url(bc_url, {'geo_countries': ['CA']}),
+ 'url': smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['CA']}),
+ 'id': brightcove_id,
'title': title,
'description': description,
'series': series,
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py
index 80186ec50..47b9748f0 100644
--- a/youtube_dl/extractor/nova.py
+++ b/youtube_dl/extractor/nova.py
@@ -6,6 +6,7 @@ import re
from .common import InfoExtractor
from ..utils import (
clean_html,
+ determine_ext,
int_or_none,
js_to_json,
qualities,
@@ -18,7 +19,7 @@ class NovaEmbedIE(InfoExtractor):
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
- 'md5': 'b3834f6de5401baabf31ed57456463f7',
+ 'md5': 'ee009bafcc794541570edd44b71cbea3',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
@@ -33,36 +34,76 @@ class NovaEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- bitrates = self._parse_json(
+ duration = None
+ formats = []
+
+ player = self._parse_json(
self._search_regex(
- r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
- video_id, transform_source=js_to_json)
+ r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
+ webpage, 'player', default='{}'), video_id, fatal=False)
+ if player:
+ for format_id, format_list in player['tracks'].items():
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_dict in format_list:
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = url_or_none(format_dict.get('src'))
+ format_type = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if (format_type == 'application/x-mpegURL'
+ or format_id == 'HLS' or ext == 'm3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ elif (format_type == 'application/dash+xml'
+ or format_id == 'DASH' or ext == 'mpd'):
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ duration = int_or_none(player.get('duration'))
+ else:
+ # Old path, not actual as of 08.04.2020
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
+ video_id, transform_source=js_to_json)
- QUALITIES = ('lq', 'mq', 'hq', 'hd')
- quality_key = qualities(QUALITIES)
+ QUALITIES = ('lq', 'mq', 'hq', 'hd')
+ quality_key = qualities(QUALITIES)
+
+ for format_id, format_list in bitrates.items():
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_url in format_list:
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ continue
+ f = {
+ 'url': format_url,
+ }
+ f_id = format_id
+ for quality in QUALITIES:
+ if '%s.mp4' % quality in format_url:
+ f_id += '-%s' % quality
+ f.update({
+ 'quality': quality_key(quality),
+ 'format_note': quality.upper(),
+ })
+ break
+ f['format_id'] = f_id
+ formats.append(f)
- formats = []
- for format_id, format_list in bitrates.items():
- if not isinstance(format_list, list):
- continue
- for format_url in format_list:
- format_url = url_or_none(format_url)
- if not format_url:
- continue
- f = {
- 'url': format_url,
- }
- f_id = format_id
- for quality in QUALITIES:
- if '%s.mp4' % quality in format_url:
- f_id += '-%s' % quality
- f.update({
- 'quality': quality_key(quality),
- 'format_note': quality.upper(),
- })
- break
- f['format_id'] = f_id
- formats.append(f)
self._sort_formats(formats)
title = self._og_search_title(
@@ -75,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='value')
duration = int_or_none(self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
+ default=duration))
return {
'id': video_id,
@@ -91,7 +133,7 @@ class NovaIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
_TESTS = [{
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
- 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
+ 'md5': '249baab7d0104e186e78b0899c7d5f28',
'info_dict': {
'id': '1757139',
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
@@ -113,7 +155,8 @@ class NovaIE(InfoExtractor):
'params': {
# rtmp download
'skip_download': True,
- }
+ },
+ 'skip': 'gone',
}, {
# media.cms.nova.cz embed
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
@@ -128,6 +171,7 @@ class NovaIE(InfoExtractor):
'skip_download': True,
},
'add_ie': [NovaEmbedIE.ie_key()],
+ 'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
}, {
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
'only_matching': True,
@@ -152,14 +196,29 @@ class NovaIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
+ description = clean_html(self._og_search_description(webpage, default=None))
+ if site == 'novaplus':
+ upload_date = unified_strdate(self._search_regex(
+ r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
+ elif site == 'fanda':
+ upload_date = unified_strdate(self._search_regex(
+ r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
+ else:
+ upload_date = None
+
# novaplus
embed_id = self._search_regex(
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
webpage, 'embed url', default=None)
if embed_id:
- return self.url_result(
- 'https://media.cms.nova.cz/embed/%s' % embed_id,
- ie=NovaEmbedIE.ie_key(), video_id=embed_id)
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
+ 'ie_key': NovaEmbedIE.ie_key(),
+ 'id': embed_id,
+ 'description': description,
+ 'upload_date': upload_date
+ }
video_id = self._search_regex(
[r"(?:media|video_id)\s*:\s*'(\d+)'",
@@ -233,18 +292,8 @@ class NovaIE(InfoExtractor):
self._sort_formats(formats)
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
- description = clean_html(self._og_search_description(webpage, default=None))
thumbnail = config.get('poster')
- if site == 'novaplus':
- upload_date = unified_strdate(self._search_regex(
- r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
- elif site == 'fanda':
- upload_date = unified_strdate(self._search_regex(
- r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
- else:
- upload_date = None
-
return {
'id': video_id,
'display_id': display_id,
diff --git a/youtube_dl/extractor/novamov.py b/youtube_dl/extractor/novamov.py
deleted file mode 100644
index 829c71960..000000000
--- a/youtube_dl/extractor/novamov.py
+++ /dev/null
@@ -1,212 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- ExtractorError,
- NO_DEFAULT,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class NovaMovIE(InfoExtractor):
- IE_NAME = 'novamov'
- IE_DESC = 'NovaMov'
-
- _VALID_URL_TEMPLATE = r'''(?x)
- http://
- (?:
- (?:www\.)?%(host)s/(?:file|video|mobile/\#/videos)/|
- (?:(?:embed|www)\.)%(host)s/embed(?:\.php|/)?\?(?:.*?&)?\bv=
- )
- (?P<id>[a-z\d]{13})
- '''
- _VALID_URL = _VALID_URL_TEMPLATE % {'host': r'novamov\.com'}
-
- _HOST = 'www.novamov.com'
-
- _FILE_DELETED_REGEX = r'This file no longer exists on our servers!</h2>'
- _FILEKEY_REGEX = r'flashvars\.filekey=(?P<filekey>"?[^"]+"?);'
- _TITLE_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>([^<]+)</h3>'
- _DESCRIPTION_REGEX = r'(?s)<div class="v_tab blockborder rounded5" id="v_tab1">\s*<h3>[^<]+</h3><p>([^<]+)</p>'
- _URL_TEMPLATE = 'http://%s/video/%s'
-
- _TEST = None
-
- def _check_existence(self, webpage, video_id):
- if re.search(self._FILE_DELETED_REGEX, webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- url = self._URL_TEMPLATE % (self._HOST, video_id)
-
- webpage = self._download_webpage(
- url, video_id, 'Downloading video page')
-
- self._check_existence(webpage, video_id)
-
- def extract_filekey(default=NO_DEFAULT):
- filekey = self._search_regex(
- self._FILEKEY_REGEX, webpage, 'filekey', default=default)
- if filekey is not default and (filekey[0] != '"' or filekey[-1] != '"'):
- return self._search_regex(
- r'var\s+%s\s*=\s*"([^"]+)"' % re.escape(filekey), webpage, 'filekey', default=default)
- else:
- return filekey
-
- filekey = extract_filekey(default=None)
-
- if not filekey:
- fields = self._hidden_inputs(webpage)
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>.+?)\1', webpage,
- 'post url', default=url, group='url')
- if not post_url.startswith('http'):
- post_url = compat_urlparse.urljoin(url, post_url)
- request = sanitized_Request(
- post_url, urlencode_postdata(fields))
- request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- request.add_header('Referer', post_url)
- webpage = self._download_webpage(
- request, video_id, 'Downloading continue to the video page')
- self._check_existence(webpage, video_id)
-
- filekey = extract_filekey()
-
- title = self._html_search_regex(self._TITLE_REGEX, webpage, 'title')
- description = self._html_search_regex(self._DESCRIPTION_REGEX, webpage, 'description', default='', fatal=False)
-
- api_response = self._download_webpage(
- 'http://%s/api/player.api.php?key=%s&file=%s' % (self._HOST, filekey, video_id), video_id,
- 'Downloading video api response')
-
- response = compat_urlparse.parse_qs(api_response)
-
- if 'error_msg' in response:
- raise ExtractorError('%s returned error: %s' % (self.IE_NAME, response['error_msg'][0]), expected=True)
-
- video_url = response['url'][0]
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'description': description
- }
-
-
-class WholeCloudIE(NovaMovIE):
- IE_NAME = 'wholecloud'
- IE_DESC = 'WholeCloud'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'(?:wholecloud\.net|movshare\.(?:net|sx|ag))'}
-
- _HOST = 'www.wholecloud.net'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<strong>Title:</strong> ([^<]+)</p>'
- _DESCRIPTION_REGEX = r'<strong>Description:</strong> ([^<]+)</p>'
-
- _TEST = {
- 'url': 'http://www.wholecloud.net/video/559e28be54d96',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': '559e28be54d96',
- 'ext': 'flv',
- 'title': 'dissapeared image',
- 'description': 'optical illusion dissapeared image magic illusion',
- }
- }
-
-
-class NowVideoIE(NovaMovIE):
- IE_NAME = 'nowvideo'
- IE_DESC = 'NowVideo'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'nowvideo\.(?:to|ch|ec|sx|eu|at|ag|co|li)'}
-
- _HOST = 'www.nowvideo.to'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<h4>([^<]+)</h4>'
- _DESCRIPTION_REGEX = r'</h4>\s*<p>([^<]+)</p>'
-
- _TEST = {
- 'url': 'http://www.nowvideo.sx/video/f1d6fce9a968b',
- 'md5': '12c82cad4f2084881d8bc60ee29df092',
- 'info_dict': {
- 'id': 'f1d6fce9a968b',
- 'ext': 'flv',
- 'title': 'youtubedl test video BaWjenozKc',
- 'description': 'Description',
- },
- }
-
-
-class VideoWeedIE(NovaMovIE):
- IE_NAME = 'videoweed'
- IE_DESC = 'VideoWeed'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'videoweed\.(?:es|com)'}
-
- _HOST = 'www.videoweed.es'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<h1 class="text_shadow">([^<]+)</h1>'
- _URL_TEMPLATE = 'http://%s/file/%s'
-
- _TEST = {
- 'url': 'http://www.videoweed.es/file/b42178afbea14',
- 'md5': 'abd31a2132947262c50429e1d16c1bfd',
- 'info_dict': {
- 'id': 'b42178afbea14',
- 'ext': 'flv',
- 'title': 'optical illusion dissapeared image magic illusion',
- 'description': ''
- },
- }
-
-
-class CloudTimeIE(NovaMovIE):
- IE_NAME = 'cloudtime'
- IE_DESC = 'CloudTime'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'cloudtime\.to'}
-
- _HOST = 'www.cloudtime.to'
-
- _FILE_DELETED_REGEX = r'>This file no longer exists on our servers.<'
- _TITLE_REGEX = r'<div[^>]+class=["\']video_det["\'][^>]*>\s*<strong>([^<]+)</strong>'
-
- _TEST = None
-
-
-class AuroraVidIE(NovaMovIE):
- IE_NAME = 'auroravid'
- IE_DESC = 'AuroraVid'
-
- _VALID_URL = NovaMovIE._VALID_URL_TEMPLATE % {'host': r'auroravid\.to'}
-
- _HOST = 'www.auroravid.to'
-
- _FILE_DELETED_REGEX = r'This file no longer exists on our servers!<'
-
- _TESTS = [{
- 'url': 'http://www.auroravid.to/video/4rurhn9x446jj',
- 'md5': '7205f346a52bbeba427603ba10d4b935',
- 'info_dict': {
- 'id': '4rurhn9x446jj',
- 'ext': 'flv',
- 'title': 'search engine optimization',
- 'description': 'search engine optimization is used to rank the web page in the google search engine'
- },
- 'skip': '"Invalid token" errors abound (in web interface as well as youtube-dl, there is nothing we can do about it.)'
- }, {
- 'url': 'http://www.auroravid.to/embed/?v=4rurhn9x446jj',
- 'only_matching': True,
- }]
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index c2cb85a73..e525ad928 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -12,11 +12,16 @@ from ..utils import (
ExtractorError,
fix_xml_ampersands,
int_or_none,
+ merge_dicts,
orderedSet,
parse_duration,
qualities,
+ str_or_none,
strip_jsonp,
unified_strdate,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
)
@@ -176,9 +181,122 @@ class NPOIE(NPOBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- return self._get_info(video_id)
+ return self._get_info(url, video_id) or self._get_old_info(video_id)
+
+ def _get_info(self, url, video_id):
+ token = self._download_json(
+ 'https://www.npostart.nl/api/token', video_id,
+ 'Downloading token', headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['token']
+
+ player = self._download_json(
+ 'https://www.npostart.nl/player/%s' % video_id, video_id,
+ 'Downloading player JSON', data=urlencode_postdata({
+ 'autoplay': 0,
+ 'share': 1,
+ 'pageUrl': url,
+ 'hasAdConsent': 0,
+ '_token': token,
+ }))
+
+ player_token = player['token']
+
+ drm = False
+ format_urls = set()
+ formats = []
+ for profile in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
+ streams = self._download_json(
+ 'https://start-player.npo.nl/video/%s/streams' % video_id,
+ video_id, 'Downloading %s profile JSON' % profile, fatal=False,
+ query={
+ 'profile': profile,
+ 'quality': 'npo',
+ 'tokenId': player_token,
+ 'streamType': 'broadcast',
+ })
+ if not streams:
+ continue
+ stream = streams.get('stream')
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('src'))
+ if not stream_url or stream_url in format_urls:
+ continue
+ format_urls.add(stream_url)
+ if stream.get('protection') is not None or stream.get('keySystemOptions') is not None:
+ drm = True
+ continue
+ stream_type = stream.get('type')
+ stream_ext = determine_ext(stream_url)
+ if stream_type == 'application/dash+xml' or stream_ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ stream_url, video_id, mpd_id='dash', fatal=False))
+ elif stream_type == 'application/vnd.apple.mpegurl' or stream_ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ elif re.search(r'\.isml?/Manifest', stream_url):
+ formats.extend(self._extract_ism_formats(
+ stream_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'url': stream_url,
+ })
+
+ if not formats:
+ if drm:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ return
+
+ self._sort_formats(formats)
+
+ info = {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
- def _get_info(self, video_id):
+ embed_url = url_or_none(player.get('embedUrl'))
+ if embed_url:
+ webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed page', fatal=False)
+ if webpage:
+ video = self._parse_json(
+ self._search_regex(
+ r'\bvideo\s*=\s*({.+?})\s*;', webpage, 'video',
+ default='{}'), video_id)
+ if video:
+ title = video.get('episodeTitle')
+ subtitles = {}
+ subtitles_list = video.get('subtitles')
+ if isinstance(subtitles_list, list):
+ for cc in subtitles_list:
+ cc_url = url_or_none(cc.get('src'))
+ if not cc_url:
+ continue
+ lang = str_or_none(cc.get('language')) or 'nl'
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ return merge_dicts({
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': url_or_none(
+ video.get('still_image_url') or video.get('orig_image_url')),
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': unified_timestamp(video.get('broadcastDate')),
+ 'creator': video.get('channel'),
+ 'series': video.get('title'),
+ 'episode': title,
+ 'episode_number': int_or_none(video.get('episodeNumber')),
+ 'subtitles': subtitles,
+ }, info)
+
+ return info
+
+ def _get_old_info(self, video_id):
metadata = self._download_json(
'http://e.omroep.nl/metadata/%s' % video_id,
video_id,
@@ -280,7 +398,7 @@ class NPOIE(NPOBaseIE):
# JSON
else:
video_url = stream_info.get('url')
- if not video_url or video_url in urls:
+ if not video_url or 'vodnotavailable.' in video_url or video_url in urls:
continue
urls.add(video_url)
if determine_ext(video_url) == 'm3u8':
@@ -363,7 +481,7 @@ class NPOIE(NPOBaseIE):
class NPOLiveIE(NPOBaseIE):
IE_NAME = 'npo.nl:live'
- _VALID_URL = r'https?://(?:www\.)?npo\.nl/live(?:/(?P<id>[^/?#&]+))?'
+ _VALID_URL = r'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://www.npo.nl/live/npo-1',
@@ -380,6 +498,9 @@ class NPOLiveIE(NPOBaseIE):
}, {
'url': 'http://www.npo.nl/live',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npostart.nl/live/npo-1',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py
index 1777aa10b..53acc6e57 100644
--- a/youtube_dl/extractor/npr.py
+++ b/youtube_dl/extractor/npr.py
@@ -1,24 +1,24 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlencode
from ..utils import (
int_or_none,
qualities,
+ url_or_none,
)
class NprIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?npr\.org/player/v2/mediaPlayer\.html\?.*\bid=(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?id=449974205',
+ 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
'info_dict': {
'id': '449974205',
'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
},
'playlist_count': 7,
}, {
- 'url': 'http://www.npr.org/player/v2/mediaPlayer.html?action=1&t=1&islist=false&id=446928052&m=446929930&live=1',
+ 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
'info_dict': {
'id': '446928052',
'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
@@ -32,30 +32,50 @@ class NprIE(InfoExtractor):
'duration': 402,
},
}],
+ }, {
+ # mutlimedia, not media title
+ 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
+ 'info_dict': {
+ 'id': '533198237',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '533201718',
+ 'ext': 'mp4',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ 'duration': 402,
+ },
+ }],
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ # multimedia, no formats, stream
+ 'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
+ 'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
- config = self._download_json(
- 'http://api.npr.org/query?%s' % compat_urllib_parse_urlencode({
+ story = self._download_json(
+ 'http://api.npr.org/query', playlist_id, query={
'id': playlist_id,
- 'fields': 'titles,audio,show',
+ 'fields': 'audio,multimedia,title',
'format': 'json',
'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
- }), playlist_id)
-
- story = config['list']['story'][0]
+ })['list']['story'][0]
+ playlist_title = story.get('title', {}).get('$text')
- KNOWN_FORMATS = ('threegp', 'mp4', 'mp3')
+ KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
quality = qualities(KNOWN_FORMATS)
entries = []
- for audio in story.get('audio', []):
- title = audio.get('title', {}).get('$text')
- duration = int_or_none(audio.get('duration', {}).get('$text'))
+ for media in story.get('audio', []) + story.get('multimedia', []):
+ media_id = media['id']
+
formats = []
- for format_id, formats_entry in audio.get('format', {}).items():
+ for format_id, formats_entry in media.get('format', {}).items():
if not formats_entry:
continue
if isinstance(formats_entry, list):
@@ -64,19 +84,41 @@ class NprIE(InfoExtractor):
if not format_url:
continue
if format_id in KNOWN_FORMATS:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'ext': formats_entry.get('type'),
- 'quality': quality(format_id),
- })
+ if format_id == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id == 'smil':
+ smil_formats = self._extract_smil_formats(
+ format_url, media_id, transform_source=lambda s: s.replace(
+ 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
+ self._check_formats(smil_formats, media_id)
+ formats.extend(smil_formats)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+ for stream_id, stream_entry in media.get('stream', {}).items():
+ if not isinstance(stream_entry, dict):
+ continue
+ if stream_id != 'hlsUrl':
+ continue
+ stream_url = url_or_none(stream_entry.get('$text'))
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, stream_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
self._sort_formats(formats)
+
entries.append({
- 'id': audio['id'],
- 'title': title,
- 'duration': duration,
+ 'id': media_id,
+ 'title': media.get('title', {}).get('$text') or playlist_title,
+ 'thumbnail': media.get('altImageUrl', {}).get('$text'),
+ 'duration': int_or_none(media.get('duration', {}).get('$text')),
'formats': formats,
})
- playlist_title = story.get('title', {}).get('$text')
return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index a231735fb..94115534b 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -12,6 +12,7 @@ from ..utils import (
ExtractorError,
int_or_none,
JSON_LD_RE,
+ js_to_json,
NO_DEFAULT,
parse_age_limit,
parse_duration,
@@ -45,8 +46,8 @@ class NRKBaseIE(InfoExtractor):
entries = []
conviva = data.get('convivaStatistics') or {}
- live = (data.get('mediaElementType') == 'Live' or
- data.get('isLive') is True or conviva.get('isLive'))
+ live = (data.get('mediaElementType') == 'Live'
+ or data.get('isLive') is True or conviva.get('isLive'))
def make_title(t):
return self._live_title(t) if live else t
@@ -105,6 +106,7 @@ class NRKBaseIE(InfoExtractor):
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
+ 'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
@@ -211,13 +213,13 @@ class NRKIE(NRKBaseIE):
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
- 'md5': '2f7f6eeb2aacdd99885f355428715cfa',
+ 'md5': '706f34cdf1322577589e369e522b50ef',
'info_dict': {
'id': '150533',
'ext': 'mp4',
'title': 'Dompap og andre fugler i Piip-Show',
'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
- 'duration': 263,
+ 'duration': 262,
}
}, {
# audio
@@ -248,24 +250,36 @@ class NRKTVIE(NRKBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
- (?:serie/[^/]+|program)/
+ (?:serie(?:/[^/]+){1,2}|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{
+ 'url': 'https://tv.nrk.no/program/MDDP12000117',
+ 'md5': '8270824df46ec629b66aeaa5796b36fb',
+ 'info_dict': {
+ 'id': 'MDDP12000117AA',
+ 'ext': 'mp4',
+ 'title': 'Alarm Trolltunga',
+ 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
+ 'duration': 2223,
+ 'age_limit': 6,
+ },
+ }, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
- 'md5': '4e9ca6629f09e588ed240fb11619922a',
+ 'md5': '9a167e54d04671eb6317a37b7bc8a280',
'info_dict': {
'id': 'MUHH48000314AA',
'ext': 'mp4',
'title': '20 spørsmål 23.05.2014',
'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
'duration': 1741,
- 'series': '20 spørsmål - TV',
+ 'series': '20 spørsmål',
'episode': '23.05.2014',
},
+ 'skip': 'NoProgramRights',
}, {
'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': {
@@ -301,7 +315,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
- 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
@@ -314,7 +328,7 @@ class NRKTVIE(NRKBaseIE):
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
- 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
@@ -326,7 +340,7 @@ class NRKTVIE(NRKBaseIE):
'info_dict': {
'id': 'MSPO40010515',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
- 'description': 'md5:c03aba1e917561eface5214020551b7a',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
},
'expected_warnings': ['Video is geo restricted'],
}, {
@@ -362,12 +376,32 @@ class NRKTVIE(NRKBaseIE):
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
+ 'only_matching': True,
}]
class NRKTVEpisodeIE(InfoExtractor):
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
- _TEST = {
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
+ 'info_dict': {
+ 'id': 'MUHH36005220BA',
+ 'ext': 'mp4',
+ 'title': 'Kro, krig og kjærlighet 2:6',
+ 'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
+ 'duration': 1563,
+ 'series': 'Hellums kro',
+ 'season_number': 1,
+ 'episode_number': 2,
+ 'episode': '2:6',
+ 'age_limit': 6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
'info_dict': {
'id': 'MSUI14000816AA',
@@ -383,7 +417,8 @@ class NRKTVEpisodeIE(InfoExtractor):
'params': {
'skip_download': True,
},
- }
+ 'skip': 'ProgramRightsHasExpired',
+ }]
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -403,21 +438,35 @@ class NRKTVSerieBaseIE(InfoExtractor):
def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json(
self._search_regex(
- r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', webpage, 'config',
- default='{}' if not fatal else NO_DEFAULT),
- display_id, fatal=False)
+ (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
+ r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
+ webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
+ display_id, fatal=False, transform_source=js_to_json)
if not config:
return
- return try_get(config, lambda x: x['series'], dict)
+ return try_get(
+ config,
+ (lambda x: x['initialState']['series'], lambda x: x['series']),
+ dict)
+
+ def _extract_seasons(self, seasons):
+ if not isinstance(seasons, list):
+ return []
+ entries = []
+ for season in seasons:
+ entries.extend(self._extract_episodes(season))
+ return entries
def _extract_episodes(self, season):
- entries = []
if not isinstance(season, dict):
- return entries
- episodes = season.get('episodes')
- if not isinstance(episodes, list):
- return entries
- for episode in episodes:
+ return []
+ return self._extract_entries(season.get('episodes'))
+
+ def _extract_entries(self, entry_list):
+ if not isinstance(entry_list, list):
+ return []
+ entries = []
+ for episode in entry_list:
nrk_id = episode.get('prfId')
if not nrk_id or not isinstance(nrk_id, compat_str):
continue
@@ -462,7 +511,15 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{
- # new layout
+ 'url': 'https://tv.nrk.no/serie/blank',
+ 'info_dict': {
+ 'id': 'blank',
+ 'title': 'Blank',
+ 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # new layout, seasons
'url': 'https://tv.nrk.no/serie/backstage',
'info_dict': {
'id': 'backstage',
@@ -471,20 +528,21 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
},
'playlist_mincount': 60,
}, {
- # old layout
+ # new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
- 'playlist_mincount': 9,
+ 'playlist_mincount': 10,
}, {
- 'url': 'http://tv.nrksuper.no/serie/labyrint',
+ # old layout
+ 'url': 'https://tv.nrksuper.no/serie/labyrint',
'info_dict': {
'id': 'labyrint',
'title': 'Labyrint',
- 'description': 'md5:58afd450974c89e27d5a19212eee7115',
+ 'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
},
'playlist_mincount': 3,
}, {
@@ -517,11 +575,12 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
description = try_get(
series, lambda x: x['titles']['subtitle'], compat_str)
entries = []
- for season in series['seasons']:
- entries.extend(self._extract_episodes(season))
+ entries.extend(self._extract_seasons(series.get('seasons')))
+ entries.extend(self._extract_entries(series.get('instalments')))
+ entries.extend(self._extract_episodes(series.get('extraMaterial')))
return self.playlist_result(entries, series_id, title, description)
- # Old layout (e.g. https://tv.nrk.no/serie/groenn-glede)
+ # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
entries = [
self.url_result(
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
@@ -533,6 +592,9 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
'seriestitle', webpage,
'title', default=None) or self._og_search_title(
webpage, fatal=False)
+ if title:
+ title = self._search_regex(
+ r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
description = self._html_search_meta(
'series_description', webpage,
@@ -593,7 +655,7 @@ class NRKPlaylistIE(NRKPlaylistBaseIE):
'title': 'Rivertonprisen til Karin Fossum',
'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
},
- 'playlist_count': 5,
+ 'playlist_count': 2,
}]
def _extract_title(self, webpage):
@@ -626,7 +688,7 @@ class NRKSkoleIE(InfoExtractor):
_TESTS = [{
'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
- 'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6',
+ 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
'info_dict': {
'id': '6021',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/nrl.py b/youtube_dl/extractor/nrl.py
new file mode 100644
index 000000000..22a2df8d3
--- /dev/null
+++ b/youtube_dl/extractor/nrl.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class NRLTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
+ 'info_dict': {
+ 'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
+ 'ext': 'mp4',
+ 'title': 'Match Highlights: Titans v Knights',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ q_data = self._parse_json(self._html_search_regex(
+ r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
+ ooyala_id = q_data['videoId']
+ return self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
diff --git a/youtube_dl/extractor/ntvcojp.py b/youtube_dl/extractor/ntvcojp.py
new file mode 100644
index 000000000..0c8221b22
--- /dev/null
+++ b/youtube_dl/extractor/ntvcojp.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+)
+
+
+class NTVCoJpCUIE(InfoExtractor):
+ IE_NAME = 'cu.ntv.co.jp'
+ IE_DESC = 'Nippon Television Network'
+ _VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
+ 'info_dict': {
+ 'id': '5978891207001',
+ 'ext': 'mp4',
+ 'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
+ 'upload_date': '20181213',
+ 'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+ 'uploader_id': '3855502814001',
+ 'timestamp': 1544669941,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ player_config = self._parse_json(self._search_regex(
+ r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
+ webpage, 'player config'), display_id, js_to_json)
+ video_id = player_config['videoId']
+ account_id = player_config.get('account') or '3855502814001'
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
+ 'description': self._html_search_meta(['description', 'og:description'], webpage),
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py
index 4f9cedb84..c47d1dfa4 100644
--- a/youtube_dl/extractor/ntvru.py
+++ b/youtube_dl/extractor/ntvru.py
@@ -3,9 +3,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- clean_html,
- xpath_text,
int_or_none,
+ strip_or_none,
+ unescapeHTML,
+ xpath_text,
)
@@ -47,10 +48,10 @@ class NTVRuIE(InfoExtractor):
'duration': 1496,
},
}, {
- 'url': 'http://www.ntv.ru/kino/Koma_film',
- 'md5': 'f825770930937aa7e5aca0dc0d29319a',
+ 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
+ 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4',
'info_dict': {
- 'id': '1007609',
+ 'id': '1126480',
'ext': 'mp4',
'title': 'Остросюжетный фильм «Кома»',
'description': 'Остросюжетный фильм «Кома»',
@@ -68,6 +69,10 @@ class NTVRuIE(InfoExtractor):
'thumbnail': r're:^http://.*\.jpg',
'duration': 2590,
},
+ }, {
+ # Schemeless file URL
+ 'url': 'https://www.ntv.ru/video/1797442',
+ 'only_matching': True,
}]
_VIDEO_ID_REGEXES = [
@@ -96,37 +101,31 @@ class NTVRuIE(InfoExtractor):
'http://www.ntv.ru/vi%s/' % video_id,
video_id, 'Downloading video XML')
- title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
- description = clean_html(xpath_text(player, './data/description', 'description'))
+ title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
video = player.find('./data/video')
- video_id = xpath_text(video, './id', 'video id')
- thumbnail = xpath_text(video, './splash', 'thumbnail')
- duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
- view_count = int_or_none(xpath_text(video, './views', 'view count'))
-
- token = self._download_webpage(
- 'http://stat.ntv.ru/services/access/token',
- video_id, 'Downloading access token')
formats = []
for format_id in ['', 'hi', 'webm']:
- file_ = video.find('./%sfile' % format_id)
- if file_ is None:
+ file_ = xpath_text(video, './%sfile' % format_id)
+ if not file_:
continue
- size = video.find('./%ssize' % format_id)
+ if file_.startswith('//'):
+ file_ = self._proto_relative_url(file_)
+ elif not file_.startswith('http'):
+ file_ = 'http://media.ntv.ru/vod/' + file_
formats.append({
- 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
- 'filesize': int_or_none(size.text if size is not None else None),
+ 'url': file_,
+ 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
})
self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': xpath_text(video, './id'),
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
+ 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
+ 'thumbnail': xpath_text(video, './splash'),
+ 'duration': int_or_none(xpath_text(video, './totaltime')),
+ 'view_count': int_or_none(xpath_text(video, './views')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
index 2bb77ab24..fc78ca56c 100644
--- a/youtube_dl/extractor/nytimes.py
+++ b/youtube_dl/extractor/nytimes.py
@@ -69,10 +69,10 @@ class NYTimesBaseIE(InfoExtractor):
'width': int_or_none(video.get('width')),
'height': int_or_none(video.get('height')),
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
- 'tbr': int_or_none(video.get('bitrate'), 1000),
+ 'tbr': int_or_none(video.get('bitrate'), 1000) or None,
'ext': ext,
})
- self._sort_formats(formats)
+ self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
thumbnails = []
for image in video_data.get('images', []):
diff --git a/youtube_dl/extractor/nzz.py b/youtube_dl/extractor/nzz.py
index 2d352f53f..61ee77adb 100644
--- a/youtube_dl/extractor/nzz.py
+++ b/youtube_dl/extractor/nzz.py
@@ -11,20 +11,27 @@ from ..utils import (
class NZZIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
'info_dict': {
'id': '9153',
},
'playlist_mincount': 6,
- }
+ }, {
+ 'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
+ 'info_dict': {
+ 'id': '1368112',
+ },
+ 'playlist_count': 1,
+ }]
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
entries = []
- for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+ for player_element in re.findall(
+ r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
player_params = extract_attributes(player_element)
if player_params.get('data-type') not in ('kaltura_singleArticle',):
self.report_warning('Unsupported player type')
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
index 190d8af4d..7ed9fac55 100644
--- a/youtube_dl/extractor/odnoklassniki.py
+++ b/youtube_dl/extractor/odnoklassniki.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
@@ -115,8 +117,19 @@ class OdnoklassnikiIE(InfoExtractor):
}, {
'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
'only_matching': True,
+ }, {
+ # Paid video
+ 'url': 'https://ok.ru/video/954886983203',
+ 'only_matching': True,
}]
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
def _real_extract(self, url):
start_time = int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
@@ -244,6 +257,11 @@ class OdnoklassnikiIE(InfoExtractor):
'ext': 'flv',
})
+ if not formats:
+ payment_info = metadata.get('paymentInfo')
+ if payment_info:
+ raise ExtractorError('This video is paid, subscribe to download it', expected=True)
+
self._sort_formats(formats)
info['formats'] = formats
diff --git a/youtube_dl/extractor/once.py b/youtube_dl/extractor/once.py
index 8ae5fadd8..3e44b7829 100644
--- a/youtube_dl/extractor/once.py
+++ b/youtube_dl/extractor/once.py
@@ -21,7 +21,7 @@ class OnceIE(InfoExtractor):
progressive_formats = []
for adaptive_format in formats:
# Prevent advertisement from embedding into m3u8 playlist (see
- # https://github.com/rg3/youtube-dl/issues/8893#issuecomment-199912684)
+ # https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
adaptive_format['url'] = re.sub(
r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
rendition_id = self._search_regex(
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
index 58da1bc27..e55b2ac89 100644
--- a/youtube_dl/extractor/onet.py
+++ b/youtube_dl/extractor/onet.py
@@ -20,6 +20,8 @@ from ..utils import (
class OnetBaseIE(InfoExtractor):
+ _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'
+
def _search_mvp_id(self, webpage):
return self._search_regex(
r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
@@ -45,7 +47,7 @@ class OnetBaseIE(InfoExtractor):
video = response['result'].get('0')
formats = []
- for _, formats_dict in video['formats'].items():
+ for format_type, formats_dict in video['formats'].items():
if not isinstance(formats_dict, dict):
continue
for format_id, format_list in formats_dict.items():
@@ -56,21 +58,31 @@ class OnetBaseIE(InfoExtractor):
if not video_url:
continue
ext = determine_ext(video_url)
- if format_id == 'ism':
+ if format_id.startswith('ism'):
formats.extend(self._extract_ism_formats(
video_url, video_id, 'mss', fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
+ elif format_id.startswith('hls'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
else:
- formats.append({
+ http_f = {
'url': video_url,
'format_id': format_id,
- 'height': int_or_none(f.get('vertical_resolution')),
- 'width': int_or_none(f.get('horizontal_resolution')),
'abr': float_or_none(f.get('audio_bitrate')),
- 'vbr': float_or_none(f.get('video_bitrate')),
- })
+ }
+ if format_type == 'audio':
+ http_f['vcodec'] = 'none'
+ else:
+ http_f.update({
+ 'height': int_or_none(f.get('vertical_resolution')),
+ 'width': int_or_none(f.get('horizontal_resolution')),
+ 'vbr': float_or_none(f.get('video_bitrate')),
+ })
+ formats.append(http_f)
self._sort_formats(formats)
meta = video.get('meta', {})
@@ -105,12 +117,12 @@ class OnetMVPIE(OnetBaseIE):
class OnetIE(OnetBaseIE):
- _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
IE_NAME = 'onet.tv'
- _TEST = {
+ _TESTS = [{
'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
- 'md5': 'e3ffbf47590032ac3f27249204173d50',
+ 'md5': '436102770fb095c75b8bb0392d3da9ff',
'info_dict': {
'id': 'qbpyqc',
'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
@@ -120,7 +132,10 @@ class OnetIE(OnetBaseIE):
'upload_date': '20160705',
'timestamp': 1467721580,
},
- }
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -140,18 +155,21 @@ class OnetIE(OnetBaseIE):
class OnetChannelIE(OnetBaseIE):
- _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)'
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
IE_NAME = 'onet.tv:channel'
- _TEST = {
+ _TESTS = [{
'url': 'http://onet.tv/k/openerfestival',
'info_dict': {
'id': 'openerfestival',
- 'title': 'Open\'er Festival Live',
- 'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.',
+ 'title': "Open'er Festival",
+ 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
},
- 'playlist_mincount': 46,
- }
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
channel_id = self._match_id(url)
@@ -173,7 +191,7 @@ class OnetChannelIE(OnetBaseIE):
'Downloading channel %s - add --no-playlist to just download video %s' % (
channel_id, video_name))
matches = re.findall(
- r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)',
+ r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
webpage)
entries = [
self.url_result(video_link, OnetIE.ie_key())
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py
index c6e3d5640..cf5c39e66 100644
--- a/youtube_dl/extractor/onionstudios.py
+++ b/youtube_dl/extractor/onionstudios.py
@@ -4,12 +4,8 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- float_or_none,
- mimetype2ext,
-)
+from ..compat import compat_str
+from ..utils import js_to_json
class OnionStudiosIE(InfoExtractor):
@@ -17,14 +13,16 @@ class OnionStudiosIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
- 'md5': '719d1f8c32094b8c33902c17bcae5e34',
+ 'md5': '5a118d466d62b5cd03647cf2c593977f',
'info_dict': {
- 'id': '2937',
+ 'id': '3459881',
'ext': 'mp4',
'title': 'Hannibal charges forward, stops for a cocktail',
+ 'description': 'md5:545299bda6abf87e5ec666548c6a9448',
'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'The A.V. Club',
- 'uploader_id': 'the-av-club',
+ 'uploader': 'a.v. club',
+ 'upload_date': '20150619',
+ 'timestamp': 1434728546,
},
}, {
'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
@@ -44,38 +42,12 @@ class OnionStudiosIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- video_data = self._download_json(
- 'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
-
- title = video_data['title']
-
- formats = []
- for source in video_data.get('sources', []):
- source_url = source.get('url')
- if not source_url:
- continue
- ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- tbr = int_or_none(source.get('bitrate'))
- formats.append({
- 'format_id': ext + ('-%d' % tbr if tbr else ''),
- 'url': source_url,
- 'width': int_or_none(source.get('width')),
- 'tbr': tbr,
- 'ext': ext,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': video_data.get('poster_url'),
- 'uploader': video_data.get('channel_name'),
- 'uploader_id': video_data.get('channel_slug'),
- 'duration': float_or_none(video_data.get('duration', 1000)),
- 'tags': video_data.get('tags'),
- 'formats': formats,
- }
+ webpage = self._download_webpage(
+ 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
+ video_id)
+ mcp_id = compat_str(self._parse_json(self._search_regex(
+ r'window\.mcpMapping\s*=\s*({.+?});', webpage,
+ 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
+ return self.url_result(
+ 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
+ 'KinjaEmbed', mcp_id)
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index ad8bf03f8..eb957b8fe 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -1,12 +1,12 @@
from __future__ import unicode_literals
+import base64
import re
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
compat_str,
- compat_urllib_parse_urlencode,
)
from ..utils import (
determine_ext,
@@ -21,9 +21,9 @@ from ..utils import (
class OoyalaBaseIE(InfoExtractor):
_PLAYER_BASE = 'http://player.ooyala.com/'
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
- _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
+ _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s'
- def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None):
+ def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None):
content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code']
@@ -31,59 +31,62 @@ class OoyalaBaseIE(InfoExtractor):
title = metadata['title']
auth_data = self._download_json(
- self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
- compat_urllib_parse_urlencode({
- 'domain': domain,
+ self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code),
+ video_id, headers=self.geo_verification_headers(), query={
+ 'domain': domain or 'player.ooyala.com',
'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
'embedToken': embed_token,
- }), video_id)
-
- cur_auth_data = auth_data['authorization_data'][embed_code]
+ })['authorization_data'][embed_code]
urls = []
formats = []
- if cur_auth_data['authorized']:
- for stream in cur_auth_data['streams']:
- url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
- if not url_data:
- continue
- s_url = compat_b64decode(url_data).decode('utf-8')
- if not s_url or s_url in urls:
- continue
- urls.append(s_url)
- ext = determine_ext(s_url, None)
- delivery_type = stream.get('delivery_type')
- if delivery_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif delivery_type == 'hds' or ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
- elif delivery_type == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- s_url, embed_code, mpd_id='dash', fatal=False))
- elif delivery_type == 'smooth':
- self._extract_ism_formats(
- s_url, embed_code, ism_id='mss', fatal=False)
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- s_url, embed_code, fatal=False))
- else:
- formats.append({
- 'url': s_url,
- 'ext': ext or delivery_type,
- 'vcodec': stream.get('video_codec'),
- 'format_id': delivery_type,
- 'width': int_or_none(stream.get('width')),
- 'height': int_or_none(stream.get('height')),
- 'abr': int_or_none(stream.get('audio_bitrate')),
- 'vbr': int_or_none(stream.get('video_bitrate')),
- 'fps': float_or_none(stream.get('framerate')),
- })
- else:
+ streams = auth_data.get('streams') or [{
+ 'delivery_type': 'hls',
+ 'url': {
+ 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(),
+ }
+ }]
+ for stream in streams:
+ url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
+ if not url_data:
+ continue
+ s_url = compat_b64decode(url_data).decode('utf-8')
+ if not s_url or s_url in urls:
+ continue
+ urls.append(s_url)
+ ext = determine_ext(s_url, None)
+ delivery_type = stream.get('delivery_type')
+ if delivery_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif delivery_type == 'hds' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+ elif delivery_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ s_url, embed_code, mpd_id='dash', fatal=False))
+ elif delivery_type == 'smooth':
+ self._extract_ism_formats(
+ s_url, embed_code, ism_id='mss', fatal=False)
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ s_url, embed_code, fatal=False))
+ else:
+ formats.append({
+ 'url': s_url,
+ 'ext': ext or delivery_type,
+ 'vcodec': stream.get('video_codec'),
+ 'format_id': delivery_type,
+ 'width': int_or_none(stream.get('width')),
+ 'height': int_or_none(stream.get('height')),
+ 'abr': int_or_none(stream.get('audio_bitrate')),
+ 'vbr': int_or_none(stream.get('video_bitrate')),
+ 'fps': float_or_none(stream.get('framerate')),
+ })
+ if not formats and not auth_data.get('authorized'):
raise ExtractorError('%s said: %s' % (
- self.IE_NAME, cur_auth_data['message']), expected=True)
+ self.IE_NAME, auth_data['message']), expected=True)
self._sort_formats(formats)
subtitles = {}
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
index d51d3d2b4..1d6dd175b 100644
--- a/youtube_dl/extractor/openload.py
+++ b/youtube_dl/extractor/openload.py
@@ -3,21 +3,17 @@ from __future__ import unicode_literals
import json
import os
-import re
import subprocess
import tempfile
-from .common import InfoExtractor
from ..compat import (
compat_urlparse,
compat_kwargs,
)
from ..utils import (
check_executable,
- determine_ext,
encodeArgument,
ExtractorError,
- get_element_by_id,
get_exe_version,
is_outdated_version,
std_headers,
@@ -42,9 +38,9 @@ def cookie_to_dict(cookie):
if cookie.discard is not None:
cookie_dict['discard'] = cookie.discard
try:
- if (cookie.has_nonstandard_attr('httpOnly') or
- cookie.has_nonstandard_attr('httponly') or
- cookie.has_nonstandard_attr('HttpOnly')):
+ if (cookie.has_nonstandard_attr('httpOnly')
+ or cookie.has_nonstandard_attr('httponly')
+ or cookie.has_nonstandard_attr('HttpOnly')):
cookie_dict['httponly'] = True
except TypeError:
pass
@@ -240,6 +236,8 @@ class PhantomJSwrapper(object):
self._load_cookies()
return (html, encodeArgument(out))
+<<<<<<< HEAD
+=======
class OpenloadIE(InfoExtractor):
@@ -377,3 +375,4 @@ class OpenloadIE(InfoExtractor):
'http_headers': headers,
}
return info_dict
+>>>>>>> master
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index c1fb580ca..700ce448c 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -6,15 +6,18 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ clean_html,
determine_ext,
float_or_none,
HEADRequest,
int_or_none,
orderedSet,
remove_end,
+ str_or_none,
strip_jsonp,
unescapeHTML,
unified_strdate,
+ url_or_none,
)
@@ -68,26 +71,39 @@ class ORFTVthekIE(InfoExtractor):
webpage, 'playlist', group='json'),
playlist_id, transform_source=unescapeHTML)['playlist']['videos']
- def quality_to_int(s):
- m = re.search('([0-9]+)', s)
- if m is None:
- return -1
- return int(m.group(1))
-
entries = []
for sd in data_jsb:
video_id, title = sd.get('id'), sd.get('title')
if not video_id or not title:
continue
video_id = compat_str(video_id)
- formats = [{
- 'preference': -10 if fd['delivery'] == 'hls' else None,
- 'format_id': '%s-%s-%s' % (
- fd['delivery'], fd['quality'], fd['quality_string']),
- 'url': fd['src'],
- 'protocol': fd['protocol'],
- 'quality': quality_to_int(fd['quality']),
- } for fd in sd['sources']]
+ formats = []
+ for fd in sd['sources']:
+ src = url_or_none(fd.get('src'))
+ if not src:
+ continue
+ format_id_list = []
+ for key in ('delivery', 'quality', 'quality_string'):
+ value = fd.get(key)
+ if value:
+ format_id_list.append(value)
+ format_id = '-'.join(format_id_list)
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ if any('/geoprotection' in f['url'] for f in m3u8_formats):
+ self.raise_geo_restricted()
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': src,
+ 'protocol': fd.get('protocol'),
+ })
# Check for geoblocking.
# There is a property is_geoprotection, but that's always false
@@ -146,47 +162,53 @@ class ORFTVthekIE(InfoExtractor):
class ORFRadioIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- station = mobj.group('station')
show_date = mobj.group('date')
show_id = mobj.group('show')
- if station == 'fm4':
- show_id = '4%s' % show_id
-
data = self._download_json(
- 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),
- show_id
- )
-
- def extract_entry_dict(info, title, subtitle):
- return {
- 'id': info['loopStreamId'].replace('.mp3', ''),
- 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),
- 'title': title,
- 'description': subtitle,
- 'duration': (info['end'] - info['start']) / 1000,
- 'timestamp': info['start'] / 1000,
- 'ext': 'mp3'
- }
+ 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
+ % (self._API_STATION, show_id, show_date), show_id)
- entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
+ entries = []
+ for info in data['streams']:
+ loop_stream_id = str_or_none(info.get('loopStreamId'))
+ if not loop_stream_id:
+ continue
+ title = str_or_none(data.get('title'))
+ if not title:
+ continue
+ start = int_or_none(info.get('start'), scale=1000)
+ end = int_or_none(info.get('end'), scale=1000)
+ duration = end - start if end and start else None
+ entries.append({
+ 'id': loop_stream_id.replace('.mp3', ''),
+ 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
+ 'title': title,
+ 'description': clean_html(data.get('subtitle')),
+ 'duration': duration,
+ 'timestamp': start,
+ 'ext': 'mp3',
+ 'series': data.get('programTitle'),
+ })
return {
'_type': 'playlist',
'id': show_id,
- 'title': data['title'],
- 'description': data['subtitle'],
- 'entries': entries
+ 'title': data.get('title'),
+ 'description': clean_html(data.get('subtitle')),
+ 'entries': entries,
}
class ORFFM4IE(ORFRadioIE):
IE_NAME = 'orf:fm4'
IE_DESC = 'radio FM4'
- _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
+ _API_STATION = 'fm4'
+ _LOOP_STATION = 'fm4'
_TEST = {
- 'url': 'http://fm4.orf.at/player/20170107/CC',
+ 'url': 'http://fm4.orf.at/player/20170107/4CC',
'md5': '2b0be47375432a7ef104453432a19212',
'info_dict': {
'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
@@ -197,7 +219,138 @@ class ORFFM4IE(ORFRadioIE):
'timestamp': 1483819257,
'upload_date': '20170107',
},
- 'skip': 'Shows from ORF radios are only available for 7 days.'
+ 'skip': 'Shows from ORF radios are only available for 7 days.',
+ 'only_matching': True,
+ }
+
+
+class ORFNOEIE(ORFRadioIE):
+ IE_NAME = 'orf:noe'
+ IE_DESC = 'Radio Niederösterreich'
+ _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'noe'
+ _LOOP_STATION = 'oe2n'
+
+ _TEST = {
+ 'url': 'https://noe.orf.at/player/20200423/NGM',
+ 'only_matching': True,
+ }
+
+
+class ORFWIEIE(ORFRadioIE):
+ IE_NAME = 'orf:wien'
+ IE_DESC = 'Radio Wien'
+ _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'wie'
+ _LOOP_STATION = 'oe2w'
+
+ _TEST = {
+ 'url': 'https://wien.orf.at/player/20200423/WGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFBGLIE(ORFRadioIE):
+ IE_NAME = 'orf:burgenland'
+ IE_DESC = 'Radio Burgenland'
+ _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'bgl'
+ _LOOP_STATION = 'oe2b'
+
+ _TEST = {
+ 'url': 'https://burgenland.orf.at/player/20200423/BGM',
+ 'only_matching': True,
+ }
+
+
+class ORFOOEIE(ORFRadioIE):
+ IE_NAME = 'orf:oberoesterreich'
+ IE_DESC = 'Radio Oberösterreich'
+ _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'ooe'
+ _LOOP_STATION = 'oe2o'
+
+ _TEST = {
+ 'url': 'https://ooe.orf.at/player/20200423/OGMO',
+ 'only_matching': True,
+ }
+
+
+class ORFSTMIE(ORFRadioIE):
+ IE_NAME = 'orf:steiermark'
+ IE_DESC = 'Radio Steiermark'
+ _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'stm'
+ _LOOP_STATION = 'oe2st'
+
+ _TEST = {
+ 'url': 'https://steiermark.orf.at/player/20200423/STGMS',
+ 'only_matching': True,
+ }
+
+
+class ORFKTNIE(ORFRadioIE):
+ IE_NAME = 'orf:kaernten'
+ IE_DESC = 'Radio Kärnten'
+ _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'ktn'
+ _LOOP_STATION = 'oe2k'
+
+ _TEST = {
+ 'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
+ 'only_matching': True,
+ }
+
+
+class ORFSBGIE(ORFRadioIE):
+ IE_NAME = 'orf:salzburg'
+ IE_DESC = 'Radio Salzburg'
+ _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'sbg'
+ _LOOP_STATION = 'oe2s'
+
+ _TEST = {
+ 'url': 'https://salzburg.orf.at/player/20200423/SGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFTIRIE(ORFRadioIE):
+ IE_NAME = 'orf:tirol'
+ IE_DESC = 'Radio Tirol'
+ _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'tir'
+ _LOOP_STATION = 'oe2t'
+
+ _TEST = {
+ 'url': 'https://tirol.orf.at/player/20200423/TGUMO',
+ 'only_matching': True,
+ }
+
+
+class ORFVBGIE(ORFRadioIE):
+ IE_NAME = 'orf:vorarlberg'
+ IE_DESC = 'Radio Vorarlberg'
+ _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'vbg'
+ _LOOP_STATION = 'oe2v'
+
+ _TEST = {
+ 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFOE3IE(ORFRadioIE):
+ IE_NAME = 'orf:oe3'
+ IE_DESC = 'Radio Österreich 3'
+ _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'oe3'
+ _LOOP_STATION = 'oe3'
+
+ _TEST = {
+ 'url': 'https://oe3.orf.at/player/20200424/3WEK',
+ 'only_matching': True,
}
@@ -205,6 +358,8 @@ class ORFOE1IE(ORFRadioIE):
IE_NAME = 'orf:oe1'
IE_DESC = 'Radio Österreich 1'
_VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'oe1'
+ _LOOP_STATION = 'oe1'
_TEST = {
'url': 'http://oe1.orf.at/player/20170108/456544',
diff --git a/youtube_dl/extractor/outsidetv.py b/youtube_dl/extractor/outsidetv.py
new file mode 100644
index 000000000..c5333b08c
--- /dev/null
+++ b/youtube_dl/extractor/outsidetv.py
@@ -0,0 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class OutsideTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?outsidetv\.com/(?:[^/]+/)*?play/[a-zA-Z0-9]{8}/\d+/\d+/(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'http://www.outsidetv.com/category/snow/play/ZjQYboH6/1/10/Hdg0jukV/4',
+ 'md5': '192d968fedc10b2f70ec31865ffba0da',
+ 'info_dict': {
+ 'id': 'Hdg0jukV',
+ 'ext': 'mp4',
+ 'title': 'Home - Jackson Ep 1 | Arbor Snowboards',
+ 'description': 'md5:41a12e94f3db3ca253b04bb1e8d8f4cd',
+ 'upload_date': '20181225',
+ 'timestamp': 1545742800,
+ }
+ }, {
+ 'url': 'http://www.outsidetv.com/home/play/ZjQYboH6/1/10/Hdg0jukV/4',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ jw_media_id = self._match_id(url)
+ return self.url_result(
+ 'jwplatform:' + jw_media_id, 'JWPlatform', jw_media_id)
diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dl/extractor/packtpub.py
index 56a2a1083..11ad3b3b8 100644
--- a/youtube_dl/extractor/packtpub.py
+++ b/youtube_dl/extractor/packtpub.py
@@ -5,28 +5,29 @@ import re
from .common import InfoExtractor
from ..compat import (
- compat_str,
+ # compat_str,
compat_HTTPError,
)
from ..utils import (
clean_html,
ExtractorError,
- remove_end,
+ # remove_end,
+ str_or_none,
strip_or_none,
unified_timestamp,
- urljoin,
+ # urljoin,
)
class PacktPubBaseIE(InfoExtractor):
- _PACKT_BASE = 'https://www.packtpub.com'
- _MAPT_REST = '%s/mapt-rest' % _PACKT_BASE
+ # _PACKT_BASE = 'https://www.packtpub.com'
+ _STATIC_PRODUCTS_BASE = 'https://static.packt-cdn.com/products/'
class PacktPubIE(PacktPubBaseIE):
- _VALID_URL = r'https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>\d+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<course_id>\d+)/(?P<chapter_id>[^/]+)/(?P<id>[^/]+)(?:/(?P<display_id>[^/?&#]+))?'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215/20528/20530/Project+Intro',
'md5': '1e74bd6cfd45d7d07666f4684ef58f70',
'info_dict': {
@@ -37,7 +38,13 @@ class PacktPubIE(PacktPubBaseIE):
'timestamp': 1490918400,
'upload_date': '20170331',
},
- }
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215/20528/20530/project-intro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/programming/9781838988906/p1/video1_1/business-card-project',
+ 'only_matching': True,
+ }]
_NETRC_MACHINE = 'packtpub'
_TOKEN = None
@@ -47,9 +54,9 @@ class PacktPubIE(PacktPubBaseIE):
return
try:
self._TOKEN = self._download_json(
- self._MAPT_REST + '/users/tokens', None,
+ 'https://services.packtpub.com/auth-v1/users/tokens', None,
'Downloading Authorization Token', data=json.dumps({
- 'email': username,
+ 'username': username,
'password': password,
}).encode())['data']['access']
except ExtractorError as e:
@@ -58,67 +65,57 @@ class PacktPubIE(PacktPubBaseIE):
raise ExtractorError(message, expected=True)
raise
- def _handle_error(self, response):
- if response.get('status') != 'success':
- raise ExtractorError(
- '% said: %s' % (self.IE_NAME, response['message']),
- expected=True)
-
- def _download_json(self, *args, **kwargs):
- response = super(PacktPubIE, self)._download_json(*args, **kwargs)
- self._handle_error(response)
- return response
-
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- course_id, chapter_id, video_id = mobj.group(
- 'course_id', 'chapter_id', 'id')
+ course_id, chapter_id, video_id, display_id = re.match(self._VALID_URL, url).groups()
headers = {}
if self._TOKEN:
headers['Authorization'] = 'Bearer ' + self._TOKEN
- video = self._download_json(
- '%s/users/me/products/%s/chapters/%s/sections/%s'
- % (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
- 'Downloading JSON video', headers=headers)['data']
-
- content = video.get('content')
- if not content:
- self.raise_login_required('This video is locked')
-
- video_url = content['file']
+ try:
+ video_url = self._download_json(
+ 'https://services.packtpub.com/products-v1/products/%s/%s/%s' % (course_id, chapter_id, video_id), video_id,
+ 'Downloading JSON video', headers=headers)['data']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ self.raise_login_required('This video is locked')
+ raise
- metadata = self._download_json(
- '%s/products/%s/chapters/%s/sections/%s/metadata'
- % (self._MAPT_REST, course_id, chapter_id, video_id),
- video_id)['data']
+ # TODO: find a better way to avoid duplicating course requests
+ # metadata = self._download_json(
+ # '%s/products/%s/chapters/%s/sections/%s/metadata'
+ # % (self._MAPT_REST, course_id, chapter_id, video_id),
+ # video_id)['data']
- title = metadata['pageTitle']
- course_title = metadata.get('title')
- if course_title:
- title = remove_end(title, ' - %s' % course_title)
- timestamp = unified_timestamp(metadata.get('publicationDate'))
- thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
+ # title = metadata['pageTitle']
+ # course_title = metadata.get('title')
+ # if course_title:
+ # title = remove_end(title, ' - %s' % course_title)
+ # timestamp = unified_timestamp(metadata.get('publicationDate'))
+ # thumbnail = urljoin(self._PACKT_BASE, metadata.get('filepath'))
return {
'id': video_id,
'url': video_url,
- 'title': title,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
+ 'title': display_id or video_id, # title,
+ # 'thumbnail': thumbnail,
+ # 'timestamp': timestamp,
}
class PacktPubCourseIE(PacktPubBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?packtpub\.com/mapt/video/[^/]+/(?P<id>\d+))'
- _TEST = {
+ _VALID_URL = r'(?P<url>https?://(?:(?:www\.)?packtpub\.com/mapt|subscription\.packtpub\.com)/video/[^/]+/(?P<id>\d+))'
+ _TESTS = [{
'url': 'https://www.packtpub.com/mapt/video/web-development/9781787122215',
'info_dict': {
'id': '9781787122215',
'title': 'Learn Nodejs by building 12 projects [Video]',
+ 'description': 'md5:489da8d953f416e51927b60a1c7db0aa',
},
'playlist_count': 90,
- }
+ }, {
+ 'url': 'https://subscription.packtpub.com/video/web_development/9781787122215',
+ 'only_matching': True,
+ }]
@classmethod
def suitable(cls, url):
@@ -130,35 +127,38 @@ class PacktPubCourseIE(PacktPubBaseIE):
url, course_id = mobj.group('url', 'id')
course = self._download_json(
- '%s/products/%s/metadata' % (self._MAPT_REST, course_id),
- course_id)['data']
+ self._STATIC_PRODUCTS_BASE + '%s/toc' % course_id, course_id)
+ metadata = self._download_json(
+ self._STATIC_PRODUCTS_BASE + '%s/summary' % course_id,
+ course_id, fatal=False) or {}
entries = []
- for chapter_num, chapter in enumerate(course['tableOfContents'], 1):
- if chapter.get('type') != 'chapter':
- continue
- children = chapter.get('children')
- if not isinstance(children, list):
+ for chapter_num, chapter in enumerate(course['chapters'], 1):
+ chapter_id = str_or_none(chapter.get('id'))
+ sections = chapter.get('sections')
+ if not chapter_id or not isinstance(sections, list):
continue
chapter_info = {
'chapter': chapter.get('title'),
'chapter_number': chapter_num,
- 'chapter_id': chapter.get('id'),
+ 'chapter_id': chapter_id,
}
- for section in children:
- if section.get('type') != 'section':
- continue
- section_url = section.get('seoUrl')
- if not isinstance(section_url, compat_str):
+ for section in sections:
+ section_id = str_or_none(section.get('id'))
+ if not section_id or section.get('contentType') != 'video':
continue
entry = {
'_type': 'url_transparent',
- 'url': urljoin(url + '/', section_url),
+ 'url': '/'.join([url, chapter_id, section_id]),
'title': strip_or_none(section.get('title')),
'description': clean_html(section.get('summary')),
+ 'thumbnail': metadata.get('coverImage'),
+ 'timestamp': unified_timestamp(metadata.get('publicationDate')),
'ie_key': PacktPubIE.ie_key(),
}
entry.update(chapter_info)
entries.append(entry)
- return self.playlist_result(entries, course_id, course.get('title'))
+ return self.playlist_result(
+ entries, course_id, metadata.get('title'),
+ clean_html(metadata.get('about')))
diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py
deleted file mode 100644
index 13a2e7efc..000000000
--- a/youtube_dl/extractor/pandatv.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- qualities,
-)
-
-
-class PandaTVIE(InfoExtractor):
- IE_DESC = '熊猫TV'
- _VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.panda.tv/66666',
- 'info_dict': {
- 'id': '66666',
- 'title': 're:.+',
- 'uploader': '刘杀鸡',
- 'ext': 'flv',
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Live stream is offline',
- }, {
- 'url': 'https://www.panda.tv/66666',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- config = self._download_json(
- 'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
-
- error_code = config.get('errno', 0)
- if error_code is not 0:
- raise ExtractorError(
- '%s returned error %s: %s'
- % (self.IE_NAME, error_code, config['errmsg']),
- expected=True)
-
- data = config['data']
- video_info = data['videoinfo']
-
- # 2 = live, 3 = offline
- if video_info.get('status') != '2':
- raise ExtractorError(
- 'Live stream is offline', expected=True)
-
- title = data['roominfo']['name']
- uploader = data.get('hostinfo', {}).get('name')
- room_key = video_info['room_key']
- stream_addr = video_info.get(
- 'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
-
- # Reverse engineered from web player swf
- # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
- # writing).
- plflag0, plflag1 = video_info['plflag'].split('_')
- plflag0 = int(plflag0) - 1
- if plflag1 == '21':
- plflag0 = 10
- plflag1 = '4'
- live_panda = 'live_panda' if plflag0 < 1 else ''
-
- plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
- sign = plflag_auth['auth']['sign']
- ts = plflag_auth['auth']['time']
- rid = plflag_auth['auth']['rid']
-
- quality_key = qualities(['OD', 'HD', 'SD'])
- suffix = ['_small', '_mid', '']
- formats = []
- for k, v in stream_addr.items():
- if v != '1':
- continue
- quality = quality_key(k)
- if quality <= 0:
- continue
- for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
- formats.append({
- 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
- % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
- 'format_id': '%s-%s' % (k, ext),
- 'quality': quality,
- 'source_preference': pref,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._live_title(title),
- 'uploader': uploader,
- 'formats': formats,
- 'is_live': True,
- }
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
index 9eb027679..761a4b1de 100644
--- a/youtube_dl/extractor/patreon.py
+++ b/youtube_dl/extractor/patreon.py
@@ -2,52 +2,68 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import js_to_json
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+)
class PatreonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
- _TESTS = [
- {
- 'url': 'http://www.patreon.com/creation?hid=743933',
- 'md5': 'e25505eec1053a6e6813b8ed369875cc',
- 'info_dict': {
- 'id': '743933',
- 'ext': 'mp3',
- 'title': 'Episode 166: David Smalley of Dogma Debate',
- 'uploader': 'Cognitive Dissonance Podcast',
- 'thumbnail': 're:^https?://.*$',
- },
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.patreon.com/creation?hid=743933',
+ 'md5': 'e25505eec1053a6e6813b8ed369875cc',
+ 'info_dict': {
+ 'id': '743933',
+ 'ext': 'mp3',
+ 'title': 'Episode 166: David Smalley of Dogma Debate',
+ 'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ 'timestamp': 1406473987,
+ 'upload_date': '20140727',
+ 'uploader_id': '87145',
+ },
+ }, {
+ 'url': 'http://www.patreon.com/creation?hid=754133',
+ 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+ 'info_dict': {
+ 'id': '754133',
+ 'ext': 'mp3',
+ 'title': 'CD 167 Extra',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
},
- {
- 'url': 'http://www.patreon.com/creation?hid=754133',
- 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
- 'info_dict': {
- 'id': '754133',
- 'ext': 'mp3',
- 'title': 'CD 167 Extra',
- 'uploader': 'Cognitive Dissonance Podcast',
- 'thumbnail': 're:^https?://.*$',
- },
+ 'skip': 'Patron-only content',
+ }, {
+ 'url': 'https://www.patreon.com/creation?hid=1682498',
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'uploader': 'TraciJHines',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20150211',
+ 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+ 'uploader_id': 'TraciJHines',
},
- {
- 'url': 'https://www.patreon.com/creation?hid=1682498',
- 'info_dict': {
- 'id': 'SU4fj_aEMVw',
- 'ext': 'mp4',
- 'title': 'I\'m on Patreon!',
- 'uploader': 'TraciJHines',
- 'thumbnail': 're:^https?://.*$',
- 'upload_date': '20150211',
- 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
- 'uploader_id': 'TraciJHines',
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- }
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
}
- ]
+ }, {
+ 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.patreon.com/posts/743933',
+ 'only_matching': True,
+ }]
# Currently Patreon exposes download URL via hidden CSS, so login is not
# needed. Keeping this commented for when this inevitably changes.
@@ -78,38 +94,63 @@ class PatreonIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage).strip()
-
- attach_fn = self._html_search_regex(
- r'<div class="attach"><a target="_blank" href="([^"]+)">',
- webpage, 'attachment URL', default=None)
- embed = self._html_search_regex(
- r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
- webpage, 'embedded URL', default=None)
-
- if attach_fn is not None:
- video_url = 'http://www.patreon.com' + attach_fn
- thumbnail = self._og_search_thumbnail(webpage)
- uploader = self._html_search_regex(
- r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
- elif embed is not None:
- return self.url_result(embed)
- else:
- playlist = self._parse_json(self._search_regex(
- r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
- webpage, 'playlist JSON'),
- video_id, transform_source=js_to_json)
- data = playlist[0]
- video_url = self._proto_relative_url(data['mp3'])
- thumbnail = self._proto_relative_url(data.get('cover'))
- uploader = data.get('artist')
-
- return {
+ post = self._download_json(
+ 'https://www.patreon.com/api/posts/' + video_id, video_id, query={
+ 'fields[media]': 'download_url,mimetype,size_bytes',
+ 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
+ 'fields[user]': 'full_name,url',
+ 'json-api-use-default-includes': 'false',
+ 'include': 'media,user',
+ })
+ attributes = post['data']['attributes']
+ title = attributes['title'].strip()
+ image = attributes.get('image') or {}
+ info = {
'id': video_id,
- 'url': video_url,
- 'ext': 'mp3',
'title': title,
- 'uploader': uploader,
- 'thumbnail': thumbnail,
+ 'description': clean_html(attributes.get('content')),
+ 'thumbnail': image.get('large_url') or image.get('url'),
+ 'timestamp': parse_iso8601(attributes.get('published_at')),
+ 'like_count': int_or_none(attributes.get('like_count')),
+ 'comment_count': int_or_none(attributes.get('comment_count')),
}
+
+ for i in post.get('included', []):
+ i_type = i.get('type')
+ if i_type == 'media':
+ media_attributes = i.get('attributes') or {}
+ download_url = media_attributes.get('download_url')
+ ext = mimetype2ext(media_attributes.get('mimetype'))
+ if download_url and ext in KNOWN_EXTENSIONS:
+ info.update({
+ 'ext': ext,
+ 'filesize': int_or_none(media_attributes.get('size_bytes')),
+ 'url': download_url,
+ })
+ elif i_type == 'user':
+ user_attributes = i.get('attributes')
+ if user_attributes:
+ info.update({
+ 'uploader': user_attributes.get('full_name'),
+ 'uploader_id': str_or_none(i.get('id')),
+ 'uploader_url': user_attributes.get('url'),
+ })
+
+ if not info.get('url'):
+ embed_url = try_get(attributes, lambda x: x['embed']['url'])
+ if embed_url:
+ info.update({
+ '_type': 'url',
+ 'url': embed_url,
+ })
+
+ if not info.get('url'):
+ post_file = attributes['post_file']
+ ext = determine_ext(post_file.get('name'))
+ if ext in KNOWN_EXTENSIONS:
+ info.update({
+ 'ext': ext,
+ 'url': post_file['url'],
+ })
+
+ return info
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 80340f595..4dbe661be 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -305,7 +305,7 @@ class PBSIE(InfoExtractor):
{
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
- # https://github.com/rg3/youtube-dl/issues/7059)
+ # https://github.com/ytdl-org/youtube-dl/issues/7059)
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
'info_dict': {
@@ -348,7 +348,7 @@ class PBSIE(InfoExtractor):
},
},
{
- # https://github.com/rg3/youtube-dl/issues/13801
+ # https://github.com/ytdl-org/youtube-dl/issues/13801
'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
'info_dict': {
'id': '3003333873',
@@ -642,7 +642,7 @@ class PBSIE(InfoExtractor):
# we won't try extracting them.
# Since summer 2016 higher quality formats (4500k and 6500k) are also available
# albeit they are not documented in [2].
- # 1. https://github.com/rg3/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
+ # 1. https://github.com/ytdl-org/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
# 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
if not bitrate or int(bitrate) < 400:
continue
diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py
index e03c3d1d3..48fb95416 100644
--- a/youtube_dl/extractor/peertube.py
+++ b/youtube_dl/extractor/peertube.py
@@ -8,6 +8,7 @@ from ..compat import compat_str
from ..utils import (
int_or_none,
parse_resolution,
+ str_or_none,
try_get,
unified_timestamp,
url_or_none,
@@ -18,81 +19,385 @@ from ..utils import (
class PeerTubeIE(InfoExtractor):
_INSTANCES_RE = r'''(?:
# Taken from https://instances.joinpeertube.org/instances
+ peertube\.rainbowswingers\.net|
+ tube\.stanisic\.nl|
+ peer\.suiri\.us|
+ medias\.libox\.fr|
+ videomensoif\.ynh\.fr|
+ peertube\.travelpandas\.eu|
+ peertube\.rachetjay\.fr|
+ peertube\.montecsys\.fr|
+ tube\.eskuero\.me|
+ peer\.tube|
+ peertube\.umeahackerspace\.se|
+ tube\.nx-pod\.de|
+ video\.monsieurbidouille\.fr|
tube\.openalgeria\.org|
- peertube\.pointsecu\.fr|
+ vid\.lelux\.fi|
+ video\.anormallostpod\.ovh|
+ tube\.crapaud-fou\.org|
+ peertube\.stemy\.me|
+ lostpod\.space|
+ exode\.me|
+ peertube\.snargol\.com|
+ vis\.ion\.ovh|
+ videosdulib\.re|
+ v\.mbius\.io|
+ videos\.judrey\.eu|
+ peertube\.osureplayviewer\.xyz|
+ peertube\.mathieufamily\.ovh|
+ www\.videos-libr\.es|
+ fightforinfo\.com|
+ peertube\.fediverse\.ru|
+ peertube\.oiseauroch\.fr|
+ video\.nesven\.eu|
+ v\.bearvideo\.win|
+ video\.qoto\.org|
+ justporn\.cc|
+ video\.vny\.fr|
+ peervideo\.club|
+ tube\.taker\.fr|
+ peertube\.chantierlibre\.org|
+ tube\.ipfixe\.info|
+ tube\.kicou\.info|
+ tube\.dodsorf\.as|
+ videobit\.cc|
+ video\.yukari\.moe|
+ videos\.elbinario\.net|
+ hkvideo\.live|
+ pt\.tux\.tf|
+ www\.hkvideo\.live|
+ FIGHTFORINFO\.com|
+ pt\.765racing\.com|
+ peertube\.gnumeria\.eu\.org|
+ nordenmedia\.com|
+ peertube\.co\.uk|
+ tube\.darfweb\.eu|
+ tube\.kalah-france\.org|
+ 0ch\.in|
+ vod\.mochi\.academy|
+ film\.node9\.org|
+ peertube\.hatthieves\.es|
+ video\.fitchfamily\.org|
+ peertube\.ddns\.net|
+ video\.ifuncle\.kr|
+ video\.fdlibre\.eu|
+ tube\.22decembre\.eu|
+ peertube\.harmoniescreatives\.com|
+ tube\.fabrigli\.fr|
+ video\.thedwyers\.co|
+ video\.bruitbruit\.com|
+ peertube\.foxfam\.club|
+ peer\.philoxweb\.be|
+ videos\.bugs\.social|
+ peertube\.malbert\.xyz|
+ peertube\.bilange\.ca|
+ libretube\.net|
+ diytelevision\.com|
+ peertube\.fedilab\.app|
+ libre\.video|
+ video\.mstddntfdn\.online|
+ us\.tv|
+ peertube\.sl-network\.fr|
+ peertube\.dynlinux\.io|
+ peertube\.david\.durieux\.family|
+ peertube\.linuxrocks\.online|
+ peerwatch\.xyz|
+ v\.kretschmann\.social|
+ tube\.otter\.sh|
+ yt\.is\.nota\.live|
+ tube\.dragonpsi\.xyz|
+ peertube\.boneheadmedia\.com|
+ videos\.funkwhale\.audio|
+ watch\.44con\.com|
+ peertube\.gcaillaut\.fr|
+ peertube\.icu|
+ pony\.tube|
+ spacepub\.space|
+ tube\.stbr\.io|
+ v\.mom-gay\.faith|
+ tube\.port0\.xyz|
+ peertube\.simounet\.net|
+ play\.jergefelt\.se|
+ peertube\.zeteo\.me|
+ tube\.danq\.me|
+ peertube\.kerenon\.com|
+ tube\.fab-l3\.org|
+ tube\.calculate\.social|
+ peertube\.mckillop\.org|
+ tube\.netzspielplatz\.de|
+ vod\.ksite\.de|
+ peertube\.laas\.fr|
+ tube\.govital\.net|
+ peertube\.stephenson\.cc|
+ bistule\.nohost\.me|
+ peertube\.kajalinifi\.de|
+ video\.ploud\.jp|
+ video\.omniatv\.com|
+ peertube\.ffs2play\.fr|
+ peertube\.leboulaire\.ovh|
+ peertube\.tronic-studio\.com|
+ peertube\.public\.cat|
+ peertube\.metalbanana\.net|
+ video\.1000i100\.fr|
+ peertube\.alter-nativ-voll\.de|
+ tube\.pasa\.tf|
+ tube\.worldofhauru\.xyz|
+ pt\.kamp\.site|
+ peertube\.teleassist\.fr|
+ videos\.mleduc\.xyz|
+ conf\.tube|
+ media\.privacyinternational\.org|
+ pt\.forty-two\.nl|
+ video\.halle-leaks\.de|
+ video\.grosskopfgames\.de|
+ peertube\.schaeferit\.de|
+ peertube\.jackbot\.fr|
+ tube\.extinctionrebellion\.fr|
+ peertube\.f-si\.org|
+ video\.subak\.ovh|
+ videos\.koweb\.fr|
+ peertube\.zergy\.net|
+ peertube\.roflcopter\.fr|
+ peertube\.floss-marketing-school\.com|
+ vloggers\.social|
+ peertube\.iriseden\.eu|
+ videos\.ubuntu-paris\.org|
+ peertube\.mastodon\.host|
+ armstube\.com|
+ peertube\.s2s\.video|
+ peertube\.lol|
+ tube\.open-plug\.eu|
+ open\.tube|
+ peertube\.ch|
+ peertube\.normandie-libre\.fr|
+ peertube\.slat\.org|
+ video\.lacaveatonton\.ovh|
+ peertube\.uno|
+ peertube\.servebeer\.com|
+ peertube\.fedi\.quebec|
+ tube\.h3z\.jp|
+ tube\.plus200\.com|
+ peertube\.eric\.ovh|
+ tube\.metadocs\.cc|
+ tube\.unmondemeilleur\.eu|
+ gouttedeau\.space|
+ video\.antirep\.net|
+ nrop\.cant\.at|
+ tube\.ksl-bmx\.de|
+ tube\.plaf\.fr|
+ tube\.tchncs\.de|
+ video\.devinberg\.com|
+ hitchtube\.fr|
+ peertube\.kosebamse\.com|
+ yunopeertube\.myddns\.me|
+ peertube\.varney\.fr|
+ peertube\.anon-kenkai\.com|
+ tube\.maiti\.info|
+ tubee\.fr|
+ videos\.dinofly\.com|
+ toobnix\.org|
+ videotape\.me|
+ voca\.tube|
+ video\.heromuster\.com|
+ video\.lemediatv\.fr|
+ video\.up\.edu\.ph|
+ balafon\.video|
+ video\.ivel\.fr|
+ thickrips\.cloud|
+ pt\.laurentkruger\.fr|
+ video\.monarch-pass\.net|
+ peertube\.artica\.center|
+ video\.alternanet\.fr|
+ indymotion\.fr|
+ fanvid\.stopthatimp\.net|
+ video\.farci\.org|
+ v\.lesterpig\.com|
+ video\.okaris\.de|
+ tube\.pawelko\.net|
+ peertube\.mablr\.org|
+ tube\.fede\.re|
+ pytu\.be|
+ evertron\.tv|
+ devtube\.dev-wiki\.de|
+ raptube\.antipub\.org|
+ video\.selea\.se|
+ peertube\.mygaia\.org|
+ video\.oh14\.de|
+ peertube\.livingutopia\.org|
+ peertube\.the-penguin\.de|
+ tube\.thechangebook\.org|
+ tube\.anjara\.eu|
+ pt\.pube\.tk|
+ video\.samedi\.pm|
+ mplayer\.demouliere\.eu|
+ widemus\.de|
+ peertube\.me|
+ peertube\.zapashcanon\.fr|
+ video\.latavernedejohnjohn\.fr|
+ peertube\.pcservice46\.fr|
+ peertube\.mazzonetto\.eu|
+ video\.irem\.univ-paris-diderot\.fr|
+ video\.livecchi\.cloud|
+ alttube\.fr|
+ video\.coop\.tools|
+ video\.cabane-libre\.org|
+ peertube\.openstreetmap\.fr|
+ videos\.alolise\.org|
+ irrsinn\.video|
+ video\.antopie\.org|
+ scitech\.video|
+ tube2\.nemsia\.org|
+ video\.amic37\.fr|
+ peertube\.freeforge\.eu|
+ video\.arbitrarion\.com|
+ video\.datsemultimedia\.com|
+ stoptrackingus\.tv|
+ peertube\.ricostrongxxx\.com|
+ docker\.videos\.lecygnenoir\.info|
+ peertube\.togart\.de|
+ tube\.postblue\.info|
+ videos\.domainepublic\.net|
+ peertube\.cyber-tribal\.com|
+ video\.gresille\.org|
+ peertube\.dsmouse\.net|
+ cinema\.yunohost\.support|
+ tube\.theocevaer\.fr|
+ repro\.video|
+ tube\.4aem\.com|
+ quaziinc\.com|
+ peertube\.metawurst\.space|
+ videos\.wakapo\.com|
+ video\.ploud\.fr|
+ video\.freeradical\.zone|
+ tube\.valinor\.fr|
+ refuznik\.video|
+ pt\.kircheneuenburg\.de|
+ peertube\.asrun\.eu|
+ peertube\.lagob\.fr|
+ videos\.side-ways\.net|
+ 91video\.online|
+ video\.valme\.io|
+ video\.taboulisme\.com|
+ videos-libr\.es|
+ tv\.mooh\.fr|
+ nuage\.acostey\.fr|
+ video\.monsieur-a\.fr|
+ peertube\.librelois\.fr|
+ videos\.pair2jeux\.tube|
+ videos\.pueseso\.club|
+ peer\.mathdacloud\.ovh|
+ media\.assassinate-you\.net|
+ vidcommons\.org|
+ ptube\.rousset\.nom\.fr|
+ tube\.cyano\.at|
+ videos\.squat\.net|
+ video\.iphodase\.fr|
+ peertube\.makotoworkshop\.org|
+ peertube\.serveur\.slv-valbonne\.fr|
+ vault\.mle\.party|
+ hostyour\.tv|
+ videos\.hack2g2\.fr|
+ libre\.tube|
+ pire\.artisanlogiciel\.net|
+ videos\.numerique-en-commun\.fr|
+ video\.netsyms\.com|
+ video\.die-partei\.social|
+ video\.writeas\.org|
+ peertube\.swarm\.solvingmaz\.es|
+ tube\.pericoloso\.ovh|
+ watching\.cypherpunk\.observer|
+ videos\.adhocmusic\.com|
+ tube\.rfc1149\.net|
+ peertube\.librelabucm\.org|
+ videos\.numericoop\.fr|
+ peertube\.koehn\.com|
+ peertube\.anarchmusicall\.net|
+ tube\.kampftoast\.de|
+ vid\.y-y\.li|
+ peertube\.xtenz\.xyz|
+ diode\.zone|
+ tube\.egf\.mn|
+ peertube\.nomagic\.uk|
+ visionon\.tv|
+ videos\.koumoul\.com|
+ video\.rastapuls\.com|
+ video\.mantlepro\.com|
+ video\.deadsuperhero\.com|
+ peertube\.musicstudio\.pro|
+ peertube\.we-keys\.fr|
+ artitube\.artifaille\.fr|
+ peertube\.ethernia\.net|
+ tube\.midov\.pl|
+ peertube\.fr|
+ watch\.snoot\.tube|
+ peertube\.donnadieu\.fr|
+ argos\.aquilenet\.fr|
+ tube\.nemsia\.org|
+ tube\.bruniau\.net|
+ videos\.darckoune\.moe|
+ tube\.traydent\.info|
+ dev\.videos\.lecygnenoir\.info|
+ peertube\.nayya\.org|
+ peertube\.live|
+ peertube\.mofgao\.space|
+ video\.lequerrec\.eu|
+ peertube\.amicale\.net|
+ aperi\.tube|
+ tube\.ac-lyon\.fr|
+ video\.lw1\.at|
+ www\.yiny\.org|
+ videos\.pofilo\.fr|
+ tube\.lou\.lt|
+ choob\.h\.etbus\.ch|
+ tube\.hoga\.fr|
+ peertube\.heberge\.fr|
+ video\.obermui\.de|
+ videos\.cloudfrancois\.fr|
+ betamax\.video|
+ video\.typica\.us|
+ tube\.piweb\.be|
+ video\.blender\.org|
+ peertube\.cat|
+ tube\.kdy\.ch|
+ pe\.ertu\.be|
+ peertube\.social|
+ videos\.lescommuns\.org|
+ tv\.datamol\.org|
+ videonaute\.fr|
+ dialup\.express|
peertube\.nogafa\.org|
- peertube\.pl|
megatube\.lilomoino\.fr|
peertube\.tamanoir\.foucry\.net|
- peertube\.inapurna\.org|
- peertube\.netzspielplatz\.de|
- video\.deadsuperhero\.com|
peertube\.devosi\.org|
peertube\.1312\.media|
- tube\.worldofhauru\.xyz|
tube\.bootlicker\.party|
skeptikon\.fr|
- peertube\.geekshell\.fr|
- tube\.opportunis\.me|
- peertube\.peshane\.net|
video\.blueline\.mg|
tube\.homecomputing\.fr|
- videos\.cloudfrancois\.fr|
- peertube\.viviers-fibre\.net|
tube\.ouahpiti\.info|
video\.tedomum\.net|
video\.g3l\.org|
fontube\.fr|
peertube\.gaialabs\.ch|
- peertube\.extremely\.online|
- peertube\.public-infrastructure\.eu|
tube\.kher\.nl|
peertube\.qtg\.fr|
- tube\.22decembre\.eu|
- facegirl\.me|
video\.migennes\.net|
- janny\.moe|
tube\.p2p\.legal|
- video\.atlanti\.se|
troll\.tv|
- peertube\.geekael\.fr|
- vid\.leotindall\.com|
- video\.anormallostpod\.ovh|
- p-tube\.h3z\.jp|
- tube\.darfweb\.eu|
videos\.iut-orsay\.fr|
peertube\.solidev\.net|
- videos\.symphonie-of-code\.fr|
- testtube\.ortg\.de|
videos\.cemea\.org|
- peertube\.gwendalavir\.eu|
video\.passageenseine\.fr|
videos\.festivalparminous\.org|
peertube\.touhoppai\.moe|
- peertube\.duckdns\.org|
sikke\.fi|
- peertube\.mastodon\.host|
- firedragonvideos\.com|
- vidz\.dou\.bet|
- peertube\.koehn\.com|
peer\.hostux\.social|
share\.tube|
peertube\.walkingmountains\.fr|
- medias\.libox\.fr|
- peertube\.moe|
- peertube\.xyz|
- jp\.peertube\.network|
videos\.benpro\.fr|
- tube\.otter\.sh|
- peertube\.angristan\.xyz|
peertube\.parleur\.net|
- peer\.ecutsa\.fr|
peertube\.heraut\.eu|
- peertube\.tifox\.fr|
- peertube\.maly\.io|
- vod\.mochi\.academy|
- exode\.me|
- coste\.video|
tube\.aquilenet\.fr|
peertube\.gegeweb\.eu|
framatube\.org|
@@ -100,24 +405,18 @@ class PeerTubeIE(InfoExtractor):
tube\.conferences-gesticulees\.net|
peertube\.datagueule\.tv|
video\.lqdn\.fr|
- meilleurtube\.delire\.party|
tube\.mochi\.academy|
- peertube\.dav\.li|
media\.zat\.im|
- pytu\.be|
- peertube\.valvin\.fr|
- peertube\.nsa\.ovh|
video\.colibris-outilslibres\.org|
- video\.hispagatos\.org|
tube\.svnet\.fr|
peertube\.video|
- videos\.lecygnenoir\.info|
peertube3\.cpy\.re|
peertube2\.cpy\.re|
videos\.tcit\.fr|
peertube\.cpy\.re
)'''
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _API_BASE = 'https://%s/api/v1/videos/%s/%s'
_VALID_URL = r'''(?x)
(?:
peertube:(?P<host>[^:]+):|
@@ -126,26 +425,30 @@ class PeerTubeIE(InfoExtractor):
(?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE)
_TESTS = [{
- 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
- 'md5': '80f24ff364cc9d333529506a263e7feb',
+ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'md5': '9bed8c0137913e17b86334e5885aacff',
'info_dict': {
- 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+ 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
'ext': 'mp4',
- 'title': 'wow',
- 'description': 'wow such video, so gif',
+ 'title': 'What is PeerTube?',
+ 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
'thumbnail': r're:https?://.*\.(?:jpg|png)',
- 'timestamp': 1519297480,
- 'upload_date': '20180222',
- 'uploader': 'Luclu7',
- 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
- 'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
- 'license': 'Unknown',
- 'duration': 3,
+ 'timestamp': 1538391166,
+ 'upload_date': '20181001',
+ 'uploader': 'Framasoft',
+ 'uploader_id': '3',
+ 'uploader_url': 'https://framatube.org/accounts/framasoft',
+ 'channel': 'Les vidéos de Framasoft',
+ 'channel_id': '2',
+ 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'language': 'en',
+ 'license': 'Attribution - Share Alike',
+ 'duration': 113,
'view_count': int,
'like_count': int,
'dislike_count': int,
- 'tags': list,
- 'categories': list,
+ 'tags': ['framasoft', 'peertube'],
+ 'categories': ['Science & Technology'],
}
}, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
@@ -168,7 +471,7 @@ class PeerTubeIE(InfoExtractor):
@staticmethod
def _extract_peertube_url(webpage, source_url):
mobj = re.match(
- r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)'
+ r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
% PeerTubeIE._UUID_RE, source_url)
if mobj and any(p in webpage for p in (
'<title>PeerTube<',
@@ -187,13 +490,38 @@ class PeerTubeIE(InfoExtractor):
entries = [peertube_url]
return entries
+ def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+ return self._download_json(
+ self._API_BASE % (host, video_id, path), video_id,
+ note=note, errnote=errnote, fatal=fatal)
+
+ def _get_subtitles(self, host, video_id):
+ captions = self._call_api(
+ host, video_id, 'captions', note='Downloading captions JSON',
+ fatal=False)
+ if not isinstance(captions, dict):
+ return
+ data = captions.get('data')
+ if not isinstance(data, list):
+ return
+ subtitles = {}
+ for e in data:
+ language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+ caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+ if not caption_url:
+ continue
+ subtitles.setdefault(language_id or 'en', []).append({
+ 'url': caption_url,
+ })
+ return subtitles
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') or mobj.group('host_2')
video_id = mobj.group('id')
- video = self._download_json(
- 'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
+ video = self._call_api(
+ host, video_id, '', note='Downloading video JSON')
title = video['name']
@@ -216,10 +544,28 @@ class PeerTubeIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
- def account_data(field):
- return try_get(video, lambda x: x['account'][field], compat_str)
+ full_description = self._call_api(
+ host, video_id, 'description', note='Downloading description JSON',
+ fatal=False)
+
+ description = None
+ if isinstance(full_description, dict):
+ description = str_or_none(full_description.get('description'))
+ if not description:
+ description = video.get('description')
+
+ subtitles = self.extract_subtitles(host, video_id)
+
+ def data(section, field, type_):
+ return try_get(video, lambda x: x[section][field], type_)
+
+ def account_data(field, type_):
+ return data('account', field, type_)
+
+ def channel_data(field, type_):
+ return data('channel', field, type_)
- category = try_get(video, lambda x: x['category']['label'], compat_str)
+ category = data('category', 'label', compat_str)
categories = [category] if category else None
nsfw = video.get('nsfw')
@@ -231,14 +577,17 @@ class PeerTubeIE(InfoExtractor):
return {
'id': video_id,
'title': title,
- 'description': video.get('description'),
+ 'description': description,
'thumbnail': urljoin(url, video.get('thumbnailPath')),
'timestamp': unified_timestamp(video.get('publishedAt')),
- 'uploader': account_data('displayName'),
- 'uploader_id': account_data('uuid'),
- 'uploder_url': account_data('url'),
- 'license': try_get(
- video, lambda x: x['licence']['label'], compat_str),
+ 'uploader': account_data('displayName', compat_str),
+ 'uploader_id': str_or_none(account_data('id', int)),
+ 'uploader_url': url_or_none(account_data('url', compat_str)),
+ 'channel': channel_data('displayName', compat_str),
+ 'channel_id': str_or_none(channel_data('id', int)),
+ 'channel_url': url_or_none(channel_data('url', compat_str)),
+ 'language': data('language', 'id', compat_str),
+ 'license': data('licence', 'label', compat_str),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')),
'like_count': int_or_none(video.get('likes')),
@@ -247,4 +596,5 @@ class PeerTubeIE(InfoExtractor):
'tags': try_get(video, lambda x: x['tags'], list),
'categories': categories,
'formats': formats,
+ 'subtitles': subtitles
}
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
index 8afe541ec..b15906390 100644
--- a/youtube_dl/extractor/periscope.py
+++ b/youtube_dl/extractor/periscope.py
@@ -5,6 +5,7 @@ import re
from .common import InfoExtractor
from ..utils import (
+ int_or_none,
parse_iso8601,
unescapeHTML,
)
@@ -16,12 +17,54 @@ class PeriscopeBaseIE(InfoExtractor):
'https://api.periscope.tv/api/v2/%s' % method,
item_id, query=query)
+ def _parse_broadcast_data(self, broadcast, video_id):
+ title = broadcast.get('status') or 'Periscope Broadcast'
+ uploader = broadcast.get('user_display_name') or broadcast.get('username')
+ title = '%s - %s' % (uploader, title) if uploader else title
+        is_live = (broadcast.get('state') or '').lower() == 'running'
+
+ thumbnails = [{
+ 'url': broadcast[image],
+ } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
+
+ return {
+ 'id': broadcast.get('id') or video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'timestamp': parse_iso8601(broadcast.get('created_at')),
+ 'uploader': uploader,
+ 'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
+ 'thumbnails': thumbnails,
+ 'view_count': int_or_none(broadcast.get('total_watched')),
+ 'tags': broadcast.get('tags'),
+ 'is_live': is_live,
+ }
+
+ @staticmethod
+ def _extract_common_format_info(broadcast):
+        return (broadcast.get('state') or '').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))
+
+ @staticmethod
+ def _add_width_and_height(f, width, height):
+ for key, val in (('width', width), ('height', height)):
+ if not f.get(key):
+ f[key] = val
+
+ def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native'
+ if state in ('ended', 'timed_out') else 'm3u8',
+ m3u8_id=format_id, fatal=fatal)
+ if len(m3u8_formats) == 1:
+ self._add_width_and_height(m3u8_formats[0], width, height)
+ return m3u8_formats
+
class PeriscopeIE(PeriscopeBaseIE):
IE_DESC = 'Periscope'
IE_NAME = 'periscope'
_VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
- # Alive example URLs can be found here http://onperiscope.com/
+ # Alive example URLs can be found here https://www.periscope.tv/
_TESTS = [{
'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
'md5': '65b57957972e503fcbbaeed8f4fa04ca',
@@ -60,20 +103,16 @@ class PeriscopeIE(PeriscopeBaseIE):
'accessVideoPublic', {'broadcast_id': token}, token)
broadcast = stream['broadcast']
- title = broadcast['status']
+ info = self._parse_broadcast_data(broadcast, token)
- uploader = broadcast.get('user_display_name') or broadcast.get('username')
- uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
-
- title = '%s - %s' % (uploader, title) if uploader else title
state = broadcast.get('state').lower()
- if state == 'running':
- title = self._live_title(title)
- timestamp = parse_iso8601(broadcast.get('created_at'))
+ width = int_or_none(broadcast.get('width'))
+ height = int_or_none(broadcast.get('height'))
- thumbnails = [{
- 'url': broadcast[image],
- } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
+ def add_width_and_height(f):
+ for key, val in (('width', width), ('height', height)):
+ if not f.get(key):
+ f[key] = val
video_urls = set()
formats = []
@@ -83,27 +122,20 @@ class PeriscopeIE(PeriscopeBaseIE):
continue
video_urls.add(video_url)
if format_id != 'rtmp':
- formats.extend(self._extract_m3u8_formats(
- video_url, token, 'mp4',
- entry_protocol='m3u8_native'
- if state in ('ended', 'timed_out') else 'm3u8',
- m3u8_id=format_id, fatal=False))
+ m3u8_formats = self._extract_pscp_m3u8_formats(
+ video_url, token, format_id, state, width, height, False)
+ formats.extend(m3u8_formats)
continue
- formats.append({
+ rtmp_format = {
'url': video_url,
'ext': 'flv' if format_id == 'rtmp' else 'mp4',
- })
+ }
+            self._add_width_and_height(rtmp_format, width, height)
+ formats.append(rtmp_format)
self._sort_formats(formats)
- return {
- 'id': broadcast.get('id') or token,
- 'title': title,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
+ info['formats'] = formats
+ return info
class PeriscopeUserIE(PeriscopeBaseIE):
diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py
index f1008ae51..03da64b11 100644
--- a/youtube_dl/extractor/philharmoniedeparis.py
+++ b/youtube_dl/extractor/philharmoniedeparis.py
@@ -2,77 +2,105 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- float_or_none,
- int_or_none,
- parse_iso8601,
- xpath_text,
+ try_get,
+ urljoin,
)
class PhilharmonieDeParisIE(InfoExtractor):
IE_DESC = 'Philharmonie de Paris'
- _VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
+ pad\.philharmoniedeparis\.fr/doc/CIMU/
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
+ 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
+ 'md5': 'a0a4b195f544645073631cbec166a2c2',
+ 'info_dict': {
+ 'id': '1086697',
+ 'ext': 'mp4',
+ 'title': 'Jazz à la Villette : Knower',
+ },
+ }, {
'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
'info_dict': {
'id': '1032066',
- 'ext': 'flv',
- 'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
- 'timestamp': 1428179400,
- 'upload_date': '20150404',
- 'duration': 6592.278,
+ 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
+ 'playlist_mincount': 2,
}, {
'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
'only_matching': True,
}, {
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
'only_matching': True,
+ }, {
+ 'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://live.philharmoniedeparis.fr/embed/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
+ 'only_matching': True,
}]
+ _LIVE_URL = 'https://live.philharmoniedeparis.fr'
def _real_extract(self, url):
video_id = self._match_id(url)
- concert = self._download_xml(
- 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
- video_id).find('./concert')
-
- formats = []
- info_dict = {
- 'id': video_id,
- 'title': xpath_text(concert, './titre', 'title', fatal=True),
- 'formats': formats,
- }
+ config = self._download_json(
+ '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
+ 'id': video_id,
+ 'lang': 'fr-FR',
+ })
- fichiers = concert.find('./fichiers')
- stream = fichiers.attrib['serveurstream']
- for fichier in fichiers.findall('./fichier'):
- info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
- for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]):
- format_url = fichier.get('url%s' % suffix)
- if not format_url:
+ def extract_entry(source):
+ if not isinstance(source, dict):
+ return
+ title = source.get('title')
+ if not title:
+ return
+ files = source.get('files')
+ if not isinstance(files, dict):
+ return
+ format_urls = set()
+ formats = []
+ for format_id in ('mobile', 'desktop'):
+ format_url = try_get(
+ files, lambda x: x[format_id]['file'], compat_str)
+ if not format_url or format_url in format_urls:
continue
- formats.append({
- 'url': stream,
- 'play_path': format_url,
- 'ext': 'flv',
- 'format_id': format_id,
- 'width': int_or_none(concert.get('largeur%s' % suffix)),
- 'height': int_or_none(concert.get('hauteur%s' % suffix)),
- 'quality': quality,
- })
- self._sort_formats(formats)
+ format_urls.add(format_url)
+ m3u8_url = urljoin(self._LIVE_URL, format_url)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ if not formats:
+ return
+ self._sort_formats(formats)
+ return {
+ 'title': title,
+ 'formats': formats,
+ }
+
+ thumbnail = urljoin(self._LIVE_URL, config.get('image'))
+
+ info = extract_entry(config)
+ if info:
+ info.update({
+ 'id': video_id,
+ 'thumbnail': thumbnail,
+ })
+ return info
- date, hour = concert.get('date'), concert.get('heure')
- if date and hour:
- info_dict['timestamp'] = parse_iso8601(
- '%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
- elif date:
- info_dict['upload_date'] = date
+ entries = []
+        for num, chapter in enumerate(config['chapters'], start=1):
+            entry = extract_entry(chapter)
+            if entry is not None:
+                entries.append(dict(entry, id='%s-%d' % (video_id, num)))
- return info_dict
+ return self.playlist_result(entries, video_id, config.get('title'))
diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py
index 2366dfb34..8099ef1d6 100644
--- a/youtube_dl/extractor/picarto.py
+++ b/youtube_dl/extractor/picarto.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
import time
from .common import InfoExtractor
@@ -15,7 +16,7 @@ from ..utils import (
class PicartoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
_TEST = {
'url': 'https://picarto.tv/Setz',
'info_dict': {
@@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
def _real_extract(self, url):
- channel_id = self._match_id(url)
- stream_page = self._download_webpage(url, channel_id)
+ mobj = re.match(self._VALID_URL, url)
+ channel_id = mobj.group('id')
- if '>This channel does not exist' in stream_page:
- raise ExtractorError(
- 'Channel %s does not exist' % channel_id, expected=True)
+ metadata = self._download_json(
+ 'https://api.picarto.tv/v1/channel/name/' + channel_id,
+ channel_id)
- player = self._parse_json(
- self._search_regex(
- r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
- 'player settings'),
- channel_id, transform_source=js_to_json)
-
- if player.get('online') is False:
+ if metadata.get('online') is False:
raise ExtractorError('Stream is offline', expected=True)
cdn_data = self._download_json(
@@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
note='Downloading load balancing info')
- def get_event(key):
- return try_get(player, lambda x: x['event'][key], compat_str) or ''
-
+ token = mobj.group('token') or 'public'
params = {
- 'token': player.get('token') or '',
- 'ticket': get_event('ticket'),
'con': int(time.time() * 1000),
- 'type': get_event('ticket'),
- 'scope': get_event('scope'),
+ 'token': token,
}
prefered_edge = cdn_data.get('preferedEdge')
- default_tech = player.get('defaultTech')
-
formats = []
for edge in cdn_data['edges']:
@@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
preference = 0
if edge_id == prefered_edge:
preference += 1
- if tech_type == default_tech:
- preference += 1
format_id = []
if edge_id:
format_id.append(edge_id)
@@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
continue
self._sort_formats(formats)
- mature = player.get('mature')
+ mature = metadata.get('adult')
if mature is None:
age_limit = None
else:
@@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
return {
'id': channel_id,
- 'title': self._live_title(channel_id),
+ 'title': self._live_title(metadata.get('title') or channel_id),
'is_live': True,
- 'thumbnail': player.get('vodThumb'),
+ 'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
+ 'channel': channel_id,
+ 'channel_url': 'https://picarto.tv/%s' % channel_id,
'age_limit': age_limit,
'formats': formats,
}
diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py
index c0c276a50..88b6859b0 100644
--- a/youtube_dl/extractor/piksel.py
+++ b/youtube_dl/extractor/piksel.py
@@ -15,18 +15,17 @@ from ..utils import (
class PikselIE(InfoExtractor):
- _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
_TESTS = [
{
- 'url': 'http://player.piksel.com/v/nv60p12f',
- 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235',
+ 'url': 'http://player.piksel.com/v/ums2867l',
+ 'md5': '34e34c8d89dc2559976a6079db531e85',
'info_dict': {
- 'id': 'nv60p12f',
+ 'id': 'ums2867l',
'ext': 'mp4',
- 'title': 'فن الحياة - الحلقة 1',
- 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور',
- 'timestamp': 1465231790,
- 'upload_date': '20160606',
+ 'title': 'GX-005 with Caption',
+ 'timestamp': 1481335659,
+ 'upload_date': '20161210'
}
},
{
@@ -39,8 +38,13 @@ class PikselIE(InfoExtractor):
'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
'timestamp': 1486171129,
- 'upload_date': '20170204',
+ 'upload_date': '20170204'
}
+ },
+ {
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
+ 'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
+ 'only_matching': True,
}
]
@@ -53,8 +57,11 @@ class PikselIE(InfoExtractor):
return mobj.group('url')
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._search_regex(
+ r'data-de-program-uuid=[\'"]([a-z0-9]+)',
+ webpage, 'program uuid', default=display_id)
app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"',
r'data-de-api-key\s*=\s*"([^"]+)"'
@@ -113,6 +120,13 @@ class PikselIE(InfoExtractor):
})
self._sort_formats(formats)
+ subtitles = {}
+ for caption in video_data.get('captions', []):
+ caption_url = caption.get('url')
+ if caption_url:
+ subtitles.setdefault(caption.get('locale', 'en'), []).append({
+ 'url': caption_url})
+
return {
'id': video_id,
'title': title,
@@ -120,4 +134,5 @@ class PikselIE(InfoExtractor):
'thumbnail': video_data.get('thumbnailUrl'),
'timestamp': parse_iso8601(video_data.get('dateadd')),
'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py
new file mode 100644
index 000000000..23c8256b5
--- /dev/null
+++ b/youtube_dl/extractor/platzi.py
@@ -0,0 +1,224 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class PlatziBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://platzi.com/login/'
+ _NETRC_MACHINE = 'platzi'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ urlh = self._request_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ # login succeeded
+ if 'platzi.com/login' not in urlh.geturl():
+ return
+
+ login_error = self._webpage_read_content(
+ urlh, self._LOGIN_URL, None, 'Downloading login error page')
+
+ login = self._parse_json(
+ self._search_regex(
+ r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
+ None)
+
+ for kind in ('error', 'password', 'nonFields'):
+ error = str_or_none(login.get('%sError' % kind))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class PlatziIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+ '''
+
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+ 'md5': '8f56448241005b561c10f11a595b37e3',
+ 'info_dict': {
+ 'id': '12074',
+ 'ext': 'mp4',
+ 'title': 'Creando nuestra primera página',
+ 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+ 'duration': 420,
+ },
+ 'skip': 'Requires platzi account credentials',
+ }, {
+ 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+ 'info_dict': {
+ 'id': '13430',
+ 'ext': 'mp4',
+ 'title': 'Background',
+ 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+ 'duration': 360,
+ },
+ 'skip': 'Requires platzi account credentials',
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, lecture_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ # client_data may contain "};" so that we have to try more
+ # strict regex first
+ (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+ r'client_data\s*=\s*({.+?})\s*;'),
+ webpage, 'client data'),
+ lecture_id)
+
+ material = data['initialState']['material']
+ desc = material['description']
+ title = desc['title']
+
+ formats = []
+ for server_id, server in material['videos'].items():
+ if not isinstance(server, dict):
+ continue
+ for format_id in ('hls', 'dash'):
+ format_url = url_or_none(server.get(format_id))
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lecture_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ note='Downloading %s m3u8 information' % server_id,
+ fatal=False))
+ elif format_id == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lecture_id, mpd_id=format_id,
+ note='Downloading %s MPD manifest' % server_id,
+ fatal=False))
+ self._sort_formats(formats)
+
+ content = str_or_none(desc.get('content'))
+ description = (clean_html(compat_b64decode(content).decode('utf-8'))
+ if content else None)
+ duration = int_or_none(material.get('duration'), invscale=60)
+
+ return {
+ 'id': lecture_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class PlatziCourseIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/(?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/next-js/',
+ 'info_dict': {
+ 'id': '1311',
+ 'title': 'Curso de Next.js',
+ },
+ 'playlist_count': 22,
+ }, {
+ 'url': 'https://courses.platzi.com/classes/communication-codestream/',
+ 'info_dict': {
+ 'id': '1367',
+ 'title': 'Codestream Course',
+ },
+ 'playlist_count': 14,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_name)
+
+ props = self._parse_json(
+ self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
+ course_name)['initialProps']
+
+ entries = []
+ for chapter_num, chapter in enumerate(props['concepts'], 1):
+ if not isinstance(chapter, dict):
+ continue
+ materials = chapter.get('materials')
+ if not materials or not isinstance(materials, list):
+ continue
+ chapter_title = chapter.get('title')
+ chapter_id = str_or_none(chapter.get('id'))
+ for material in materials:
+ if not isinstance(material, dict):
+ continue
+ if material.get('material_type') != 'video':
+ continue
+ video_url = urljoin(url, material.get('url'))
+ if not video_url:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': str_or_none(material.get('name')),
+ 'id': str_or_none(material.get('id')),
+ 'ie_key': PlatziIE.ie_key(),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_num,
+ 'chapter_id': chapter_id,
+ })
+
+ course_id = compat_str(try_get(props, lambda x: x['course']['id']))
+ course_title = try_get(props, lambda x: x['course']['name'], compat_str)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py
new file mode 100644
index 000000000..1e30ab23a
--- /dev/null
+++ b/youtube_dl/extractor/playplustv.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ PUTRequest,
+)
+
+
+class PlayPlusTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
+ _TEST = {
+ 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
+ 'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
+ 'info_dict': {
+ 'id': 'db8d274a5163424e967f35a30ddafb8e',
+ 'ext': 'mp4',
+ 'title': 'Capítulo 179 - Final',
+ 'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
+ 'timestamp': 1529992740,
+ 'upload_date': '20180626',
+ },
+ 'skip': 'Requires account credential',
+ }
+ _NETRC_MACHINE = 'playplustv'
+ _GEO_COUNTRIES = ['BR']
+ _token = None
+ _profile_id = None
+
+ def _call_api(self, resource, video_id=None, query=None):
+ return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
+ 'Authorization': 'Bearer ' + self._token,
+ }, query=query)
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ self.raise_login_required()
+
+ req = PUTRequest(
+ 'https://api.playplus.tv/api/web/login', json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode(), {
+ 'Content-Type': 'application/json; charset=utf-8',
+ })
+
+ try:
+ self._token = self._download_json(req, None)['token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError(self._parse_json(
+ e.cause.read(), None)['errorMessage'], expected=True)
+ raise
+
+ self._profile = self._call_api('Profiles')['list'][0]['_id']
+
+ def _real_extract(self, url):
+ project_id, media_id = re.match(self._VALID_URL, url).groups()
+ media = self._call_api(
+ 'Media', media_id, {
+ 'profileId': self._profile,
+ 'projectId': project_id,
+ 'mediaId': media_id,
+ })['obj']
+ title = media['title']
+
+ formats = []
+ for f in media.get('files', []):
+ f_url = f.get('url')
+ if not f_url:
+ continue
+ file_info = f.get('fileInfo') or {}
+ formats.append({
+ 'url': f_url,
+ 'width': int_or_none(file_info.get('width')),
+ 'height': int_or_none(file_info.get('height')),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumb in media.get('thumbs', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(media.get('description')) or media.get('shortDescription'),
+ 'timestamp': int_or_none(media.get('publishDate'), 1000),
+ 'view_count': int_or_none(media.get('numberOfViews')),
+ 'comment_count': int_or_none(media.get('numberOfComments')),
+ 'tags': media.get('tags'),
+ }
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py
index 1257841e4..abd08bc28 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dl/extractor/pluralsight.py
@@ -4,6 +4,7 @@ import collections
import json
import os
import random
+import re
from .common import InfoExtractor
from ..compat import (
@@ -196,7 +197,10 @@ query viewClip {
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
- if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+ if all(not re.search(p, response) for p in (
+ r'__INITIAL_STATE__', r'["\']currentUser["\']',
+ # new layout?
+ r'>\s*Sign out\s*<')):
BLOCKED = 'Your account has been blocked due to suspicious activity'
if BLOCKED in response:
raise ExtractorError(
@@ -210,18 +214,26 @@ query viewClip {
raise ExtractorError('Unable to log in')
- def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id):
- captions_post = {
- 'a': author,
- 'cn': clip_idx,
- 'lc': lang,
- 'm': name,
- }
- captions = self._download_json(
- '%s/player/retrieve-captions' % self._API_BASE, video_id,
- 'Downloading captions JSON', 'Unable to download captions JSON',
- fatal=False, data=json.dumps(captions_post).encode('utf-8'),
- headers={'Content-Type': 'application/json;charset=utf-8'})
+ def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id):
+ captions = None
+ if clip_id:
+ captions = self._download_json(
+ '%s/transcript/api/v1/caption/json/%s/%s'
+ % (self._API_BASE, clip_id, lang), video_id,
+ 'Downloading captions JSON', 'Unable to download captions JSON',
+ fatal=False)
+ if not captions:
+ captions_post = {
+ 'a': author,
+ 'cn': int(clip_idx),
+ 'lc': lang,
+ 'm': name,
+ }
+ captions = self._download_json(
+ '%s/player/retrieve-captions' % self._API_BASE, video_id,
+ 'Downloading captions JSON', 'Unable to download captions JSON',
+ fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+ headers={'Content-Type': 'application/json;charset=utf-8'})
if captions:
return {
lang: [{
@@ -315,7 +327,7 @@ query viewClip {
)
# Some courses also offer widescreen resolution for high quality (see
- # https://github.com/rg3/youtube-dl/issues/7766)
+ # https://github.com/ytdl-org/youtube-dl/issues/7766)
widescreen = course.get('supportsWideScreenVideoFormats') is True
best_quality = 'high-widescreen' if widescreen else 'high'
if widescreen:
@@ -376,8 +388,8 @@ query viewClip {
# Pluralsight tracks multiple sequential calls to ViewClip API and start
# to return 429 HTTP errors after some time (see
- # https://github.com/rg3/youtube-dl/pull/6989). Moreover it may even lead
- # to account ban (see https://github.com/rg3/youtube-dl/issues/6842).
+ # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover it may even lead
+ # to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
# To somewhat reduce the probability of these consequences
# we will sleep random amount of time before each call to ViewClip.
self._sleep(
@@ -413,7 +425,7 @@ query viewClip {
# TODO: other languages?
subtitles = self.extract_subtitles(
- author, clip_idx, 'en', name, duration, display_id)
+ author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id)
return {
'id': clip_id,
diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py
index 25fcebf9f..e782e3f1f 100644
--- a/youtube_dl/extractor/podomatic.py
+++ b/youtube_dl/extractor/podomatic.py
@@ -50,8 +50,8 @@ class PodomaticIE(InfoExtractor):
video_id = mobj.group('id')
channel = mobj.group('channel') or mobj.group('channel_2')
- json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' +
- '?permalink=true&rtmp=0') %
+ json_url = (('%s://%s.podomatic.com/entry/embed_params/%s'
+ + '?permalink=true&rtmp=0') %
(mobj.group('proto'), channel, video_id))
data_json = self._download_webpage(
json_url, video_id, 'Downloading video info')
diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py
index dd5f17f11..80222d428 100644
--- a/youtube_dl/extractor/pokemon.py
+++ b/youtube_dl/extractor/pokemon.py
@@ -20,20 +20,16 @@ class PokemonIE(InfoExtractor):
'ext': 'mp4',
'title': 'The Ol’ Raise and Switch!',
'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
- 'timestamp': 1511824728,
- 'upload_date': '20171127',
},
'add_id': ['LimelightMedia'],
}, {
# no data-video-title
- 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
+ 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
'info_dict': {
- 'id': '99f3bae270bf4e5097274817239ce9c8',
+ 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
'ext': 'mp4',
- 'title': 'Pokémon: The Rise of Darkrai',
- 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
- 'timestamp': 1417778347,
- 'upload_date': '20141205',
+ 'title': "Pokémon : L'ascension de Darkrai",
+ 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
},
'add_id': ['LimelightMedia'],
'params': {
diff --git a/youtube_dl/extractor/popcorntimes.py b/youtube_dl/extractor/popcorntimes.py
new file mode 100644
index 000000000..7bf7f9858
--- /dev/null
+++ b/youtube_dl/extractor/popcorntimes.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_chr,
+)
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+ _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+ 'md5': '93f210991ad94ba8c3485950a2453257',
+ 'info_dict': {
+ 'id': 'A1XCFvz',
+ 'display_id': 'haensel-und-gretel-opera-fantasy',
+ 'ext': 'mp4',
+ 'title': 'Hänsel und Gretel',
+ 'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'John Paul',
+ 'release_date': '19541009',
+ 'duration': 4260,
+ 'tbr': 5380,
+ 'width': 720,
+ 'height': 540,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title',
+ default=None) or self._html_search_meta(
+ 'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+ loc = self._search_regex(
+ r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+ group='value')
+
+ loc_b64 = ''
+ for c in loc:
+ c_ord = ord(c)
+ if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+ upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+ c_ord += 13
+ if upper < c_ord:
+ c_ord -= 26
+ loc_b64 += compat_chr(c_ord)
+
+ video_url = compat_b64decode(loc_b64).decode('utf-8')
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'thumbnail', default=None,
+ group='value') or self._og_search_thumbnail(webpage)
+
+ creator = self._html_search_meta(
+ 'video:director', webpage, 'creator', default=None)
+
+ release_date = self._html_search_meta(
+ 'video:release_date', webpage, default=None)
+ if release_date:
+ release_date = release_date.replace('-', '')
+
+ def int_meta(name):
+ return int_or_none(self._html_search_meta(
+ name, webpage, default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'creator': creator,
+ 'release_date': release_date,
+ 'duration': int_meta('video:duration'),
+ 'tbr': int_meta('ya:ovs:bitrate'),
+ 'width': int_meta('og:video:width'),
+ 'height': int_meta('og:video:height'),
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index 24c3600fe..20eac647a 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -39,7 +39,12 @@ class Porn91IE(InfoExtractor):
r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
title = title.replace('\n', '')
- info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0]
+ video_link_url = self._search_regex(
+ r'<textarea[^>]+id=["\']fm-video_link[^>]+>([^<]+)</textarea>',
+ webpage, 'video link')
+ videopage = self._download_webpage(video_link_url, video_id)
+
+ info_dict = self._parse_html5_media_entries(url, videopage, video_id)[0]
duration = parse_duration(self._search_regex(
r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py
deleted file mode 100644
index 025985fbc..000000000
--- a/youtube_dl/extractor/pornflip.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_str,
-)
-from ..utils import (
- int_or_none,
- try_get,
- unified_timestamp,
-)
-
-
-class PornFlipIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.pornflip.com/v/wz7DfNhMmep',
- 'md5': '98c46639849145ae1fd77af532a9278c',
- 'info_dict': {
- 'id': 'wz7DfNhMmep',
- 'ext': 'mp4',
- 'title': '2 Amateurs swallow make his dream cumshots true',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 112,
- 'timestamp': 1481655502,
- 'upload_date': '20161213',
- 'uploader_id': '106786',
- 'uploader': 'figifoto',
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornflip.com/v/EkRD6-vS2-s',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornflip.com/embed/EkRD6-vS2-s',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornflip.com/v/NG9q6Pb_iK8',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://www.pornflip.com/v/%s' % video_id, video_id)
-
- flashvars = compat_parse_qs(self._search_regex(
- r'<embed[^>]+flashvars=(["\'])(?P<flashvars>(?:(?!\1).)+)\1',
- webpage, 'flashvars', group='flashvars'))
-
- title = flashvars['video_vars[title]'][0]
-
- def flashvar(kind):
- return try_get(
- flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str)
-
- formats = []
- for key, value in flashvars.items():
- if not (value and isinstance(value, list)):
- continue
- format_url = value[0]
- if key == 'video_vars[hds_manifest]':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id='dash', fatal=False))
- continue
- height = self._search_regex(
- r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None)
- if not height:
- continue
- formats.append({
- 'url': format_url,
- 'format_id': 'http-%s' % height,
- 'height': int_or_none(height),
- })
- self._sort_formats(formats)
-
- uploader = self._html_search_regex(
- (r'<span[^>]+class="name"[^>]*>\s*<a[^>]+>\s*<strong>(?P<uploader>[^<]+)',
- r'<meta[^>]+content=(["\'])[^>]*\buploaded by (?P<uploader>.+?)\1'),
- webpage, 'uploader', fatal=False, group='uploader')
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'thumbnail': flashvar('big_thumb'),
- 'duration': int_or_none(flashvar('duration')),
- 'timestamp': unified_timestamp(self._html_search_meta(
- 'uploadDate', webpage, 'timestamp')),
- 'uploader_id': flashvar('author_id'),
- 'uploader': uploader,
- 'view_count': int_or_none(flashvar('views')),
- 'age_limit': 18,
- }
diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py
index b52879c7a..c6052ac9f 100644
--- a/youtube_dl/extractor/pornhd.py
+++ b/youtube_dl/extractor/pornhd.py
@@ -4,9 +4,12 @@ import re
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
js_to_json,
+ merge_dicts,
+ urljoin,
)
@@ -14,7 +17,7 @@ class PornHdIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
_TESTS = [{
'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
- 'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5',
+ 'md5': '87f1540746c1d32ec7a2305c12b96b25',
'info_dict': {
'id': '9864',
'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
@@ -23,23 +26,24 @@ class PornHdIE(InfoExtractor):
'description': 'md5:3748420395e03e31ac96857a8f125b2b',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
+ 'like_count': int,
'age_limit': 18,
- }
+ },
+ 'skip': 'HTTP Error 404: Not Found',
}, {
- # removed video
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
- 'md5': '956b8ca569f7f4d8ec563e2c41598441',
+ 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
'info_dict': {
'id': '1962',
'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
'ext': 'mp4',
- 'title': 'Sierra loves doing laundry',
+ 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
'thumbnail': r're:^https?://.*\.jpg',
'view_count': int,
+ 'like_count': int,
'age_limit': 18,
},
- 'skip': 'Not available anymore',
}]
def _real_extract(self, url):
@@ -57,7 +61,13 @@ class PornHdIE(InfoExtractor):
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
webpage, 'sources', default='{}')), video_id)
+ info = {}
if not sources:
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+ if entries:
+ info = entries[0]
+
+ if not sources and not info:
message = self._html_search_regex(
r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
webpage, 'error message', group='value')
@@ -65,33 +75,47 @@ class PornHdIE(InfoExtractor):
formats = []
for format_id, video_url in sources.items():
+ video_url = urljoin(url, video_url)
if not video_url:
continue
height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_id, 'height', default=None))
formats.append({
'url': video_url,
+ 'ext': determine_ext(video_url, 'mp4'),
'format_id': format_id,
'height': height,
})
- self._sort_formats(formats)
+ if formats:
+ info['formats'] = formats
+ self._sort_formats(info['formats'])
description = self._html_search_regex(
- r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
- webpage, 'description', fatal=False, group='value')
+ (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
+ r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
+ webpage, 'description', fatal=False,
+ group='value') or self._html_search_meta(
+ 'description', webpage, default=None) or self._og_search_description(webpage)
view_count = int_or_none(self._html_search_regex(
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
thumbnail = self._search_regex(
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
- 'thumbnail', fatal=False, group='url')
+ 'thumbnail', default=None, group='url')
+
+ like_count = int_or_none(self._search_regex(
+ (r'(\d+)</span>\s*likes',
+ r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
+ r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
+ webpage, 'like count', fatal=False))
- return {
+ return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'view_count': view_count,
+ 'like_count': like_count,
'formats': formats,
'age_limit': 18,
- }
+ })
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 19eaf389f..3567a3283 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -10,11 +10,14 @@ from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
+ compat_urllib_request,
)
+from .openload import PhantomJSwrapper
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
- js_to_json,
+ NO_DEFAULT,
orderedSet,
remove_quotes,
str_to_int,
@@ -22,12 +25,34 @@ from ..utils import (
)
-class PornHubIE(InfoExtractor):
+class PornHubBaseIE(InfoExtractor):
+ def _download_webpage_handle(self, *args, **kwargs):
+ def dl(*args, **kwargs):
+ return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+ webpage, urlh = dl(*args, **kwargs)
+
+ if any(re.search(p, webpage) for p in (
+ r'<body\b[^>]+\bonload=["\']go\(\)',
+ r'document\.cookie\s*=\s*["\']RNKEY=',
+ r'document\.location\.reload\(true\)')):
+ url_or_request = args[0]
+ url = (url_or_request.get_full_url()
+ if isinstance(url_or_request, compat_urllib_request.Request)
+ else url_or_request)
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ phantom.get(url, html=webpage)
+ webpage, urlh = dl(*args, **kwargs)
+
+ return webpage, urlh
+
+
+class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x)
https?://
(?:
- (?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
@@ -121,12 +146,18 @@ class PornHubIE(InfoExtractor):
}, {
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+ 'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
webpage)
def _extract_count(self, pattern, webpage, name):
@@ -134,14 +165,23 @@ class PornHubIE(InfoExtractor):
pattern, webpage, '%s count' % name, fatal=False))
def _real_extract(self, url):
- video_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host') or 'pornhub.com'
+ video_id = mobj.group('id')
+
+ if 'premium' in host:
+ if not self._downloader.params.get('cookiefile'):
+ raise ExtractorError(
+ 'PornHub Premium requires authentication.'
+ ' You may want to use --cookies.',
+ expected=True)
- self._set_cookie('pornhub.com', 'age_verified', '1')
+ self._set_cookie(host, 'age_verified', '1')
def dl_webpage(platform):
- self._set_cookie('pornhub.com', 'platform', platform)
+ self._set_cookie(host, 'platform', platform)
return self._download_webpage(
- 'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
+ 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
video_id, 'Downloading %s webpage' % platform)
webpage = dl_webpage('pc')
@@ -159,10 +199,10 @@ class PornHubIE(InfoExtractor):
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
title = self._html_search_meta(
- 'twitter:title', webpage, default=None) or self._search_regex(
- (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
- r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
- r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
+ 'twitter:title', webpage, default=None) or self._html_search_regex(
+ (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
webpage, 'title', group='title')
video_urls = []
@@ -198,12 +238,13 @@ class PornHubIE(InfoExtractor):
else:
thumbnail, duration = [None] * 2
- if not video_urls:
- tv_webpage = dl_webpage('tv')
-
+ def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
assignments = self._search_regex(
- r'(var.+?mediastring.+?)</script>', tv_webpage,
- 'encoded url').split(';')
+ pattern, webpage, 'encoded url', default=default)
+ if not assignments:
+ return {}
+
+ assignments = assignments.split(';')
js_vars = {}
@@ -225,11 +266,35 @@ class PornHubIE(InfoExtractor):
assn = re.sub(r'var\s+', '', assn)
vname, value = assn.split('=', 1)
js_vars[vname] = parse_js_value(value)
+ return js_vars
- video_url = js_vars['mediastring']
- if video_url not in video_urls_set:
- video_urls.append((video_url, None))
- video_urls_set.add(video_url)
+ def add_video_url(video_url):
+ v_url = url_or_none(video_url)
+ if not v_url:
+ return
+ if v_url in video_urls_set:
+ return
+ video_urls.append((v_url, None))
+ video_urls_set.add(v_url)
+
+ if not video_urls:
+ FORMAT_PREFIXES = ('media', 'quality')
+ js_vars = extract_js_vars(
+ webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
+ default=None)
+ if js_vars:
+ for key, format_url in js_vars.items():
+ if any(key.startswith(p) for p in FORMAT_PREFIXES):
+ add_video_url(format_url)
+ if not video_urls and re.search(
+ r'<[^>]+\bid=["\']lockedPlayer', webpage):
+ raise ExtractorError(
+ 'Video %s is locked' % video_id, expected=True)
+
+ if not video_urls:
+ js_vars = extract_js_vars(
+ dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
+ add_video_url(js_vars['mediastring'])
for mobj in re.finditer(
r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
@@ -247,6 +312,16 @@ class PornHubIE(InfoExtractor):
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
+ ext = determine_ext(video_url)
+ if ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
if mobj:
@@ -274,14 +349,12 @@ class PornHubIE(InfoExtractor):
comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
- page_params = self._parse_json(self._search_regex(
- r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
- webpage, 'page parameters', group='data', default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
- tags = categories = None
- if page_params:
- tags = page_params.get('tags', '').split(',')
- categories = page_params.get('categories', '').split(',')
+ def extract_list(meta_key):
+ div = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+ % meta_key, webpage, meta_key, default=None)
+ if div:
+ return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
return {
'id': video_id,
@@ -296,24 +369,24 @@ class PornHubIE(InfoExtractor):
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
- 'tags': tags,
- 'categories': categories,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
'subtitles': subtitles,
}
-class PornHubPlaylistBaseIE(InfoExtractor):
- def _extract_entries(self, webpage):
+class PornHubPlaylistBaseIE(PornHubBaseIE):
+ def _extract_entries(self, webpage, host):
# Only process container div with main playlist content skipping
# drop-down menu that uses similar pattern for videos (see
- # https://github.com/rg3/youtube-dl/issues/11594).
+ # https://github.com/ytdl-org/youtube-dl/issues/11594).
container = self._search_regex(
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
'container', default=webpage)
return [
self.url_result(
- 'http://www.pornhub.com/%s' % video_url,
+ 'http://www.%s/%s' % (host, video_url),
PornHubIE.ie_key(), video_title=title)
for video_url, title in orderedSet(re.findall(
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
@@ -321,11 +394,13 @@ class PornHubPlaylistBaseIE(InfoExtractor):
]
def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ playlist_id = mobj.group('id')
webpage = self._download_webpage(url, playlist_id)
- entries = self._extract_entries(webpage)
+ entries = self._extract_entries(webpage, host)
playlist = self._parse_json(
self._search_regex(
@@ -339,37 +414,99 @@ class PornHubPlaylistBaseIE(InfoExtractor):
entries, playlist_id, title, playlist.get('description'))
-class PornHubPlaylistIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
+class PornHubUserIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_TESTS = [{
- 'url': 'http://www.pornhub.com/playlist/4667351',
+ 'url': 'https://www.pornhub.com/model/zoe_ph',
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
'info_dict': {
- 'id': '4667351',
- 'title': 'Nataly Hot',
+ 'id': 'liz-vicious',
},
- 'playlist_mincount': 2,
+ 'playlist_mincount': 118,
}, {
- 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'url': 'https://www.pornhub.com/users/russianveet69',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/channels/povd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
'only_matching': True,
}]
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('id')
+ return self.url_result(
+ '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
+ video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+ @staticmethod
+ def _has_more(webpage):
+ return re.search(
+ r'''(?x)
+ <li[^>]+\bclass=["\']page_next|
+ <link[^>]+\brel=["\']next|
+ <button[^>]+\bid=["\']moreDataBtn
+ ''', webpage) is not None
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ page = int_or_none(self._search_regex(
+ r'\bpage=(\d+)', url, 'page', default=None))
-class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
+ entries = []
+ for page_num in (page, ) if page is not None else itertools.count(1):
+ try:
+ webpage = self._download_webpage(
+ url, item_id, 'Downloading page %d' % page_num,
+ query={'page': page_num})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ break
+ raise
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ entries.extend(page_entries)
+ if not self._has_more(webpage):
+ break
+
+ return self.playlist_result(orderedSet(entries), item_id)
+
+
+class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
- 'info_dict': {
- 'id': 'zoe_ph',
- },
- 'playlist_mincount': 171,
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
+ 'only_matching': True,
}, {
'url': 'http://www.pornhub.com/users/rushandlia/videos',
'only_matching': True,
}, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 149,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 40,
+ }, {
# default sorting as Top Rated Videos
'url': 'https://www.pornhub.com/channels/povd/videos',
'info_dict': {
- 'id': 'povd',
+ 'id': 'channels/povd/videos',
},
'playlist_mincount': 293,
}, {
@@ -388,29 +525,87 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
'only_matching': True,
}, {
- 'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
'only_matching': True,
}, {
- 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
+ 'only_matching': True,
+ }, {
+ # Longest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
+ 'only_matching': True,
+ }, {
+ # Newest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/search?search=123',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video?page=2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/44121572',
+ 'info_dict': {
+ 'id': 'playlist/44121572',
+ },
+ 'playlist_mincount': 132,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True,
}]
- def _real_extract(self, url):
- user_id = self._match_id(url)
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
+ else super(PornHubPagedVideoListIE, cls).suitable(url))
- entries = []
- for page_num in itertools.count(1):
- try:
- webpage = self._download_webpage(
- url, user_id, 'Downloading page %d' % page_num,
- query={'page': page_num})
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- break
- raise
- page_entries = self._extract_entries(webpage)
- if not page_entries:
- break
- entries.extend(page_entries)
- return self.playlist_result(entries, user_id)
+class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+ 'info_dict': {
+ 'id': 'jenny-blighe',
+ },
+ 'playlist_mincount': 129,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+ 'only_matching': True,
+ }]
diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py
deleted file mode 100644
index 0c1024772..000000000
--- a/youtube_dl/extractor/primesharetv.py
+++ /dev/null
@@ -1,62 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class PrimeShareTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'
-
- _TEST = {
- 'url': 'http://primeshare.tv/download/238790B611',
- 'md5': 'b92d9bf5461137c36228009f31533fbc',
- 'info_dict': {
- 'id': '238790B611',
- 'ext': 'mp4',
- 'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- if '>File not exist<' in webpage:
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- fields = self._hidden_inputs(webpage)
-
- headers = {
- 'Referer': url,
- 'Content-Type': 'application/x-www-form-urlencoded',
- }
-
- wait_time = int(self._search_regex(
- r'var\s+cWaitTime\s*=\s*(\d+)',
- webpage, 'wait time', default=7)) + 1
- self._sleep(wait_time, video_id)
-
- req = sanitized_Request(
- url, urlencode_postdata(fields), headers)
- video_page = self._download_webpage(
- req, video_id, 'Downloading video page')
-
- video_url = self._search_regex(
- r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
- video_page, 'video url')
-
- title = self._html_search_regex(
- r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
- video_page, 'title')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'ext': 'mp4',
- }
diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py
deleted file mode 100644
index 23ac93d7e..000000000
--- a/youtube_dl/extractor/promptfile.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- ExtractorError,
- urlencode_postdata,
-)
-
-
-class PromptFileIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
- _TEST = {
- 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416',
- 'md5': '5a7e285a26e0d66d9a263fae91bc92ce',
- 'info_dict': {
- 'id': '86D1CE8462-576CAAE416',
- 'ext': 'mp4',
- 'title': 'oceans.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
- raise ExtractorError('Video %s does not exist' % video_id,
- expected=True)
-
- chash = self._search_regex(
- r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash')
- fields = self._hidden_inputs(webpage)
- keys = list(fields.keys())
- chash_key = keys[0] if len(keys) == 1 else next(
- key for key in keys if key.startswith('cha'))
- fields[chash_key] = chash + fields[chash_key]
-
- webpage = self._download_webpage(
- url, video_id, 'Downloading video page',
- data=urlencode_postdata(fields),
- headers={'Content-type': 'application/x-www-form-urlencoded'})
-
- video_url = self._search_regex(
- (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File',
- r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'),
- webpage, 'video url', group='url')
- title = self._html_search_regex(
- r'<span.+title="([^"]+)">', webpage, 'title')
- thumbnail = self._html_search_regex(
- r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
- webpage, 'thumbnail', fatal=False, flags=re.DOTALL)
-
- formats = [{
- 'format_id': 'sd',
- 'url': video_url,
- 'ext': determine_ext(title),
- }]
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
index d0955d079..e47088292 100644
--- a/youtube_dl/extractor/prosiebensat1.py
+++ b/youtube_dl/extractor/prosiebensat1.py
@@ -11,11 +11,17 @@ from ..utils import (
determine_ext,
float_or_none,
int_or_none,
+ merge_dicts,
unified_strdate,
)
class ProSiebenSat1BaseIE(InfoExtractor):
+ _GEO_BYPASS = False
+ _ACCESS_ID = None
+ _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
+ _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
+
def _extract_video_info(self, url, clip_id):
client_location = url
@@ -31,93 +37,132 @@ class ProSiebenSat1BaseIE(InfoExtractor):
if video.get('is_protected') is True:
raise ExtractorError('This video is DRM protected.', expected=True)
- duration = float_or_none(video.get('duration'))
- source_ids = [compat_str(source['id']) for source in video['sources']]
-
- client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
-
- sources = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
- clip_id, 'Downloading sources JSON', query={
- 'access_token': self._TOKEN,
- 'client_id': client_id,
- 'client_location': client_location,
- 'client_name': self._CLIENT_NAME,
- })
- server_id = sources['server_id']
+ formats = []
+ if self._ACCESS_ID:
+ raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
+ protocols = self._download_json(
+ self._V4_BASE_URL + 'protocols', clip_id,
+ 'Downloading protocols JSON',
+ headers=self.geo_verification_headers(), query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct).encode()).hexdigest(),
+ 'video_id': clip_id,
+ }, fatal=False, expected_status=(403,)) or {}
+ error = protocols.get('error') or {}
+ if error.get('title') == 'Geo check failed':
+ self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
+ server_token = protocols.get('server_token')
+ if server_token:
+ urls = (self._download_json(
+ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+ 'protocols': self._SUPPORTED_PROTOCOLS,
+ 'server_token': server_token,
+ 'video_id': clip_id,
+ }, fatal=False) or {}).get('urls') or {}
+ for protocol, variant in urls.items():
+ source_url = variant.get('clear', {}).get('url')
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+ if not formats:
+ source_ids = [compat_str(source['id']) for source in video['sources']]
- def fix_bitrate(bitrate):
- bitrate = int_or_none(bitrate)
- if not bitrate:
- return None
- return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+ client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
- formats = []
- for source_id in source_ids:
- client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
- urls = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
- clip_id, 'Downloading urls JSON', fatal=False, query={
+ sources = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+ clip_id, 'Downloading sources JSON', query={
'access_token': self._TOKEN,
'client_id': client_id,
'client_location': client_location,
'client_name': self._CLIENT_NAME,
- 'server_id': server_id,
- 'source_ids': source_id,
})
- if not urls:
- continue
- if urls.get('status_code') != 0:
- raise ExtractorError('This video is unavailable', expected=True)
- urls_sources = urls['sources']
- if isinstance(urls_sources, dict):
- urls_sources = urls_sources.values()
- for source in urls_sources:
- source_url = source.get('url')
- if not source_url:
+ server_id = sources['server_id']
+
+ def fix_bitrate(bitrate):
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
+ return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+
+ for source_id in source_ids:
+ client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ urls = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+ clip_id, 'Downloading urls JSON', fatal=False, query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'server_id': server_id,
+ 'source_ids': source_id,
+ })
+ if not urls:
continue
- protocol = source.get('protocol')
- mimetype = source.get('mimetype')
- if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
- formats.extend(self._extract_f4m_formats(
- source_url, clip_id, f4m_id='hds', fatal=False))
- elif mimetype == 'application/x-mpegURL':
- formats.extend(self._extract_m3u8_formats(
- source_url, clip_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif mimetype == 'application/dash+xml':
- formats.extend(self._extract_mpd_formats(
- source_url, clip_id, mpd_id='dash', fatal=False))
- else:
- tbr = fix_bitrate(source['bitrate'])
- if protocol in ('rtmp', 'rtmpe'):
- mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
- if not mobj:
- continue
- path = mobj.group('path')
- mp4colon_index = path.rfind('mp4:')
- app = path[:mp4colon_index]
- play_path = path[mp4colon_index:]
- formats.append({
- 'url': '%s/%s' % (mobj.group('url'), app),
- 'app': app,
- 'play_path': play_path,
- 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
- 'page_url': 'http://www.prosieben.de',
- 'tbr': tbr,
- 'ext': 'flv',
- 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
- })
+ if urls.get('status_code') != 0:
+ raise ExtractorError('This video is unavailable', expected=True)
+ urls_sources = urls['sources']
+ if isinstance(urls_sources, dict):
+ urls_sources = urls_sources.values()
+ for source in urls_sources:
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ protocol = source.get('protocol')
+ mimetype = source.get('mimetype')
+ if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, clip_id, f4m_id='hds', fatal=False))
+ elif mimetype == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
else:
- formats.append({
- 'url': source_url,
- 'tbr': tbr,
- 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
- })
+ tbr = fix_bitrate(source['bitrate'])
+ if protocol in ('rtmp', 'rtmpe'):
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+ if not mobj:
+ continue
+ path = mobj.group('path')
+ mp4colon_index = path.rfind('mp4:')
+ app = path[:mp4colon_index]
+ play_path = path[mp4colon_index:]
+ formats.append({
+ 'url': '%s/%s' % (mobj.group('url'), app),
+ 'app': app,
+ 'play_path': play_path,
+ 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+ 'page_url': 'http://www.prosieben.de',
+ 'tbr': tbr,
+ 'ext': 'flv',
+ 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'tbr': tbr,
+ 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+ })
self._sort_formats(formats)
return {
- 'duration': duration,
+ 'duration': float_or_none(video.get('duration')),
'formats': formats,
}
@@ -131,7 +176,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
(?:
(?:beta\.)?
(?:
- prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
+ prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
)\.(?:de|at|ch)|
ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
)
@@ -140,8 +185,8 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
_TESTS = [
{
- # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242
- # in response to fixing https://github.com/rg3/youtube-dl/issues/6215:
+ # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
+ # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
# - malformed f4m manifest support
# - proper handling of URLs starting with `https?://` in 2.0 manifests
# - recursive child f4m manifests extraction
@@ -149,10 +194,14 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'info_dict': {
'id': '2104602',
'ext': 'mp4',
- 'title': 'Episode 18 - Staffel 2',
+ 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
'upload_date': '20131231',
'duration': 5845.04,
+ 'series': 'CIRCUS HALLIGALLI',
+ 'season_number': 2,
+ 'episode': 'Episode 18 - Staffel 2',
+ 'episode_number': 18,
},
},
{
@@ -256,8 +305,9 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'info_dict': {
'id': '2572814',
'ext': 'mp4',
- 'title': 'Andreas Kümmert: Rocket Man',
+ 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
'description': 'md5:6ddb02b0781c6adf778afea606652e38',
+ 'timestamp': 1382041620,
'upload_date': '20131017',
'duration': 469.88,
},
@@ -266,7 +316,7 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
},
},
{
- 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
+ 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
'info_dict': {
'id': '2156342',
'ext': 'mp4',
@@ -289,19 +339,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
'skip': 'This video is unavailable',
},
{
- 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
- 'info_dict': {
- 'id': '4187506',
- 'ext': 'mp4',
- 'title': 'Best of Circus HalliGalli',
- 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
- 'upload_date': '20151229',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
# title in <h2 class="subtitle">
'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
'info_dict': {
@@ -344,6 +381,11 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
_TOKEN = 'prosieben'
_SALT = '01!8d8F_)r9]4s[qeuXfP%'
_CLIENT_NAME = 'kolibri-2.0.19-splec4'
+
+ _ACCESS_ID = 'x_prosiebenmaxx-de'
+ _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
+ _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
+
_CLIPID_REGEXES = [
r'"clip_id"\s*:\s+"(\d+)"',
r'clipid: "(\d+)"',
@@ -372,7 +414,6 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
]
_UPLOAD_DATE_REGEXES = [
- r'<meta property="og:published_time" content="(.+?)">',
r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
@@ -402,17 +443,21 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
if description is None:
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
- upload_date = unified_strdate(self._html_search_regex(
- self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
+ upload_date = unified_strdate(
+ self._html_search_meta('og:published_time', webpage,
+ 'upload date', default=None)
+ or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
+ webpage, 'upload date', default=None))
+
+ json_ld = self._search_json_ld(webpage, clip_id, default={})
- info.update({
+ return merge_dicts(info, {
'id': clip_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'upload_date': upload_date,
- })
- return info
+ }, json_ld)
def _extract_playlist(self, url, webpage):
playlist_id = self._html_search_regex(
diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py
index 5465e8ab7..ca71665e0 100644
--- a/youtube_dl/extractor/puhutv.py
+++ b/youtube_dl/extractor/puhutv.py
@@ -25,21 +25,21 @@ class PuhuTVIE(InfoExtractor):
_TESTS = [{
# film
'url': 'https://puhutv.com/sut-kardesler-izle',
- 'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
+ 'md5': 'a347470371d56e1585d1b2c8dab01c96',
'info_dict': {
'id': '5085',
'display_id': 'sut-kardesler',
'ext': 'mp4',
'title': 'Süt Kardeşler',
- 'description': 'md5:405fd024df916ca16731114eb18e511a',
+ 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 4832.44,
'creator': 'Arzu Film',
- 'timestamp': 1469778212,
- 'upload_date': '20160729',
+ 'timestamp': 1561062602,
+ 'upload_date': '20190620',
'release_year': 1976,
'view_count': int,
- 'tags': ['Aile', 'Komedi', 'Klasikler'],
+ 'tags': list,
},
}, {
# episode, geo restricted, bypassable with --geo-verification-proxy
@@ -64,9 +64,10 @@ class PuhuTVIE(InfoExtractor):
display_id)['data']
video_id = compat_str(info['id'])
- title = info.get('name') or info['title']['name']
+ show = info.get('title') or {}
+ title = info.get('name') or show['name']
if info.get('display_name'):
- title = '%s %s' % (title, info.get('display_name'))
+ title = '%s %s' % (title, info['display_name'])
try:
videos = self._download_json(
@@ -78,17 +79,22 @@ class PuhuTVIE(InfoExtractor):
self.raise_geo_restricted()
raise
+ urls = []
formats = []
+
for video in videos['data']['videos']:
media_url = url_or_none(video.get('url'))
- if not media_url:
+ if not media_url or media_url in urls:
continue
+ urls.append(media_url)
+
playlist = video.get('is_playlist')
- if video.get('stream_type') == 'hls' and playlist is True:
+ if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
+
quality = int_or_none(video.get('quality'))
f = {
'url': media_url,
@@ -96,12 +102,12 @@ class PuhuTVIE(InfoExtractor):
'height': quality
}
video_format = video.get('video_format')
- if video_format == 'hls' and playlist is False:
+ is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
+ if is_hls:
format_id = 'hls'
f['protocol'] = 'm3u8_native'
elif video_format == 'mp4':
format_id = 'http'
-
else:
continue
if quality:
@@ -110,20 +116,13 @@ class PuhuTVIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
- description = try_get(
- info, lambda x: x['title']['description'],
- compat_str) or info.get('description')
- timestamp = unified_timestamp(info.get('created_at'))
creator = try_get(
- info, lambda x: x['title']['producer']['name'], compat_str)
+ show, lambda x: x['producer']['name'], compat_str)
- duration = float_or_none(
- try_get(info, lambda x: x['content']['duration_in_ms'], int),
- scale=1000)
- view_count = try_get(info, lambda x: x['content']['watch_count'], int)
+ content = info.get('content') or {}
images = try_get(
- info, lambda x: x['content']['images']['wide'], dict) or {}
+ content, lambda x: x['images']['wide'], dict) or {}
thumbnails = []
for image_id, image_url in images.items():
if not isinstance(image_url, compat_str):
@@ -137,14 +136,8 @@ class PuhuTVIE(InfoExtractor):
})
thumbnails.append(t)
- release_year = try_get(info, lambda x: x['title']['released_at'], int)
-
- season_number = int_or_none(info.get('season_number'))
- season_id = str_or_none(info.get('season_id'))
- episode_number = int_or_none(info.get('episode_number'))
-
tags = []
- for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
+ for genre in show.get('genres') or []:
if not isinstance(genre, dict):
continue
genre_name = genre.get('name')
@@ -152,12 +145,11 @@ class PuhuTVIE(InfoExtractor):
tags.append(genre_name)
subtitles = {}
- for subtitle in try_get(
- info, lambda x: x['content']['subtitles'], list) or []:
+ for subtitle in content.get('subtitles') or []:
if not isinstance(subtitle, dict):
continue
lang = subtitle.get('language')
- sub_url = url_or_none(subtitle.get('url'))
+ sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
if not lang or not isinstance(lang, compat_str) or not sub_url:
continue
subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
@@ -168,15 +160,15 @@ class PuhuTVIE(InfoExtractor):
'id': video_id,
'display_id': display_id,
'title': title,
- 'description': description,
- 'season_id': season_id,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'release_year': release_year,
- 'timestamp': timestamp,
+ 'description': info.get('description') or show.get('description'),
+ 'season_id': str_or_none(info.get('season_id')),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'release_year': int_or_none(show.get('released_at')),
+ 'timestamp': unified_timestamp(info.get('created_at')),
'creator': creator,
- 'view_count': view_count,
- 'duration': duration,
+ 'view_count': int_or_none(content.get('watch_count')),
+ 'duration': float_or_none(content.get('duration_in_ms'), 1000),
'tags': tags,
'subtitles': subtitles,
'thumbnails': thumbnails,
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py
index b952e59b4..a28b1a24c 100644
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dl/extractor/radiocanada.py
@@ -4,16 +4,12 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
- xpath_text,
- find_xpath_attr,
determine_ext,
+ ExtractorError,
int_or_none,
unified_strdate,
- xpath_element,
- ExtractorError,
- determine_protocol,
- unsmuggle_url,
)
@@ -49,107 +45,79 @@ class RadioCanadaIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
+ },
+ {
+ # with protectionType but not actually DRM protected
+ 'url': 'radiocanada:toutv:140872',
+ 'info_dict': {
+ 'id': '140872',
+ 'title': 'Épisode 1',
+ 'series': 'District 31',
+ },
+ 'only_matching': True,
}
]
+ _GEO_COUNTRIES = ['CA']
+ _access_token = None
+ _claims = None
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- app_code, video_id = re.match(self._VALID_URL, url).groups()
-
- metadata = self._download_xml(
- 'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
- video_id, note='Downloading metadata XML', query={
+ def _call_api(self, path, video_id=None, app_code=None, query=None):
+ if not query:
+ query = {}
+ query.update({
+ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
+ 'output': 'json',
+ })
+ if video_id:
+ query.update({
'appCode': app_code,
'idMedia': video_id,
})
+ if self._access_token:
+ query['access_token'] = self._access_token
+ try:
+ return self._download_json(
+ 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+ data = self._parse_json(e.cause.read().decode(), None)
+ error = data.get('error_description') or data['errorMessage']['text']
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract_info(self, app_code, video_id):
+ metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
def get_meta(name):
- el = find_xpath_attr(metadata, './/Meta', 'name', name)
- return el.text if el is not None else None
+ for meta in metas:
+ if meta.get('name') == name:
+ text = meta.get('text')
+ if text:
+ return text
+ # protectionType does not necessarily mean the video is DRM protected (see
+ # https://github.com/ytdl-org/youtube-dl/pull/18609).
if get_meta('protectionType'):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- device_types = ['ipad']
- if not smuggled_data:
- device_types.append('flash')
- device_types.append('android')
+ self.report_warning('This video is probably DRM protected.')
- formats = []
- error = None
- # TODO: extract f4m formats
- # f4m formats can be extracted using flashhd device_type but they produce unplayable file
- for device_type in device_types:
- validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx'
- query = {
- 'appCode': app_code,
- 'idMedia': video_id,
- 'connectionType': 'broadband',
- 'multibitrate': 'true',
- 'deviceType': device_type,
- }
- if smuggled_data:
- validation_url = 'https://services.radio-canada.ca/media/validation/v2/'
- query.update(smuggled_data)
- else:
- query.update({
- # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
- 'paysJ391wsHjbOJwvCs26toz': 'CA',
- 'bypasslock': 'NZt5K62gRqfc',
- })
- v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)
- v_url = xpath_text(v_data, 'url')
- if not v_url:
- continue
- if v_url == 'null':
- error = xpath_text(v_data, 'message')
- continue
- ext = determine_ext(v_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- v_url, video_id, f4m_id='hds', fatal=False))
- else:
- ext = determine_ext(v_url)
- bitrates = xpath_element(v_data, 'bitrates')
- for url_e in bitrates.findall('url'):
- tbr = int_or_none(url_e.get('bitrate'))
- if not tbr:
- continue
- f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url)
- protocol = determine_protocol({'url': f_url})
- f = {
- 'format_id': '%s-%d' % (protocol, tbr),
- 'url': f_url,
- 'ext': 'flv' if protocol == 'rtmp' else ext,
- 'protocol': protocol,
- 'width': int_or_none(url_e.get('width')),
- 'height': int_or_none(url_e.get('height')),
- 'tbr': tbr,
- }
- mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url)
- if mobj:
- f.update({
- 'url': mobj.group('url') + mobj.group('auth'),
- 'play_path': mobj.group('playpath'),
- })
- formats.append(f)
- if protocol == 'rtsp':
- base_url = self._search_regex(
- r'rtsp://([^?]+)', f_url, 'base url', default=None)
- if base_url:
- base_url = 'http://' + base_url
- formats.extend(self._extract_m3u8_formats(
- base_url + '/playlist.m3u8', video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- base_url + '/manifest.f4m', video_id,
- f4m_id='hds', fatal=False))
- if not formats and error:
+ query = {
+ 'connectionType': 'hd',
+ 'deviceType': 'ipad',
+ 'multibitrate': 'true',
+ }
+ if self._claims:
+ query['claims'] = self._claims
+ v_data = self._call_api('validation/v2/', video_id, app_code, query)
+ v_url = v_data.get('url')
+ if not v_url:
+ error = v_data['message']
+ if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+ raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+ if error == 'Le contenu sélectionné est disponible seulement en premium':
+ self.raise_login_required(error)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
+ formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
self._sort_formats(formats)
subtitles = {}
@@ -174,11 +142,14 @@ class RadioCanadaIE(InfoExtractor):
'formats': formats,
}
+ def _real_extract(self, url):
+ return self._extract_info(*re.match(self._VALID_URL, url).groups())
+
class RadioCanadaAudioVideoIE(InfoExtractor):
- 'radiocanada:audiovideo'
- _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
- _TEST = {
+ IE_NAME = 'radiocanada:audiovideo'
+ _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+ _TESTS = [{
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
'info_dict': {
'id': '7527184',
@@ -191,7 +162,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):
# m3u8 download
'skip_download': True,
},
- }
+ }, {
+ 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index 548a6553b..207a6c247 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -74,11 +74,11 @@ class RaiBaseIE(InfoExtractor):
if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'):
continue
- if ext == 'm3u8':
+ if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon':
formats.extend(self._extract_m3u8_formats(
media_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
- elif ext == 'f4m':
+ elif ext == 'f4m' or platform == 'flash':
manifest_url = update_url_query(
media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'),
{'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
@@ -288,7 +288,7 @@ class RaiPlayPlaylistIE(InfoExtractor):
class RaiIE(RaiBaseIE):
- _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/dl/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
+ _VALID_URL = r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE
_TESTS = [{
# var uniquename = "ContentItem-..."
# data-id="ContentItem-..."
@@ -375,6 +375,9 @@ class RaiIE(RaiBaseIE):
# Direct MMS URL
'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html',
+ 'only_matching': True,
}]
def _extract_from_content_id(self, content_id, url):
diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py
index 7e8d58f38..dbe1aaded 100644
--- a/youtube_dl/extractor/redbulltv.py
+++ b/youtube_dl/extractor/redbulltv.py
@@ -10,7 +10,7 @@ from ..utils import (
class RedBullTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?P<id>AP-\w+)'
_TESTS = [{
# film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@@ -38,6 +38,12 @@ class RedBullTVIE(InfoExtractor):
}, {
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -98,3 +104,25 @@ class RedBullTVIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class RedBullTVRrnContentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_url = self._og_search_url(webpage)
+
+ return self.url_result(
+ video_url, ie=RedBullTVIE.ie_key(),
+ video_id=RedBullTVIE._match_id(video_url))
diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py
index 7b0aa6232..663f622b3 100644
--- a/youtube_dl/extractor/reddit.py
+++ b/youtube_dl/extractor/reddit.py
@@ -7,6 +7,7 @@ from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
+ url_or_none,
)
@@ -119,7 +120,7 @@ class RedditRIE(InfoExtractor):
'_type': 'url_transparent',
'url': video_url,
'title': data.get('title'),
- 'thumbnail': data.get('thumbnail'),
+ 'thumbnail': url_or_none(data.get('thumbnail')),
'timestamp': float_or_none(data.get('created_utc')),
'uploader': data.get('author'),
'like_count': int_or_none(data.get('ups')),
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
index 10311a81a..2d2f6a98c 100644
--- a/youtube_dl/extractor/redtube.py
+++ b/youtube_dl/extractor/redtube.py
@@ -4,8 +4,10 @@ import re
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
+ merge_dicts,
str_to_int,
unified_strdate,
url_or_none,
@@ -42,14 +44,24 @@ class RedTubeIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.redtube.com/%s' % video_id, video_id)
- if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
- raise ExtractorError('Video %s has been removed' % video_id, expected=True)
+ ERRORS = (
+ (('video-deleted-info', '>This video has been removed'), 'has been removed'),
+ (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
+ )
- title = self._html_search_regex(
- (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
- r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
- webpage, 'title', group='title',
- default=None) or self._og_search_title(webpage)
+ for patterns, message in ERRORS:
+ if any(p in webpage for p in patterns):
+ raise ExtractorError(
+ 'Video %s %s' % (video_id, message), expected=True)
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ if not info.get('title'):
+ info['title'] = self._html_search_regex(
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
formats = []
sources = self._parse_json(
@@ -66,7 +78,7 @@ class RedTubeIE(InfoExtractor):
})
medias = self._parse_json(
self._search_regex(
- r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
+ r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
'media definitions', default='{}'),
video_id, fatal=False)
if medias and isinstance(medias, list):
@@ -74,6 +86,12 @@ class RedTubeIE(InfoExtractor):
format_url = url_or_none(media.get('videoUrl'))
if not format_url:
continue
+ if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ continue
format_id = media.get('quality')
formats.append({
'url': format_url,
@@ -88,28 +106,28 @@ class RedTubeIE(InfoExtractor):
thumbnail = self._og_search_thumbnail(webpage)
upload_date = unified_strdate(self._search_regex(
- r'<span[^>]+>ADDED ([^<]+)<',
- webpage, 'upload date', fatal=False))
+ r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
+ webpage, 'upload date', default=None))
duration = int_or_none(self._og_search_property(
'video:duration', webpage, default=None) or self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
(r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
- r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'),
- webpage, 'view count', fatal=False))
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
+ r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
+ webpage, 'view count', default=None))
# No self-labeling, but they describe themselves as
# "Home of Videos Porno"
age_limit = 18
- return {
+ return merge_dicts(info, {
'id': video_id,
'ext': 'mp4',
- 'title': title,
'thumbnail': thumbnail,
'upload_date': upload_date,
'duration': duration,
'view_count': view_count,
'age_limit': age_limit,
'formats': formats,
- }
+ })
diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py
deleted file mode 100644
index 833d8a2f0..000000000
--- a/youtube_dl/extractor/revision3.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_iso8601,
- unescapeHTML,
- qualities,
-)
-
-
-class Revision3EmbedIE(InfoExtractor):
- IE_NAME = 'revision3:embed'
- _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
- _TEST = {
- 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
- 'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
- 'info_dict': {
- 'id': '67558',
- 'ext': 'mp4',
- 'title': 'The Pros & Cons Of Zoos',
- 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
- 'uploader_id': 'dnews',
- 'uploader': 'DNews',
- }
- }
- _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('playlist_id')
- playlist_type = mobj.group('playlist_type') or 'video_id'
- video_data = self._download_json(
- 'http://revision3.com/api/getPlaylist.json', playlist_id, query={
- 'api_key': self._API_KEY,
- 'codecs': 'h264,vp8,theora',
- playlist_type: playlist_id,
- })['items'][0]
-
- formats = []
- for vcodec, media in video_data['media'].items():
- for quality_id, quality in media.items():
- if quality_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
- quality['url'], playlist_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'url': quality['url'],
- 'format_id': '%s-%s' % (vcodec, quality_id),
- 'tbr': int_or_none(quality.get('bitrate')),
- 'vcodec': vcodec,
- })
- self._sort_formats(formats)
-
- return {
- 'id': playlist_id,
- 'title': unescapeHTML(video_data['title']),
- 'description': unescapeHTML(video_data.get('summary')),
- 'uploader': video_data.get('show', {}).get('name'),
- 'uploader_id': video_data.get('show', {}).get('slug'),
- 'duration': int_or_none(video_data.get('duration')),
- 'formats': formats,
- }
-
-
-class Revision3IE(InfoExtractor):
- IE_NAME = 'revision'
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
- _TESTS = [{
- 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
- 'md5': 'd94a72d85d0a829766de4deb8daaf7df',
- 'info_dict': {
- 'id': '71089',
- 'display_id': 'technobuffalo/5-google-predictions-for-2016',
- 'ext': 'webm',
- 'title': '5 Google Predictions for 2016',
- 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
- 'upload_date': '20151228',
- 'timestamp': 1451325600,
- 'duration': 187,
- 'uploader': 'TechnoBuffalo',
- 'uploader_id': 'technobuffalo',
- }
- }, {
- # Show
- 'url': 'http://revision3.com/variant',
- 'only_matching': True,
- }, {
- # Tag
- 'url': 'http://revision3.com/vr',
- 'only_matching': True,
- }]
- _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
-
- def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
- site = domain.split('.')[0]
- page_info = self._download_json(
- self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
-
- page_data = page_info['data']
- page_type = page_data['type']
- if page_type in ('episode', 'embed'):
- show_data = page_data['show']['data']
- page_id = compat_str(page_data['id'])
- video_id = compat_str(page_data['video']['data']['id'])
-
- preference = qualities(['mini', 'small', 'medium', 'large'])
- thumbnails = [{
- 'url': image_url,
- 'id': image_id,
- 'preference': preference(image_id)
- } for image_id, image_url in page_data.get('images', {}).items()]
-
- info = {
- 'id': page_id,
- 'display_id': display_id,
- 'title': unescapeHTML(page_data['name']),
- 'description': unescapeHTML(page_data.get('summary')),
- 'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
- 'author': page_data.get('author'),
- 'uploader': show_data.get('name'),
- 'uploader_id': show_data.get('slug'),
- 'thumbnails': thumbnails,
- 'extractor_key': site,
- }
-
- if page_type == 'embed':
- info.update({
- '_type': 'url_transparent',
- 'url': page_data['video']['data']['embed'],
- })
- return info
-
- info.update({
- '_type': 'url_transparent',
- 'url': 'revision3:%s' % video_id,
- })
- return info
- else:
- list_data = page_info[page_type]['data']
- episodes_data = page_info['episodes']['data']
- num_episodes = page_info['meta']['totalEpisodes']
- processed_episodes = 0
- entries = []
- page_num = 1
- while True:
- entries.extend([{
- '_type': 'url',
- 'url': 'http://%s%s' % (domain, episode['path']),
- 'id': compat_str(episode['id']),
- 'ie_key': 'Revision3',
- 'extractor_key': site,
- } for episode in episodes_data])
- processed_episodes += len(episodes_data)
- if processed_episodes == num_episodes:
- break
- page_num += 1
- episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
- domain, display_id + '/' + compat_str(page_num), domain),
- display_id)['episodes']['data']
-
- return self.playlist_result(
- entries, compat_str(list_data['id']),
- list_data.get('name'), list_data.get('summary'))
diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py
index e921ca3e6..c3623edcc 100644
--- a/youtube_dl/extractor/rmcdecouverte.py
+++ b/youtube_dl/extractor/rmcdecouverte.py
@@ -1,38 +1,46 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from .brightcove import BrightcoveLegacyIE
from ..compat import (
compat_parse_qs,
compat_urlparse,
)
+from ..utils import smuggle_url
class RMCDecouverteIE(InfoExtractor):
- _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
+ _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))'
- _TEST = {
- 'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
+ _TESTS = [{
+ 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/',
'info_dict': {
- 'id': '5419055995001',
+ 'id': '5983675500001',
'ext': 'mp4',
- 'title': 'UN DELICIEUX PROJET',
- 'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
+ 'title': 'CORVETTE',
+ 'description': 'md5:c1e8295521e45ffebf635d6a7658f506',
'uploader_id': '1969646226001',
- 'upload_date': '20170502',
- 'timestamp': 1493745308,
+ 'upload_date': '20181226',
+ 'timestamp': 1545861635,
},
'params': {
'skip_download': True,
},
'skip': 'only available for a week',
- }
+ }, {
+ # live, geo restricted, bypassable
+ 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/',
+ 'only_matching': True,
+ }]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id') or mobj.group('live_id')
+ webpage = self._download_webpage(url, display_id)
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if brightcove_legacy_url:
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
@@ -41,5 +49,7 @@ class RMCDecouverteIE(InfoExtractor):
brightcove_id = self._search_regex(
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
return self.url_result(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
- brightcove_id)
+ smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['FR']}),
+ 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py
index 857434540..8883639b2 100644
--- a/youtube_dl/extractor/roosterteeth.py
+++ b/youtube_dl/extractor/roosterteeth.py
@@ -1,35 +1,34 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
ExtractorError,
int_or_none,
- strip_or_none,
- unescapeHTML,
+ str_or_none,
urlencode_postdata,
)
class RoosterTeethIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
- _LOGIN_URL = 'https://roosterteeth.com/login'
+ _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'roosterteeth'
_TESTS = [{
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'md5': 'e2bd7764732d785ef797700a2489f212',
'info_dict': {
- 'id': '26576',
+ 'id': '9156',
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'ext': 'mp4',
- 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement',
- 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5',
+ 'title': 'Million Dollars, But... The Game Announcement',
+ 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
'thumbnail': r're:^https?://.*\.png$',
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
- 'comment_count': int,
},
}, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
@@ -47,102 +46,92 @@ class RoosterTeethIE(InfoExtractor):
# only available for FIRST members
'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
'only_matching': True,
+ }, {
+ 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'only_matching': True,
}]
+ _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
def _login(self):
username, password = self._get_login_info()
if username is None:
return
- login_page = self._download_webpage(
- self._LOGIN_URL, None,
- note='Downloading login page',
- errnote='Unable to download login page')
-
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'username': username,
- 'password': password,
- })
-
- login_request = self._download_webpage(
- self._LOGIN_URL, None,
- note='Logging in',
- data=urlencode_postdata(login_form),
- headers={
- 'Referer': self._LOGIN_URL,
- })
-
- if not any(re.search(p, login_request) for p in (
- r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
- r'>Sign Out<')):
- error = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
- login_request, 'alert', default=None, group='error')
- if error:
- raise ExtractorError('Unable to login: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
+ try:
+ self._download_json(
+ 'https://auth.roosterteeth.com/oauth/token',
+ None, 'Logging in', data=urlencode_postdata({
+ 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ }))
+ except ExtractorError as e:
+ msg = 'Unable to login'
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
+ if resp:
+ error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
+ if error:
+ msg += ': ' + error
+ self.report_warning(msg)
def _real_initialize(self):
+ if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'):
+ return
self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- episode = strip_or_none(unescapeHTML(self._search_regex(
- (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'<title>(?P<title>[^<]+)</title>'), webpage, 'title',
- default=None, group='title')))
-
- title = strip_or_none(self._og_search_title(
- webpage, default=None)) or episode
-
- m3u8_url = self._search_regex(
- r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1',
- webpage, 'm3u8 url', default=None, group='url')
-
- if not m3u8_url:
- if re.search(r'<div[^>]+class=["\']non-sponsor', webpage):
- self.raise_login_required(
- '%s is only available for FIRST members' % display_id)
-
- if re.search(r'<div[^>]+class=["\']golive-gate', webpage):
- self.raise_login_required('%s is not available yet' % display_id)
-
- raise ExtractorError('Unable to extract m3u8 URL')
+ api_episode_url = self._EPISODE_BASE_URL + display_id
+
+ try:
+ m3u8_url = self._download_json(
+ api_episode_url + '/videos', display_id,
+ 'Downloading video JSON metadata')['data'][0]['attributes']['url']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
+ self.raise_login_required(
+ '%s is only available for FIRST members' % display_id)
+ raise
formats = self._extract_m3u8_formats(
- m3u8_url, display_id, ext='mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
+ m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
self._sort_formats(formats)
- description = strip_or_none(self._og_search_description(webpage))
- thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage))
-
- series = self._search_regex(
- (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'),
- webpage, 'series', fatal=False)
-
- comment_count = int_or_none(self._search_regex(
- r'>Comments \((\d+)\)<', webpage,
- 'comment count', fatal=False))
-
- video_id = self._search_regex(
- (r'containerId\s*=\s*["\']episode-(\d+)\1',
- r'<div[^<]+id=["\']episode-(\d+)'), webpage,
- 'video id', default=display_id)
+ episode = self._download_json(
+ api_episode_url, display_id,
+ 'Downloading episode JSON metadata')['data'][0]
+ attributes = episode['attributes']
+ title = attributes.get('title') or attributes['display_title']
+ video_id = compat_str(episode['id'])
+
+ thumbnails = []
+ for image in episode.get('included', {}).get('images', []):
+ if image.get('type') == 'episode_image':
+ img_attributes = image.get('attributes') or {}
+ for k in ('thumb', 'small', 'medium', 'large'):
+ img_url = img_attributes.get(k)
+ if img_url:
+ thumbnails.append({
+ 'id': k,
+ 'url': img_url,
+ })
return {
'id': video_id,
'display_id': display_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'series': series,
- 'episode': episode,
- 'comment_count': comment_count,
+ 'description': attributes.get('description') or attributes.get('caption'),
+ 'thumbnails': thumbnails,
+ 'series': attributes.get('show_title'),
+ 'season_number': int_or_none(attributes.get('season_number')),
+ 'season_id': attributes.get('season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(attributes.get('number')),
+ 'episode_id': str_or_none(episode.get('uuid')),
'formats': formats,
+ 'channel_id': attributes.get('channel_id'),
+ 'duration': int_or_none(attributes.get('length')),
}
diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py
index a6fac6c35..1fbc72915 100644
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dl/extractor/rte.py
@@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
from ..utils import (
float_or_none,
parse_iso8601,
+ str_or_none,
+ try_get,
unescapeHTML,
+ url_or_none,
ExtractorError,
)
@@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
def _real_extract(self, url):
item_id = self._match_id(url)
- try:
- json_string = self._download_json(
- 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
- item_id)
- except ExtractorError as ee:
- if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
- error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
- if error_info:
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, error_info['message']),
- expected=True)
- raise
-
- # NB the string values in the JSON are stored using XML escaping(!)
- show = json_string['shows'][0]
- title = unescapeHTML(show['title'])
- description = unescapeHTML(show.get('description'))
- thumbnail = show.get('thumbnail')
- duration = float_or_none(show.get('duration'), 1000)
- timestamp = parse_iso8601(show.get('published'))
-
- mg = show['media:group'][0]
-
+ info_dict = {}
formats = []
- if mg.get('url'):
- m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
- if m:
- m = m.groupdict()
- formats.append({
- 'url': m['url'] + '/' + m['app'],
- 'app': m['app'],
- 'play_path': m['playpath'],
- 'player_url': url,
- 'ext': 'flv',
- 'format_id': 'rtmp',
- })
-
- if mg.get('hls_server') and mg.get('hls_url'):
- formats.extend(self._extract_m3u8_formats(
- mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
-
- if mg.get('hds_server') and mg.get('hds_url'):
- formats.extend(self._extract_f4m_formats(
- mg['hds_server'] + mg['hds_url'], item_id,
- f4m_id='hds', fatal=False))
+ ENDPOINTS = (
+ 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
+ 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
+ )
+
+ for num, ep_url in enumerate(ENDPOINTS, start=1):
+ try:
+ data = self._download_json(ep_url + item_id, item_id)
+ except ExtractorError as ee:
+ if num < len(ENDPOINTS) or formats:
+ continue
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
+ error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
+ if error_info:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error_info['message']),
+ expected=True)
+ raise
+
+ # NB the string values in the JSON are stored using XML escaping(!)
+ show = try_get(data, lambda x: x['shows'][0], dict)
+ if not show:
+ continue
+
+ if not info_dict:
+ title = unescapeHTML(show['title'])
+ description = unescapeHTML(show.get('description'))
+ thumbnail = show.get('thumbnail')
+ duration = float_or_none(show.get('duration'), 1000)
+ timestamp = parse_iso8601(show.get('published'))
+ info_dict = {
+ 'id': item_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ }
+
+ mg = try_get(show, lambda x: x['media:group'][0], dict)
+ if not mg:
+ continue
+
+ if mg.get('url'):
+ m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
+ if m:
+ m = m.groupdict()
+ formats.append({
+ 'url': m['url'] + '/' + m['app'],
+ 'app': m['app'],
+ 'play_path': m['playpath'],
+ 'player_url': url,
+ 'ext': 'flv',
+ 'format_id': 'rtmp',
+ })
+
+ if mg.get('hls_server') and mg.get('hls_url'):
+ formats.extend(self._extract_m3u8_formats(
+ mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+ if mg.get('hds_server') and mg.get('hds_url'):
+ formats.extend(self._extract_f4m_formats(
+ mg['hds_server'] + mg['hds_url'], item_id,
+ f4m_id='hds', fatal=False))
+
+ mg_rte_server = str_or_none(mg.get('rte:server'))
+ mg_url = str_or_none(mg.get('url'))
+ if mg_rte_server and mg_url:
+ hds_url = url_or_none(mg_rte_server + mg_url)
+ if hds_url:
+ formats.extend(self._extract_f4m_formats(
+ hds_url, item_id, f4m_id='hds', fatal=False))
self._sort_formats(formats)
- return {
- 'id': item_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- }
+ info_dict['formats'] = formats
+ return info_dict
class RteIE(RteBaseIE):
diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py
index 18a327d81..70f000ca8 100644
--- a/youtube_dl/extractor/rtl2.py
+++ b/youtube_dl/extractor/rtl2.py
@@ -21,7 +21,7 @@ from ..utils import (
class RTL2IE(InfoExtractor):
IE_NAME = 'rtl2'
- _VALID_URL = r'http?://(?:www\.)?rtl2\.de/[^?#]*?/(?P<id>[^?#/]*?)(?:$|/(?:$|[?#]))'
+ _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
'info_dict': {
@@ -34,10 +34,11 @@ class RTL2IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}, {
'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
'info_dict': {
- 'id': '21040-anna-erwischt-alex',
+ 'id': 'anna-erwischt-alex',
'ext': 'mp4',
'title': 'Anna erwischt Alex!',
'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
@@ -46,31 +47,29 @@ class RTL2IE(InfoExtractor):
# rtmp download
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
}]
def _real_extract(self, url):
- # Some rtl2 urls have no slash at the end, so append it.
- if not url.endswith('/'):
- url += '/'
-
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- mobj = re.search(
- r'<div[^>]+data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
- webpage)
- if mobj:
- vico_id = mobj.group('vico_id')
- vivi_id = mobj.group('vivi_id')
- else:
- vico_id = self._html_search_regex(
- r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
- vivi_id = self._html_search_regex(
- r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
+ vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups()
+ if not vico_id:
+ webpage = self._download_webpage(url, display_id)
+
+ mobj = re.search(
+ r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
+ webpage)
+ if mobj:
+ vico_id = mobj.group('vico_id')
+ vivi_id = mobj.group('vivi_id')
+ else:
+ vico_id = self._html_search_regex(
+ r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
+ vivi_id = self._html_search_regex(
+ r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')
info = self._download_json(
- 'http://www.rtl2.de/sites/default/modules/rtl2/mediathek/php/get_video_jw.php',
- video_id, query={
+ 'https://service.rtl2.de/api-player-vipo/video.php',
+ display_id, query={
'vico_id': vico_id,
'vivi_id': vivi_id,
})
@@ -89,7 +88,7 @@ class RTL2IE(InfoExtractor):
'format_id': 'rtmp',
'url': rtmp_url,
'play_path': stream_url,
- 'player_url': 'http://www.rtl2.de/flashplayer/vipo_player.swf',
+ 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
'page_url': url,
'flash_version': 'LNX 11,2,202,429',
'rtmp_conn': rtmp_conn,
@@ -99,12 +98,12 @@ class RTL2IE(InfoExtractor):
m3u8_url = video_info.get('streamurl_hls')
if m3u8_url:
- formats.extend(self._extract_akamai_formats(m3u8_url, video_id))
+ formats.extend(self._extract_akamai_formats(m3u8_url, display_id))
self._sort_formats(formats)
return {
- 'id': video_id,
+ 'id': display_id,
'title': title,
'thumbnail': video_info.get('image'),
'description': video_info.get('beschreibung'),
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py
index be36acc46..fadca8c17 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -32,7 +32,7 @@ class RtlNlIE(InfoExtractor):
'duration': 1167.96,
},
}, {
- # best format avaialble a3t
+ # best format available a3t
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
'md5': 'dea7474214af1271d91ef332fb8be7ea',
'info_dict': {
@@ -45,7 +45,7 @@ class RtlNlIE(InfoExtractor):
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
}
}, {
- # empty synopsis and missing episodes (see https://github.com/rg3/youtube-dl/issues/6275)
+ # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
# best format available nettv
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
'info_dict': {
diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py
index 533ee27cb..02986f442 100644
--- a/youtube_dl/extractor/rtp.py
+++ b/youtube_dl/extractor/rtp.py
@@ -1,9 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+)
class RTPIE(InfoExtractor):
@@ -18,10 +20,6 @@ class RTPIE(InfoExtractor):
'description': 'As paixões musicais de António Cartaxo e António Macedo',
'thumbnail': r're:^https?://.*\.jpg',
},
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
}, {
'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
'only_matching': True,
@@ -33,57 +31,36 @@ class RTPIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
title = self._html_search_meta(
'twitter:title', webpage, display_name='title', fatal=True)
- description = self._html_search_meta('description', webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- player_config = self._search_regex(
- r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
- config = self._parse_json(player_config, video_id)
-
- path, ext = config.get('file').rsplit('.', 1)
- formats = [{
- 'format_id': 'rtmp',
- 'ext': ext,
- 'vcodec': config.get('type') == 'audio' and 'none' or None,
- 'preference': -2,
- 'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
- 'app': config.get('application'),
- 'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
- 'page_url': url,
- 'rtmp_live': config.get('live', False),
- 'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
- 'rtmp_real_time': True,
- }]
-
- # Construct regular HTTP download URLs
- replacements = {
- 'audio': {
- 'format_id': 'mp3',
- 'pattern': r'^nas2\.share/wavrss/',
- 'repl': 'http://rsspod.rtp.pt/podcasts/',
- 'vcodec': 'none',
- },
- 'video': {
- 'format_id': 'mp4_h264',
- 'pattern': r'^nas2\.share/h264/',
- 'repl': 'http://rsspod.rtp.pt/videocasts/',
- 'vcodec': 'h264',
- },
- }
- r = replacements[config['type']]
- if re.match(r['pattern'], config['file']) is not None:
- formats.append({
- 'format_id': r['format_id'],
- 'url': re.sub(r['pattern'], r['repl'], config['file']),
- 'vcodec': r['vcodec'],
- })
- self._sort_formats(formats)
+ config = self._parse_json(self._search_regex(
+ r'(?s)RTPPlayer\(({.+?})\);', webpage,
+ 'player config'), video_id, js_to_json)
+ file_url = config['file']
+ ext = determine_ext(file_url)
+ if ext == 'm3u8':
+ file_key = config.get('fileKey')
+ formats = self._extract_m3u8_formats(
+ file_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=file_key)
+ if file_key:
+ formats.append({
+ 'url': 'https://cdn-ondemand.rtp.pt' + file_key,
+ 'preference': 1,
+ })
+ self._sort_formats(formats)
+ else:
+ formats = [{
+ 'url': file_url,
+ 'ext': ext,
+ }]
+ if config.get('mediaType') == 'audio':
+ for f in formats:
+ f['vcodec'] = 'none'
return {
'id': video_id,
'title': title,
'formats': formats,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'description': self._html_search_meta(['description', 'twitter:description'], webpage),
+ 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
}
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py
deleted file mode 100644
index f036f6757..000000000
--- a/youtube_dl/extractor/rudo.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- js_to_json,
- get_element_by_class,
- unified_strdate,
-)
-
-
-class RudoIE(InfoExtractor):
- _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
-
- _TEST = {
- 'url': 'http://rudo.video/vod/oTzw0MGnyG',
- 'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
- 'info_dict': {
- 'id': 'oTzw0MGnyG',
- 'ext': 'mp4',
- 'title': 'Comentario Tomás Mosciatti',
- 'upload_date': '20160617',
- },
- }
-
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
- webpage)
- if mobj:
- return mobj.group('url')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
-
- jwplayer_data = self._parse_json(self._search_regex(
- r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
- transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
-
- info_dict = self._parse_jwplayer_data(
- jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash')
-
- info_dict.update({
- 'title': self._og_search_title(webpage),
- 'upload_date': unified_strdate(get_element_by_class('date', webpage)),
- })
-
- return info_dict
diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py
deleted file mode 100644
index ebf9808d5..000000000
--- a/youtube_dl/extractor/ruleporn.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from __future__ import unicode_literals
-
-from .nuevo import NuevoBaseIE
-
-
-class RulePornIE(NuevoBaseIE):
- _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/',
- 'md5': '86861ebc624a1097c7c10eaf06d7d505',
- 'info_dict': {
- 'id': '48212',
- 'display_id': 'brunette-nympho-chick-takes-her-boyfriend-in-every-angle',
- 'ext': 'mp4',
- 'title': 'Brunette Nympho Chick Takes Her Boyfriend In Every Angle',
- 'description': 'md5:6d28be231b981fff1981deaaa03a04d5',
- 'age_limit': 18,
- 'duration': 635.1,
- }
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._search_regex(
- r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id')
-
- title = self._search_regex(
- r'<h2[^>]+title=(["\'])(?P<url>.+?)\1',
- webpage, 'title', group='url')
- description = self._html_search_meta('description', webpage)
-
- info = self._extract_nuevo(
- 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id,
- video_id)
- info.update({
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'age_limit': 18
- })
- return info
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 261bcbb83..8f54d5675 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -21,7 +21,17 @@ from ..utils import (
class RutubeBaseIE(InfoExtractor):
- def _extract_video(self, video, video_id=None, require_title=True):
+ def _download_api_info(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/video/%s/' % video_id,
+ video_id, 'Downloading video JSON',
+ 'Unable to download video JSON', query=query)
+
+ @staticmethod
+ def _extract_info(video, video_id=None, require_title=True):
title = video['title'] if require_title else video.get('title')
age_limit = video.get('is_adult')
@@ -32,7 +42,7 @@ class RutubeBaseIE(InfoExtractor):
category = try_get(video, lambda x: x['category']['name'])
return {
- 'id': video.get('id') or video_id,
+ 'id': video.get('id') or video_id if video_id else video['id'],
'title': title,
'description': video.get('description'),
'thumbnail': video.get('thumbnail_url'),
@@ -47,6 +57,42 @@ class RutubeBaseIE(InfoExtractor):
'is_live': bool_or_none(video.get('is_livestream')),
}
+ def _download_and_extract_info(self, video_id, query=None):
+ return self._extract_info(
+ self._download_api_info(video_id, query=query), video_id)
+
+ def _download_api_options(self, video_id, query=None):
+ if not query:
+ query = {}
+ query['format'] = 'json'
+ return self._download_json(
+ 'http://rutube.ru/api/play/options/%s/' % video_id,
+ video_id, 'Downloading options JSON',
+ 'Unable to download options JSON',
+ headers=self.geo_verification_headers(), query=query)
+
+ def _extract_formats(self, options, video_id):
+ formats = []
+ for format_id, format_url in options['video_balancer'].items():
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+ return formats
+
+ def _download_and_extract_formats(self, video_id, query=None):
+ return self._extract_formats(
+ self._download_api_options(video_id, query=query), video_id)
+
class RutubeIE(RutubeBaseIE):
IE_NAME = 'rutube'
@@ -55,13 +101,13 @@ class RutubeIE(RutubeBaseIE):
_TESTS = [{
'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
- 'md5': '79938ade01294ef7e27574890d0d3769',
+ 'md5': '1d24f180fac7a02f3900712e5a5764d6',
'info_dict': {
'id': '3eac3b4561676c17df9132a9a1e62e3e',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Раненный кенгуру забежал в аптеку',
'description': 'http://www.ntdtv.ru ',
- 'duration': 80,
+ 'duration': 81,
'uploader': 'NTDRussian',
'uploader_id': '29790',
'timestamp': 1381943602,
@@ -94,38 +140,12 @@ class RutubeIE(RutubeBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://rutube.ru/api/video/%s/?format=json' % video_id,
- video_id, 'Downloading video JSON')
-
- info = self._extract_video(video, video_id)
-
- options = self._download_json(
- 'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
- video_id, 'Downloading options JSON')
-
- formats = []
- for format_id, format_url in options['video_balancer'].items():
- ext = determine_ext(format_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- format_url, video_id, f4m_id=format_id, fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- info['formats'] = formats
+ info = self._download_and_extract_info(video_id)
+ info['formats'] = self._download_and_extract_formats(video_id)
return info
-class RutubeEmbedIE(InfoExtractor):
+class RutubeEmbedIE(RutubeBaseIE):
IE_NAME = 'rutube:embed'
IE_DESC = 'Rutube embedded videos'
_VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)'
@@ -134,7 +154,7 @@ class RutubeEmbedIE(InfoExtractor):
'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=',
'info_dict': {
'id': 'a10e53b86e8f349080f718582ce4c661',
- 'ext': 'flv',
+ 'ext': 'mp4',
'timestamp': 1387830582,
'upload_date': '20131223',
'uploader_id': '297833',
@@ -148,16 +168,26 @@ class RutubeEmbedIE(InfoExtractor):
}, {
'url': 'http://rutube.ru/play/embed/8083783',
'only_matching': True,
+ }, {
+ # private video
+ 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ',
+ 'only_matching': True,
}]
def _real_extract(self, url):
embed_id = self._match_id(url)
- webpage = self._download_webpage(url, embed_id)
-
- canonical_url = self._html_search_regex(
- r'<link\s+rel="canonical"\s+href="([^"]+?)"', webpage,
- 'Canonical URL')
- return self.url_result(canonical_url, RutubeIE.ie_key())
+ # Query may contain private videos token and should be passed to API
+ # requests (see #19163)
+ query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ options = self._download_api_options(embed_id, query)
+ video_id = options['effective_video']
+ formats = self._extract_formats(options, video_id)
+ info = self._download_and_extract_info(video_id, query)
+ info.update({
+ 'extractor_key': 'Rutube',
+ 'formats': formats,
+ })
+ return info
class RutubePlaylistBaseIE(RutubeBaseIE):
@@ -180,7 +210,7 @@ class RutubePlaylistBaseIE(RutubeBaseIE):
video_url = url_or_none(result.get('video_url'))
if not video_url:
continue
- entry = self._extract_video(result, require_title=False)
+ entry = self._extract_info(result, require_title=False)
entry.update({
'_type': 'url',
'url': video_url,
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py
index 9fa8688f8..f984040aa 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dl/extractor/ruutu.py
@@ -59,13 +59,28 @@ class RuutuIE(InfoExtractor):
'url': 'http://www.ruutu.fi/video/3193728',
'only_matching': True,
},
+ {
+ # audio podcast
+ 'url': 'https://www.supla.fi/supla/3382410',
+ 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908',
+ 'info_dict': {
+ 'id': '3382410',
+ 'ext': 'mp3',
+ 'title': 'Mikä ihmeen poltergeist?',
+ 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'age_limit': 0,
+ },
+ 'expected_warnings': ['HTTP Error 502: Bad Gateway'],
+ }
]
def _real_extract(self, url):
video_id = self._match_id(url)
video_xml = self._download_xml(
- 'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
+ 'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
+ query={'id': video_id})
formats = []
processed_urls = []
@@ -76,8 +91,8 @@ class RuutuIE(InfoExtractor):
extract_formats(child)
elif child.tag.endswith('File'):
video_url = child.text
- if (not video_url or video_url in processed_urls or
- any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
+ if (not video_url or video_url in processed_urls
+ or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))):
continue
processed_urls.append(video_url)
ext = determine_ext(video_url)
@@ -93,6 +108,12 @@ class RuutuIE(InfoExtractor):
continue
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp3' or child.tag == 'AudioMediaFile':
+ formats.append({
+ 'format_id': 'audio',
+ 'url': video_url,
+ 'vcodec': 'none',
+ })
else:
proto = compat_urllib_parse_urlparse(video_url).scheme
if not child.tag.startswith('HTTP') and proto != 'rtmp':
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
index 30e2a38b4..2cc665122 100644
--- a/youtube_dl/extractor/safari.py
+++ b/youtube_dl/extractor/safari.py
@@ -1,24 +1,26 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
from ..utils import (
ExtractorError,
- sanitized_Request,
- std_headers,
- urlencode_postdata,
update_url_query,
)
class SafariBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://www.safaribooksonline.com/accounts/login/'
+ _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
_NETRC_MACHINE = 'safari'
- _API_BASE = 'https://www.safaribooksonline.com/api/v1'
+ _API_BASE = 'https://learning.oreilly.com/api/v1'
_API_FORMAT = 'json'
LOGGED_IN = False
@@ -31,44 +33,53 @@ class SafariBaseIE(InfoExtractor):
if username is None:
return
- headers = std_headers.copy()
- if 'Referer' not in headers:
- headers['Referer'] = self._LOGIN_URL
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login form', headers=headers)
+ _, urlh = self._download_webpage_handle(
+ 'https://learning.oreilly.com/accounts/login-check/', None,
+ 'Downloading login page')
- def is_logged(webpage):
- return any(re.search(p, webpage) for p in (
- r'href=["\']/accounts/logout/', r'>Sign Out<'))
+ def is_logged(urlh):
+ return 'learning.oreilly.com/home/' in urlh.geturl()
- if is_logged(login_page):
+ if is_logged(urlh):
self.LOGGED_IN = True
return
- csrf = self._html_search_regex(
- r"name='csrfmiddlewaretoken'\s+value='([^']+)'",
- login_page, 'csrf token')
+ redirect_url = urlh.geturl()
+ parsed_url = compat_urlparse.urlparse(redirect_url)
+ qs = compat_parse_qs(parsed_url.query)
+ next_uri = compat_urlparse.urljoin(
+ 'https://api.oreilly.com', qs['next'][0])
+
+ auth, urlh = self._download_json_handle(
+ 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+ data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'redirect_uri': next_uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': redirect_url,
+ }, expected_status=400)
+
+ credentials = auth.get('credentials')
+ if (not auth.get('logged_in') and not auth.get('redirect_uri')
+ and credentials):
+ raise ExtractorError(
+ 'Unable to login: %s' % credentials, expected=True)
- login_form = {
- 'csrfmiddlewaretoken': csrf,
- 'email': username,
- 'password1': password,
- 'login': 'Sign In',
- 'next': '',
- }
+ # oreilly serves two same instances of the following cookies
+ # in Set-Cookie header and expects first one to be actually set
+ for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
+ self._apply_first_set_cookie_header(urlh, cookie)
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form), headers=headers)
- login_page = self._download_webpage(
- request, None, 'Logging in')
+ _, urlh = self._download_webpage_handle(
+ auth.get('redirect_uri') or next_uri, None, 'Completing login',)
- if not is_logged(login_page):
- raise ExtractorError(
- 'Login failed; make sure your credentials are correct and try again.',
- expected=True)
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
- self.LOGGED_IN = True
+ raise ExtractorError('Unable to log in')
class SafariIE(SafariBaseIE):
@@ -76,7 +87,7 @@ class SafariIE(SafariBaseIE):
IE_DESC = 'safaribooksonline.com online video'
_VALID_URL = r'''(?x)
https?://
- (?:www\.)?safaribooksonline\.com/
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
(?:
library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
@@ -104,6 +115,12 @@ class SafariIE(SafariBaseIE):
}, {
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+ 'only_matching': True,
}]
_PARTNER_ID = '1926081'
@@ -147,7 +164,8 @@ class SafariIE(SafariBaseIE):
kaltura_session = self._download_json(
'%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
video_id, 'Downloading kaltura session JSON',
- 'Unable to download kaltura session JSON', fatal=False)
+ 'Unable to download kaltura session JSON', fatal=False,
+ headers={'Accept': 'application/json'})
if kaltura_session:
session = kaltura_session.get('session')
if session:
@@ -160,7 +178,7 @@ class SafariIE(SafariBaseIE):
class SafariApiIE(SafariBaseIE):
IE_NAME = 'safari:api'
- _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+ _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
_TESTS = [{
'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
@@ -185,7 +203,7 @@ class SafariCourseIE(SafariBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?safaribooksonline\.com/
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
(?:
library/view/[^/]+|
api/v1/book|
@@ -213,6 +231,12 @@ class SafariCourseIE(SafariBaseIE):
}, {
'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'only_matching': True,
}]
@classmethod
diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py
index 30f9cf824..21e44b69a 100644
--- a/youtube_dl/extractor/savefrom.py
+++ b/youtube_dl/extractor/savefrom.py
@@ -30,8 +30,5 @@ class SaveFromIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = os.path.splitext(url.split('/')[-1])[0]
- return {
- '_type': 'url',
- 'id': video_id,
- 'url': mobj.group('url'),
- }
+
+ return self.url_result(mobj.group('url'), video_id=video_id)
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py
index 845712a76..0e623ff7b 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dl/extractor/sbs.py
@@ -55,8 +55,8 @@ class SBSIE(InfoExtractor):
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
urls = player_params['releaseUrls']
- theplatform_url = (urls.get('progressive') or urls.get('html') or
- urls.get('standard') or player_params['relatedItemsURL'])
+ theplatform_url = (urls.get('progressive') or urls.get('html')
+ or urls.get('standard') or player_params['relatedItemsURL'])
return {
'_type': 'url_transparent',
diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py
index 62a6a8337..69a0d01f3 100644
--- a/youtube_dl/extractor/screencast.py
+++ b/youtube_dl/extractor/screencast.py
@@ -91,6 +91,15 @@ class ScreencastIE(InfoExtractor):
'meta tag video URL', default=None)
if video_url is None:
+ video_url = self._html_search_regex(
+ r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage, 'video url', default=None, group='url')
+
+ if video_url is None:
+ video_url = self._html_search_meta(
+ 'og:video', webpage, default=None)
+
+ if video_url is None:
raise ExtractorError('Cannot find video')
title = self._og_search_title(webpage, default=None)
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
index 4023aeef8..b40b4c4af 100644
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ b/youtube_dl/extractor/scrippsnetworks.py
@@ -7,6 +7,7 @@ import re
from .aws import AWSIE
from .anvato import AnvatoIE
+from .common import InfoExtractor
from ..utils import (
smuggle_url,
urlencode_postdata,
@@ -19,7 +20,7 @@ class ScrippsNetworksWatchIE(AWSIE):
_VALID_URL = r'''(?x)
https?://
watch\.
- (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
+ (?P<site>geniuskitchen)\.com/
(?:
player\.[A-Z0-9]+\.html\#|
show/(?:[^/]+/){2}|
@@ -28,38 +29,23 @@ class ScrippsNetworksWatchIE(AWSIE):
(?P<id>\d+)
'''
_TESTS = [{
- 'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
- 'md5': '26545fd676d939954c6808274bdb905a',
+ 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
'info_dict': {
- 'id': '4173834',
+ 'id': '4194875',
'ext': 'mp4',
- 'title': 'Best Ever Treehouses',
- 'description': "We're searching for the most over the top treehouses.",
+ 'title': 'Ample Hills Ice Cream Bike',
+ 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
'uploader': 'ANV',
- 'upload_date': '20170922',
- 'timestamp': 1506056400,
+ 'upload_date': '20171011',
+ 'timestamp': 1507698000,
},
'params': {
'skip_download': True,
},
'add_ie': [AnvatoIE.ie_key()],
- }, {
- 'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
- 'only_matching': True,
- }, {
- 'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
- 'only_matching': True,
- }, {
- 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
- 'only_matching': True,
}]
_SNI_TABLE = {
- 'hgtv': 'hgtv',
- 'diynetwork': 'diy',
- 'foodnetwork': 'food',
- 'cookingchanneltv': 'cook',
- 'travelchannel': 'trav',
'geniuskitchen': 'genius',
}
@@ -117,3 +103,50 @@ class ScrippsNetworksWatchIE(AWSIE):
'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
{'geo_countries': ['US']}),
AnvatoIE.ie_key(), video_id=mcp_id)
+
+
+class ScrippsNetworksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
+ 'info_dict': {
+ 'id': '0260338',
+ 'ext': 'mp4',
+ 'title': 'The Best of the Best',
+ 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
+ 'timestamp': 1475678834,
+ 'upload_date': '20161005',
+ 'uploader': 'SCNI-SCND',
+ },
+ 'add_ie': ['ThePlatform'],
+ }, {
+ 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
+ 'only_matching': True,
+ }]
+ _ACCOUNT_MAP = {
+ 'cookingchanneltv': 2433005105,
+ 'discovery': 2706091867,
+ 'diynetwork': 2433004575,
+ 'foodnetwork': 2433005105,
+ 'hgtv': 2433004575,
+ 'travelchannel': 2433005739,
+ }
+ _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
+
+ def _real_extract(self, url):
+ site, guid = re.match(self._VALID_URL, url).groups()
+ return self.url_result(smuggle_url(
+ self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
+ {'force_smil_url': True}), 'ThePlatform', guid)
diff --git a/youtube_dl/extractor/scte.py b/youtube_dl/extractor/scte.py
new file mode 100644
index 000000000..ca1de63b6
--- /dev/null
+++ b/youtube_dl/extractor/scte.py
@@ -0,0 +1,144 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class SCTEBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
+ _NETRC_MACHINE = 'scte'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']welcome\b', r'>Sign Out<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._hidden_inputs(login_popup)
+
+ login_form.update({
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ if '|pageRedirect|' not in response and not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class SCTEIE(SCTEBaseIE):
+ _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
+ 'info_dict': {
+ 'title': 'Introduction to DOCSIS Engineering Professional',
+ 'id': '31484',
+ },
+ 'playlist_count': 5,
+ 'skip': 'Requires account credentials',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+ context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
+ content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
+ context = decode_packed_codes(self._download_webpage(
+ '%smobile/data.js' % content_base, video_id))
+
+ data = self._parse_xml(
+ self._search_regex(
+ r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
+ video_id)
+
+ entries = []
+ for asset in data.findall('.//asset'):
+ asset_url = asset.get('url')
+ if not asset_url or not asset_url.endswith('.mp4'):
+ continue
+ asset_id = self._search_regex(
+ r'video_([^_]+)_', asset_url, 'asset id', default=None)
+ if not asset_id:
+ continue
+ entries.append({
+ 'id': asset_id,
+ 'title': title,
+ 'url': content_base + asset_url,
+ })
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class SCTECourseIE(SCTEBaseIE):
+ _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3639',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3073',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ title = self._search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title', default=None)
+
+ entries = []
+ for mobj in re.finditer(
+ r'''(?x)
+ <a[^>]+
+ href=(["\'])
+ (?P<url>
+ https?://learning\.scte\.org/mod/
+ (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
+ \bid=\d+
+ )
+ ''',
+ webpage):
+ item_url = mobj.group('url')
+ if item_url == url:
+ continue
+ ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
+ else SCTECourseIE.ie_key())
+ entries.append(self.url_result(item_url, ie=ie))
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py
index 3b9c65e7e..7872dc80d 100644
--- a/youtube_dl/extractor/seeker.py
+++ b/youtube_dl/extractor/seeker.py
@@ -4,34 +4,37 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import (
+ get_element_by_class,
+ strip_or_none,
+)
class SeekerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
_TESTS = [{
- # player.loadRevision3Item
'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
- 'md5': '30c1dc4030cc715cf05b423d0947ac18',
+ 'md5': '897d44bbe0d8986a2ead96de565a92db',
'info_dict': {
- 'id': '76243',
- 'ext': 'webm',
+ 'id': 'Elrn3gnY',
+ 'ext': 'mp4',
'title': 'Should Trump Be Required To Release His Tax Returns?',
- 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
- 'uploader': 'Seeker Daily',
- 'uploader_id': 'seekerdaily',
+ 'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
+ 'timestamp': 1490090165,
+ 'upload_date': '20170321',
}
}, {
'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
'playlist': [
{
- 'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
+ 'md5': '0497b9f20495174be73ae136949707d2',
'info_dict': {
- 'id': '67558',
+ 'id': 'FihYQ8AE',
'ext': 'mp4',
'title': 'The Pros & Cons Of Zoos',
- 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
- 'uploader': 'DNews',
- 'uploader_id': 'dnews',
+ 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
+ 'timestamp': 1490039133,
+ 'upload_date': '20170320',
},
}
],
@@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor):
def _real_extract(self, url):
display_id, article_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
- mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
- if mobj:
- playlist_type, playlist_id = mobj.groups()
- return self.url_result(
- 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
- else:
- entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
- r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
- return self.playlist_result(
- entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))
+ entries = []
+ for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
+ entries.append(self.url_result(
+ 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
+ return self.playlist_result(
+ entries, article_id,
+ self._og_search_title(webpage),
+ strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py
deleted file mode 100644
index c013d678f..000000000
--- a/youtube_dl/extractor/servingsys.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
-)
-
-
-class ServingSysIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
-
- _TEST = {
- 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
- 'info_dict': {
- 'id': '5349193',
- 'title': 'AdAPPter_Hyundai_demo',
- },
- 'playlist': [{
- 'md5': 'baed851342df6846eb8677a60a011a0f',
- 'info_dict': {
- 'id': '29955898',
- 'ext': 'flv',
- 'title': 'AdAPPter_Hyundai_demo (1)',
- 'duration': 74,
- 'tbr': 1378,
- 'width': 640,
- 'height': 400,
- },
- }, {
- 'md5': '979b4da2655c4bc2d81aeb915a8c5014',
- 'info_dict': {
- 'id': '29907998',
- 'ext': 'flv',
- 'title': 'AdAPPter_Hyundai_demo (2)',
- 'duration': 34,
- 'width': 854,
- 'height': 480,
- 'tbr': 516,
- },
- }],
- 'params': {
- 'playlistend': 2,
- },
- '_skip': 'Blocked in the US [sic]',
- }
-
- def _real_extract(self, url):
- pl_id = self._match_id(url)
- vast_doc = self._download_xml(url, pl_id)
-
- title = vast_doc.find('.//AdTitle').text
- media = vast_doc.find('.//MediaFile').text
- info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
-
- doc = self._download_xml(info_url, pl_id, 'Downloading video info')
- entries = [{
- '_type': 'video',
- 'id': a.attrib['id'],
- 'title': '%s (%s)' % (title, a.attrib['assetID']),
- 'url': a.attrib['URL'],
- 'duration': int_or_none(a.attrib.get('length')),
- 'tbr': int_or_none(a.attrib.get('bitrate')),
- 'height': int_or_none(a.attrib.get('height')),
- 'width': int_or_none(a.attrib.get('width')),
- } for a in doc.findall('.//AdditionalAssets/asset')]
-
- return {
- '_type': 'playlist',
- 'id': pl_id,
- 'title': title,
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py
index 264e1dd8b..9401bf2cf 100644
--- a/youtube_dl/extractor/servus.py
+++ b/youtube_dl/extractor/servus.py
@@ -1,31 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
class ServusIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:at|de)/p/[^/]+/(?P<id>AA-\w+|\d+-\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
+ servustv\.com/videos
+ )
+ /(?P<id>[aA]{2}-\w+|\d+-\d+)
+ '''
_TESTS = [{
- 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
- 'md5': '046dee641cda1c4cabe13baef3be2c1c',
+ # new URL schema
+ 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
+ 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
'info_dict': {
'id': 'AA-1T6VBU5PW1W12',
'ext': 'mp4',
- 'title': 'Die Grünen aus Volkssicht',
- 'description': 'md5:052b5da1cb2cd7d562ef1f19be5a5cba',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': 'Die Grünen aus Sicht des Volkes',
+ 'description': 'md5:1247204d85783afe3682644398ff2ec4',
+ 'thumbnail': r're:^https?://.*\.jpg',
}
}, {
+ # old URL schema
+ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+ 'only_matching': True,
+ }, {
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id = self._match_id(url).upper()
webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
+ title = self._search_regex(
+ (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
+ webpage, 'title', default=None,
+ group='title') or self._og_search_title(webpage)
+ title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
description = self._og_search_description(webpage)
thumbnail = self._og_search_thumbnail(webpage)
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
index b2250afdd..02295d1a4 100644
--- a/youtube_dl/extractor/shared.py
+++ b/youtube_dl/extractor/shared.py
@@ -1,10 +1,19 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_b64decode
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote_plus,
+)
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
+ js_to_json,
+ KNOWN_EXTENSIONS,
+ parse_filesize,
+ rot47,
+ url_or_none,
urlencode_postdata,
)
@@ -21,10 +30,8 @@ class SharedBaseIE(InfoExtractor):
video_url = self._extract_video_url(webpage, video_id, url)
- title = compat_b64decode(self._html_search_meta(
- 'full:title', webpage, 'title')).decode('utf-8')
- filesize = int_or_none(self._html_search_meta(
- 'full:size', webpage, 'file size', fatal=False))
+ title = self._extract_title(webpage)
+ filesize = int_or_none(self._extract_filesize(webpage))
return {
'id': video_id,
@@ -34,6 +41,14 @@ class SharedBaseIE(InfoExtractor):
'title': title,
}
+ def _extract_title(self, webpage):
+ return compat_b64decode(self._html_search_meta(
+ 'full:title', webpage, 'title')).decode('utf-8')
+
+ def _extract_filesize(self, webpage):
+ return self._html_search_meta(
+ 'full:size', webpage, 'file size', fatal=False)
+
class SharedIE(SharedBaseIE):
IE_DESC = 'shared.sx'
@@ -81,14 +96,43 @@ class VivoIE(SharedBaseIE):
'id': 'd7ddda0e78',
'ext': 'mp4',
'title': 'Chicken',
- 'filesize': 528031,
+ 'filesize': 515659,
},
}
- def _extract_video_url(self, webpage, video_id, *args):
- return self._parse_json(
+ def _extract_title(self, webpage):
+ title = self._html_search_regex(
+ r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+ 'title', default=None, group='title')
+ if title:
+ ext = determine_ext(title)
+ if ext.lower() in KNOWN_EXTENSIONS:
+ title = title.rpartition('.' + ext)[0]
+ return title
+ return self._og_search_title(webpage)
+
+ def _extract_filesize(self, webpage):
+ return parse_filesize(self._search_regex(
+ r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
+ webpage, 'filesize', fatal=False))
+
+ def _extract_video_url(self, webpage, video_id, url):
+ def decode_url_old(encoded_url):
+ return compat_b64decode(encoded_url).decode('utf-8')
+
+ stream_url = self._search_regex(
+ r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'stream url', default=None, group='url')
+ if stream_url:
+ stream_url = url_or_none(decode_url_old(stream_url))
+ if stream_url:
+ return stream_url
+
+ def decode_url(encoded_url):
+ return rot47(compat_urllib_parse_unquote_plus(encoded_url))
+
+ return decode_url(self._parse_json(
self._search_regex(
- r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'stream', group='url'),
- video_id,
- transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
+ r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
+ 'stream'),
+ video_id, transform_source=js_to_json)['source'])
diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py
index 207ab4477..7ec66ecf3 100644
--- a/youtube_dl/extractor/sixplay.py
+++ b/youtube_dl/extractor/sixplay.py
@@ -19,7 +19,7 @@ from ..utils import (
class SixPlayIE(InfoExtractor):
IE_NAME = '6play'
- _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
+ _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
@@ -35,6 +35,9 @@ class SixPlayIE(InfoExtractor):
}, {
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
'only_matching': True,
+ }, {
+ 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -43,6 +46,7 @@ class SixPlayIE(InfoExtractor):
'6play.fr': ('6play', 'm6web'),
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
+ 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'),
}.get(domain, ('6play', 'm6web'))
data = self._download_json(
@@ -61,10 +65,11 @@ class SixPlayIE(InfoExtractor):
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
subtitles = {}
- for asset in clip_data['assets']:
+ assets = clip_data.get('assets') or []
+ for asset in assets:
asset_url = asset.get('full_physical_path')
protocol = asset.get('protocol')
- if not asset_url or protocol == 'primetime' or asset_url in urls:
+ if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
continue
urls.append(asset_url)
container = asset.get('video_container')
@@ -81,19 +86,18 @@ class SixPlayIE(InfoExtractor):
if not urlh:
continue
asset_url = urlh.geturl()
- asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
- formats.extend(self._extract_m3u8_formats(
- asset_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- formats.extend(self._extract_f4m_formats(
- asset_url.replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- formats.extend(self._extract_mpd_formats(
- asset_url.replace('.m3u8', '.mpd'),
- video_id, mpd_id='dash', fatal=False))
- formats.extend(self._extract_ism_formats(
- re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
- video_id, ism_id='mss', fatal=False))
+ asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
+ for i in range(3, 0, -1):
+ asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
+ m3u8_formats = self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_formats)
+ formats.extend(self._extract_mpd_formats(
+ asset_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if m3u8_formats:
+ break
else:
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',
diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/sky.py
index efcbb36a9..ea30d6e62 100644
--- a/youtube_dl/extractor/skysports.py
+++ b/youtube_dl/extractor/sky.py
@@ -10,34 +10,25 @@ from ..utils import (
)
-class SkySportsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
- 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
- 'info_dict': {
- 'id': '10328419',
- 'ext': 'mp4',
- 'title': 'Bale: It\'s our time to shine',
- 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
- },
- 'add_ie': ['Ooyala'],
- }
-
+class SkyBaseIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_data = extract_attributes(self._search_regex(
- r'(<div.+?class="sdc-article-video__media-ooyala"[^>]+>)', webpage, 'video data'))
+ r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
+ webpage, 'video data'))
video_url = 'ooyala:%s' % video_data['data-video-id']
if video_data.get('data-token-required') == 'true':
- token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+ token_fetch_options = self._parse_json(video_data.get(
+ 'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
token_fetch_url = token_fetch_options.get('url')
if token_fetch_url:
- embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+ embed_token = self._download_webpage(urljoin(
+ url, token_fetch_url), video_id, fatal=False)
if embed_token:
- video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
+ video_url = smuggle_url(
+ video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
@@ -47,3 +38,33 @@ class SkySportsIE(InfoExtractor):
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',
}
+
+
+class SkySportsIE(SkyBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
+ 'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec',
+ 'info_dict': {
+ 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ',
+ 'ext': 'mp4',
+ 'title': 'Bale: It\'s our time to shine',
+ 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d',
+ },
+ 'add_ie': ['Ooyala'],
+ }
+
+
+class SkyNewsIE(SkyBaseIE):
+ _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962',
+ 'md5': 'd6327e581473cea9976a3236ded370cd',
+ 'info_dict': {
+ 'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM',
+ 'ext': 'mp4',
+ 'title': 'Russian plane inspected after deadly fire',
+ 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.',
+ },
+ 'add_ie': ['Ooyala'],
+ }
diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py
index 5b4aaac6f..b7f8ac736 100644
--- a/youtube_dl/extractor/skylinewebcams.py
+++ b/youtube_dl/extractor/skylinewebcams.py
@@ -26,7 +26,7 @@ class SkylineWebcamsIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
stream_url = self._search_regex(
- r'url\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
+ r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage,
'stream url', group='url')
title = self._og_search_title(webpage)
diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py
index ed84322c5..d9ea76831 100644
--- a/youtube_dl/extractor/slideslive.py
+++ b/youtube_dl/extractor/slideslive.py
@@ -2,7 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import smuggle_url
class SlidesLiveIE(InfoExtractor):
@@ -14,9 +14,9 @@ class SlidesLiveIE(InfoExtractor):
'info_dict': {
'id': 'LMtgR8ba0b0',
'ext': 'mp4',
- 'title': '38902413: external video',
- 'description': '3890241320170925-9-1yd6ech.mp4',
- 'uploader': 'SlidesLive Administrator',
+ 'title': 'GCC IA16 backend',
+ 'description': 'Watch full version of this video at https://slideslive.com/38902413.',
+ 'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'upload_date': '20170925',
}
@@ -24,16 +24,38 @@ class SlidesLiveIE(InfoExtractor):
# video_service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
'only_matching': True,
+ }, {
+ # video_service_name = url
+ 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
+ 'only_matching': True,
+ }, {
+ # video_service_name = vimeo
+ 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
- url, video_id, headers={'Accept': 'application/json'})
+ 'https://ben.slideslive.com/player/' + video_id, video_id)
service_name = video_data['video_service_name'].lower()
- if service_name == 'youtube':
- yt_video_id = video_data['video_service_id']
- return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
+ assert service_name in ('url', 'vimeo', 'youtube')
+ service_id = video_data['video_service_id']
+ info = {
+ 'id': video_id,
+ 'thumbnail': video_data.get('thumbnail'),
+ 'url': service_id,
+ }
+ if service_name == 'url':
+ info['title'] = video_data['title']
else:
- raise ExtractorError(
- 'Unsupported service name: {0}'.format(service_name), expected=True)
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': service_name.capitalize(),
+ 'title': video_data.get('title'),
+ })
+ if service_name == 'vimeo':
+ info['url'] = smuggle_url(
+ 'https://player.vimeo.com/video/' + service_id,
+ {'http_headers': {'Referer': url}})
+ return info
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 81c81c8d5..d37c52543 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -9,18 +9,52 @@ from .common import (
SearchInfoExtractor
)
from ..compat import (
+ compat_HTTPError,
+ compat_kwargs,
compat_str,
compat_urlparse,
- compat_urllib_parse_urlencode,
)
from ..utils import (
+ error_to_compat_str,
ExtractorError,
+ float_or_none,
+ HEADRequest,
int_or_none,
- unified_strdate,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unified_timestamp,
update_url_query,
+ url_or_none,
+ urlhandle_detect_ext,
)
+class SoundcloudEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
+ _TEST = {
+ # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
+ 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
+ 'only_matching': True,
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ query = compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
+ api_url = query['url'][0]
+ secret_token = query.get('secret_token')
+ if secret_token:
+ api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
+ return self.url_result(api_url)
+
+
class SoundcloudIE(InfoExtractor):
"""Information extractor for soundcloud.com
To access the media, the uid of the song and a stream token
@@ -34,12 +68,11 @@ class SoundcloudIE(InfoExtractor):
(?:(?:(?:www\.|m\.)?soundcloud\.com/
(?!stations/track)
(?P<uploader>[\w\d-]+)/
- (?!(?:tracks|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
(?P<title>[\w\d-]+)/?
(?P<token>[^?]+?)?(?:[?].*)?$)
- |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
+ |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
(?:/?\?secret_token=(?P<secret_token>[^&]+))?)
- |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
)
'''
IE_NAME = 'soundcloud'
@@ -50,15 +83,21 @@ class SoundcloudIE(InfoExtractor):
'info_dict': {
'id': '62986583',
'ext': 'mp3',
- 'upload_date': '20121011',
+ 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
'uploader': 'E.T. ExTerrestrial Music',
- 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
- 'duration': 143,
+ 'uploader_id': '1571244',
+ 'timestamp': 1349920598,
+ 'upload_date': '20121011',
+ 'duration': 143.216,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
}
},
- # not streamable song
+ # geo-restricted
{
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
'info_dict': {
@@ -67,13 +106,15 @@ class SoundcloudIE(InfoExtractor):
'title': 'Goldrushed',
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
'uploader': 'The Royal Concept',
+ 'uploader_id': '9615865',
+ 'timestamp': 1337635207,
'upload_date': '20120521',
- 'duration': 227,
+ 'duration': 227.155,
'license': 'all-rights-reserved',
- },
- 'params': {
- # rtmp
- 'skip_download': True,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# private link
@@ -84,11 +125,17 @@ class SoundcloudIE(InfoExtractor):
'id': '123998367',
'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭',
- 'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
'upload_date': '20131209',
- 'duration': 9,
+ 'duration': 9.927,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# private link (alt format)
@@ -99,11 +146,17 @@ class SoundcloudIE(InfoExtractor):
'id': '123998367',
'ext': 'mp3',
'title': 'Youtube - Dl Test Video \'\' Ä↭',
- 'uploader': 'jaimeMF',
'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
'upload_date': '20131209',
- 'duration': 9,
+ 'duration': 9.927,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# downloadable song
@@ -116,9 +169,15 @@ class SoundcloudIE(InfoExtractor):
'title': 'Bus Brakes',
'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
'uploader': 'oddsamples',
+ 'uploader_id': '73680509',
+ 'timestamp': 1389232924,
'upload_date': '20140109',
- 'duration': 17,
+ 'duration': 17.346,
'license': 'cc-by-sa',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# private link, downloadable format
@@ -131,9 +190,15 @@ class SoundcloudIE(InfoExtractor):
'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
'uploader': 'Ori Uplift Music',
+ 'uploader_id': '12563093',
+ 'timestamp': 1504206263,
'upload_date': '20170831',
- 'duration': 7449,
+ 'duration': 7449.096,
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
},
# no album art, use avatar pic for thumbnail
@@ -146,176 +211,315 @@ class SoundcloudIE(InfoExtractor):
'title': 'Sideways (Prod. Mad Real)',
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
'uploader': 'garyvee',
+ 'uploader_id': '2366352',
+ 'timestamp': 1488152409,
'upload_date': '20170226',
- 'duration': 207,
+ 'duration': 207.012,
'thumbnail': r're:https?://.*\.jpg',
'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
},
'params': {
'skip_download': True,
},
},
+ {
+ 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
+ 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'info_dict': {
+ 'id': '583011102',
+ 'ext': 'mp3',
+ 'title': 'Mezzo Valzer',
+ 'description': 'md5:4138d582f81866a530317bae316e8b61',
+ 'uploader': 'Micronie',
+ 'uploader_id': '3352531',
+ 'timestamp': 1551394171,
+ 'upload_date': '20190228',
+ 'duration': 180.157,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ # with AAC HQ format available via OAuth token
+ 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
+ 'only_matching': True,
+ },
]
- _CLIENT_ID = 'LvWovRaJZlWCHql0bISuum8Bd2KX79mb'
-
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
- webpage)]
+ _API_V2_BASE = 'https://api-v2.soundcloud.com/'
+ _BASE_URL = 'https://soundcloud.com/'
+ _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
+
+ _ARTWORK_MAP = {
+ 'mini': 16,
+ 'tiny': 20,
+ 'small': 32,
+ 'badge': 47,
+ 't67x67': 67,
+ 'large': 100,
+ 't300x300': 300,
+ 'crop': 400,
+ 't500x500': 500,
+ 'original': 0,
+ }
- def report_resolve(self, video_id):
- """Report information extraction."""
- self.to_screen('%s: Resolving id' % video_id)
+ def _store_client_id(self, client_id):
+ self._downloader.cache.store('soundcloud', 'client_id', client_id)
+
+ def _update_client_id(self):
+ webpage = self._download_webpage('https://soundcloud.com/', None)
+ for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
+ script = self._download_webpage(src, None, fatal=False)
+ if script:
+ client_id = self._search_regex(
+ r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
+ script, 'client id', default=None)
+ if client_id:
+ self._CLIENT_ID = client_id
+ self._store_client_id(client_id)
+ return
+ raise ExtractorError('Unable to extract client id')
+
+ def _download_json(self, *args, **kwargs):
+ non_fatal = kwargs.get('fatal') is False
+ if non_fatal:
+ del kwargs['fatal']
+ query = kwargs.get('query', {}).copy()
+ for _ in range(2):
+ query['client_id'] = self._CLIENT_ID
+ kwargs['query'] = query
+ try:
+ return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self._store_client_id(None)
+ self._update_client_id()
+ continue
+ elif non_fatal:
+ self._downloader.report_warning(error_to_compat_str(e))
+ return False
+ raise
+
+ def _real_initialize(self):
+ self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
@classmethod
def _resolv_url(cls, url):
- return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
+ return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
- def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
+ def _extract_info_dict(self, info, full_title=None, secret_token=None):
track_id = compat_str(info['id'])
- name = full_title or track_id
- if quiet:
- self.report_extraction(name)
- thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
- if isinstance(thumbnail, compat_str):
- thumbnail = thumbnail.replace('-large', '-t500x500')
- result = {
- 'id': track_id,
- 'uploader': info.get('user', {}).get('username'),
- 'upload_date': unified_strdate(info.get('created_at')),
- 'title': info['title'],
- 'description': info.get('description'),
- 'thumbnail': thumbnail,
- 'duration': int_or_none(info.get('duration'), 1000),
- 'webpage_url': info.get('permalink_url'),
- 'license': info.get('license'),
- }
+ title = info['title']
+
+ format_urls = set()
formats = []
query = {'client_id': self._CLIENT_ID}
- if secret_token is not None:
+ if secret_token:
query['secret_token'] = secret_token
- if info.get('downloadable', False):
- # We can build a direct link to the song
- format_url = update_url_query(
- 'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
- formats.append({
- 'format_id': 'download',
- 'ext': info.get('original_format', 'mp3'),
- 'url': format_url,
- 'vcodec': 'none',
- 'preference': 10,
- })
- # We have to retrieve the url
- format_dict = self._download_json(
- 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
- track_id, 'Downloading track url', query=query)
-
- for key, stream_url in format_dict.items():
- ext, abr = 'mp3', None
- mobj = re.search(r'_([^_]+)_(\d+)_url', key)
+ if info.get('downloadable') and info.get('has_downloads_left'):
+ download_url = update_url_query(
+ self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
+ redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
+ if redirect_url:
+ urlh = self._request_webpage(
+ HEADRequest(redirect_url), track_id, fatal=False)
+ if urlh:
+ format_url = urlh.geturl()
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': 'download',
+ 'ext': urlhandle_detect_ext(urlh) or 'mp3',
+ 'filesize': int_or_none(urlh.headers.get('Content-Length')),
+ 'url': format_url,
+ 'preference': 10,
+ })
+
+ def invalid_url(url):
+ return not url or url in format_urls
+
+ def add_format(f, protocol, is_preview=False):
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
if mobj:
- ext, abr = mobj.groups()
- abr = int(abr)
- if key.startswith('http'):
- stream_formats = [{
- 'format_id': key,
- 'ext': ext,
- 'url': stream_url,
- }]
- elif key.startswith('rtmp'):
- # The url doesn't have an rtmp app, we have to extract the playpath
- url, path = stream_url.split('mp3:', 1)
- stream_formats = [{
- 'format_id': key,
- 'url': url,
- 'play_path': 'mp3:' + path,
- 'ext': 'flv',
- }]
- elif key.startswith('hls'):
- stream_formats = self._extract_m3u8_formats(
- stream_url, track_id, ext, entry_protocol='m3u8_native',
- m3u8_id=key, fatal=False)
- else:
- continue
-
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ ext = f.get('ext')
+ if ext == 'aac':
+ f['abr'] = '256'
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
+ if preview:
+ format_id_list.append('preview')
+ abr = f.get('abr')
if abr:
- for f in stream_formats:
- f['abr'] = abr
-
- formats.extend(stream_formats)
-
- if not formats:
- # We fallback to the stream_url in the original info, this
- # cannot be always used, sometimes it can give an HTTP 404 error
- formats.append({
- 'format_id': 'fallback',
- 'url': update_url_query(info['stream_url'], query),
- 'ext': 'mp3',
+ f['abr'] = int(abr)
+ if protocol == 'hls':
+ protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
+ else:
+ protocol = 'http'
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': protocol,
+ 'preference': -10 if preview else None,
})
+ formats.append(f)
+
+ # New API
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
+ continue
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = self._download_json(
+ format_url, track_id, query=query, fatal=False)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
+ 'url': stream_url,
+ 'ext': ext,
+ }, 'http' if protocol == 'progressive' else protocol,
+ t.get('snipped') or '/preview/' in format_url)
for f in formats:
f['vcodec'] = 'none'
- self._check_formats(formats, track_id)
+ if not formats and info.get('policy') == 'BLOCK':
+ self.raise_geo_restricted()
self._sort_formats(formats)
- result['formats'] = formats
- return result
+ user = info.get('user') or {}
+
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+ 'formats': formats
+ }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
+ mobj = re.match(self._VALID_URL, url)
track_id = mobj.group('track_id')
- if track_id is not None:
- info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
+ query = {}
+ if track_id:
+ info_json_url = self._API_V2_BASE + 'tracks/' + track_id
full_title = track_id
token = mobj.group('secret_token')
if token:
- info_json_url += '&secret_token=' + token
- elif mobj.group('player'):
- query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- real_url = query['url'][0]
- # If the token is in the query of the original url we have to
- # manually add it
- if 'secret_token' in query:
- real_url += '?secret_token=' + query['secret_token'][0]
- return self.url_result(real_url)
+ query['secret_token'] = token
else:
- # extract uploader (which is in the url)
- uploader = mobj.group('uploader')
- # extract simple title (uploader + slug of song title)
- slug_title = mobj.group('title')
+ full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
token = mobj.group('token')
- full_title = resolve_title = '%s/%s' % (uploader, slug_title)
if token:
resolve_title += '/%s' % token
+ info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
- self.report_resolve(full_title)
-
- url = 'https://soundcloud.com/%s' % resolve_title
- info_json_url = self._resolv_url(url)
- info = self._download_json(info_json_url, full_title, 'Downloading info JSON')
+ info = self._download_json(
+ info_json_url, full_title, 'Downloading info JSON', query=query)
- return self._extract_info_dict(info, full_title, secret_token=token)
+ return self._extract_info_dict(info, full_title, token)
class SoundcloudPlaylistBaseIE(SoundcloudIE):
- @staticmethod
- def _extract_id(e):
- return compat_str(e['id']) if e.get('id') else None
-
- def _extract_track_entries(self, tracks):
- return [
- self.url_result(
- track['permalink_url'], SoundcloudIE.ie_key(),
- video_id=self._extract_id(track))
- for track in tracks if track.get('permalink_url')]
+ def _extract_set(self, playlist, token=None):
+ playlist_id = compat_str(playlist['id'])
+ tracks = playlist.get('tracks') or []
+ if not all([t.get('permalink_url') for t in tracks]) and token:
+ tracks = self._download_json(
+ self._API_V2_BASE + 'tracks', playlist_id,
+ 'Downloading tracks', query={
+ 'ids': ','.join([compat_str(t['id']) for t in tracks]),
+ 'playlistId': playlist_id,
+ 'playlistSecretToken': token,
+ })
+ entries = []
+ for track in tracks:
+ track_id = str_or_none(track.get('id'))
+ url = track.get('permalink_url')
+ if not url:
+ if not track_id:
+ continue
+ url = self._API_V2_BASE + 'tracks/' + track_id
+ if token:
+ url += '?secret_token=' + token
+ entries.append(self.url_result(
+ url, SoundcloudIE.ie_key(), track_id))
+ return self.playlist_result(
+ entries, playlist_id,
+ playlist.get('title'),
+ playlist.get('description'))
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
@@ -326,6 +530,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
'info_dict': {
'id': '2284613',
'title': 'The Royal Concept EP',
+ 'description': 'md5:71d07087c7a449e8941a70a29e34671e',
},
'playlist_mincount': 5,
}, {
@@ -336,84 +541,72 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- # extract uploader (which is in the url)
- uploader = mobj.group('uploader')
- # extract simple title (uploader + slug of song title)
- slug_title = mobj.group('slug_title')
- full_title = '%s/sets/%s' % (uploader, slug_title)
- url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
-
+ full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
token = mobj.group('token')
if token:
full_title += '/' + token
- url += '/' + token
-
- self.report_resolve(full_title)
- resolv_url = self._resolv_url(url)
- info = self._download_json(resolv_url, full_title)
+ info = self._download_json(self._resolv_url(
+ self._BASE_URL + full_title), full_title)
if 'errors' in info:
msgs = (compat_str(err['error_message']) for err in info['errors'])
raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
- entries = self._extract_track_entries(info['tracks'])
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'id': '%s' % info['id'],
- 'title': info['title'],
- }
+ return self._extract_set(info, token)
-class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
- _API_BASE = 'https://api.soundcloud.com'
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
+class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title):
COMMON_QUERY = {
- 'limit': 50,
- 'client_id': self._CLIENT_ID,
+ 'limit': 80000,
'linked_partitioning': '1',
}
query = COMMON_QUERY.copy()
query['offset'] = 0
- next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
+ next_href = base_url
entries = []
for i in itertools.count():
response = self._download_json(
- next_href, playlist_id, 'Downloading track page %s' % (i + 1))
+ next_href, playlist_id,
+ 'Downloading track page %s' % (i + 1), query=query)
collection = response['collection']
- if not collection:
- break
- def resolve_permalink_url(candidates):
+ if not isinstance(collection, list):
+ collection = []
+
+ # Empty collection may be returned, in this case we proceed
+ # straight to next_href
+
+ def resolve_entry(candidates):
for cand in candidates:
- if isinstance(cand, dict):
- permalink_url = cand.get('permalink_url')
- entry_id = self._extract_id(cand)
- if permalink_url and permalink_url.startswith('http'):
- return permalink_url, entry_id
+ if not isinstance(cand, dict):
+ continue
+ permalink_url = url_or_none(cand.get('permalink_url'))
+ if not permalink_url:
+ continue
+ return self.url_result(
+ permalink_url,
+ SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ str_or_none(cand.get('id')), cand.get('title'))
for e in collection:
- permalink_url, entry_id = resolve_permalink_url((e, e.get('track'), e.get('playlist')))
- if permalink_url:
- entries.append(self.url_result(permalink_url, video_id=entry_id))
+ entry = resolve_entry((e, e.get('track'), e.get('playlist')))
+ if entry:
+ entries.append(entry)
next_href = response.get('next_href')
if not next_href:
break
- parsed_next_href = compat_urlparse.urlparse(response['next_href'])
- qs = compat_urlparse.parse_qs(parsed_next_href.query)
- qs.update(COMMON_QUERY)
- next_href = compat_urlparse.urlunparse(
- parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ next_href = response['next_href']
+ parsed_next_href = compat_urlparse.urlparse(next_href)
+ query = compat_urlparse.parse_qs(parsed_next_href.query)
+ query.update(COMMON_QUERY)
return {
'_type': 'playlist',
@@ -429,46 +622,53 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
(?:(?:www|m)\.)?soundcloud\.com/
(?P<user>[^/]+)
(?:/
- (?P<rsrc>tracks|sets|reposts|likes|spotlight)
+ (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
)?
/?(?:[?#].*)?$
'''
IE_NAME = 'soundcloud:user'
_TESTS = [{
- 'url': 'https://soundcloud.com/the-akashic-chronicler',
+ 'url': 'https://soundcloud.com/soft-cell-official',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (All)',
+ },
+ 'playlist_mincount': 28,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/tracks',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (All)',
+ 'id': '207965082',
+ 'title': 'Soft Cell (Tracks)',
},
- 'playlist_mincount': 74,
+ 'playlist_mincount': 27,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/tracks',
+ 'url': 'https://soundcloud.com/soft-cell-official/albums',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Tracks)',
+ 'id': '207965082',
+ 'title': 'Soft Cell (Albums)',
},
- 'playlist_mincount': 37,
+ 'playlist_mincount': 1,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/sets',
+ 'url': 'https://soundcloud.com/jcv246/sets',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Playlists)',
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Sets)',
},
'playlist_mincount': 2,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/reposts',
+ 'url': 'https://soundcloud.com/jcv246/reposts',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Reposts)',
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Reposts)',
},
- 'playlist_mincount': 7,
+ 'playlist_mincount': 6,
}, {
- 'url': 'https://soundcloud.com/the-akashic-chronicler/likes',
+ 'url': 'https://soundcloud.com/clalberg/likes',
'info_dict': {
- 'id': '114582580',
- 'title': 'The Akashic Chronicler (Likes)',
+ 'id': '11817582',
+ 'title': 'clalberg (Likes)',
},
- 'playlist_mincount': 321,
+ 'playlist_mincount': 5,
}, {
'url': 'https://soundcloud.com/grynpyret/spotlight',
'info_dict': {
@@ -479,37 +679,29 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
}]
_BASE_URL_MAP = {
- 'all': '%s/profile/soundcloud:users:%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_BASE,
- 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'reposts': '%s/profile/soundcloud:users:%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- }
-
- _TITLE_MAP = {
- 'all': 'All',
- 'tracks': 'Tracks',
- 'sets': 'Playlists',
- 'reposts': 'Reposts',
- 'likes': 'Likes',
- 'spotlight': 'Spotlight',
+ 'all': 'stream/users/%s',
+ 'tracks': 'users/%s/tracks',
+ 'albums': 'users/%s/albums',
+ 'sets': 'users/%s/playlists',
+ 'reposts': 'stream/users/%s/reposts',
+ 'likes': 'users/%s/likes',
+ 'spotlight': 'users/%s/spotlight',
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
uploader = mobj.group('user')
- url = 'https://soundcloud.com/%s/' % uploader
- resolv_url = self._resolv_url(url)
user = self._download_json(
- resolv_url, uploader, 'Downloading user info')
+ self._resolv_url(self._BASE_URL + uploader),
+ uploader, 'Downloading user info')
resource = mobj.group('rsrc') or 'all'
return self._extract_playlist(
- self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
- '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
+ self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
+ str_or_none(user.get('id')),
+ '%s (%s)' % (user['username'], resource.capitalize()))
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
@@ -519,7 +711,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
'info_dict': {
'id': '286017854',
- 'title': 'Track station: your-text',
+ 'title': 'Track station: your text',
},
'playlist_mincount': 47,
}]
@@ -527,19 +719,17 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url):
track_name = self._match_id(url)
- webpage = self._download_webpage(url, track_name)
-
+ track = self._download_json(self._resolv_url(url), track_name)
track_id = self._search_regex(
- r'soundcloud:track-stations:(\d+)', webpage, 'track id')
+ r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
return self._extract_playlist(
- '%s/stations/soundcloud:track-stations:%s/tracks'
- % (self._API_V2_BASE, track_id),
- track_id, 'Track station: %s' % track_name)
+ self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
+ track_id, 'Track station: %s' % track['title'])
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
- _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
+ _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
IE_NAME = 'soundcloud:playlist'
_TESTS = [{
'url': 'https://api.soundcloud.com/playlists/4110309',
@@ -554,29 +744,17 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
- base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
- data_dict = {
- 'client_id': self._CLIENT_ID,
- }
+ query = {}
token = mobj.group('token')
-
if token:
- data_dict['secret_token'] = token
+ query['secret_token'] = token
- data = compat_urllib_parse_urlencode(data_dict)
data = self._download_json(
- base_url + data, playlist_id, 'Downloading playlist')
+ self._API_V2_BASE + 'playlists/' + playlist_id,
+ playlist_id, 'Downloading playlist', query=query)
- entries = self._extract_track_entries(data['tracks'])
-
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': data.get('title'),
- 'description': data.get('description'),
- 'entries': entries,
- }
+ return self._extract_set(data, token)
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
@@ -594,18 +772,17 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
_SEARCH_KEY = 'scsearch'
_MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
def _get_collection(self, endpoint, collection_id, **query):
limit = min(
query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
self._MAX_RESULTS_PER_PAGE)
- query['limit'] = limit
- query['client_id'] = self._CLIENT_ID
- query['linked_partitioning'] = '1'
- query['offset'] = 0
- data = compat_urllib_parse_urlencode(query)
- next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
+ query.update({
+ 'limit': limit,
+ 'linked_partitioning': 1,
+ 'offset': 0,
+ })
+ next_url = update_url_query(self._API_V2_BASE + endpoint, query)
collected_results = 0
@@ -632,5 +809,5 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
break
def _get_n_results(self, query, n):
- tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
+ tracks = self._get_collection('search/tracks', query, limit=n, q=query)
return self.playlist_result(tracks, playlist_title=query)
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 67500b69c..61ca902ce 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -4,15 +4,20 @@ import re
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
+ merge_dicts,
+ orderedSet,
parse_duration,
parse_resolution,
str_to_int,
+ url_or_none,
+ urlencode_postdata,
)
class SpankBangIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
_TESTS = [{
'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
'md5': '1cc433e1d6aa14bc376535b8679302f7',
@@ -23,6 +28,8 @@ class SpankBangIE(InfoExtractor):
'description': 'dillion harper masturbates on a bed',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'silly2587',
+ 'timestamp': 1422571989,
+ 'upload_date': '20150129',
'age_limit': 18,
}
}, {
@@ -41,51 +48,101 @@ class SpankBangIE(InfoExtractor):
# 4k
'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2y3td/embed/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id, headers={
- 'Cookie': 'country=US'
- })
+ webpage = self._download_webpage(
+ url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
+ video_id, headers={'Cookie': 'country=US'})
- if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
+ if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
raise ExtractorError(
'Video %s is not available' % video_id, expected=True)
formats = []
- for mobj in re.finditer(
- r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
- webpage):
- format_id, format_url = mobj.group('id', 'url')
+
+ def extract_format(format_id, format_url):
+ f_url = url_or_none(format_url)
+ if not f_url:
+ return
f = parse_resolution(format_id)
- f.update({
- 'url': format_url,
- 'format_id': format_id,
- })
- formats.append(f)
- self._sort_formats(formats)
+ ext = determine_ext(f_url)
+ if format_id.startswith('m3u8') or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id.startswith('mpd') or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ f_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp4' or f.get('width') or f.get('height'):
+ f.update({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+ formats.append(f)
+
+ STREAM_URL_PREFIX = 'stream_url_'
+
+ for mobj in re.finditer(
+ r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
+ % STREAM_URL_PREFIX, webpage):
+ extract_format(mobj.group('id', 'url'))
+
+ if not formats:
+ stream_key = self._search_regex(
+ r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'stream key', group='value')
+
+ stream = self._download_json(
+ 'https://spankbang.com/api/videos/stream', video_id,
+ 'Downloading stream JSON', data=urlencode_postdata({
+ 'id': stream_key,
+ 'data': 0,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ for format_id, format_url in stream.items():
+ if format_url and isinstance(format_url, list):
+ format_url = format_url[0]
+ extract_format(format_id, format_url)
+
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+
+ info = self._search_json_ld(webpage, video_id, default={})
title = self._html_search_regex(
- r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
+ r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
description = self._search_regex(
r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
- webpage, 'description', fatal=False)
- thumbnail = self._og_search_thumbnail(webpage)
- uploader = self._search_regex(
- r'class="user"[^>]*><img[^>]+>([^<]+)',
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ uploader = self._html_search_regex(
+ (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
+ r'class="user"[^>]*><img[^>]+>([^<]+)'),
webpage, 'uploader', default=None)
duration = parse_duration(self._search_regex(
r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
- webpage, 'duration', fatal=False))
+ webpage, 'duration', default=None))
view_count = str_to_int(self._search_regex(
- r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False))
+ r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
age_limit = self._rta_search(webpage)
- return {
+ return merge_dicts({
'id': video_id,
- 'title': title,
+ 'title': title or video_id,
'description': description,
'thumbnail': thumbnail,
'uploader': uploader,
@@ -93,4 +150,35 @@ class SpankBangIE(InfoExtractor):
'view_count': view_count,
'formats': formats,
'age_limit': age_limit,
- }
+ }, info
+ )
+
+
+class SpankBangPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
+ _TEST = {
+ 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
+ 'info_dict': {
+ 'id': 'ug0k',
+ 'title': 'Big Ass Titties',
+ },
+ 'playlist_mincount': 50,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
+
+ entries = [self.url_result(
+ 'https://spankbang.com/%s/video' % video_id,
+ ie=SpankBangIE.ie_key(), video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
+
+ title = self._html_search_regex(
+ r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
+ fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
index 44d8fa52f..35ab9ec37 100644
--- a/youtube_dl/extractor/spankwire.py
+++ b/youtube_dl/extractor/spankwire.py
@@ -3,34 +3,47 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
-)
from ..utils import (
- sanitized_Request,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ str_or_none,
str_to_int,
- unified_strdate,
+ url_or_none,
)
-from ..aes import aes_decrypt_text
class SpankwireIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?spankwire\.com/
+ (?:
+ [^/]+/video|
+ EmbedPlayer\.aspx/?\?.*?\bArticleId=
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
# download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
- 'md5': '8bbfde12b101204b39e4b9fe7eb67095',
+ 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
'info_dict': {
'id': '103545',
'ext': 'mp4',
'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
'description': 'Crazy Bitch X rated music video.',
+ 'duration': 222,
'uploader': 'oreusz',
'uploader_id': '124697',
- 'upload_date': '20070507',
+ 'timestamp': 1178587885,
+ 'upload_date': '20070508',
+ 'average_rating': float,
+ 'view_count': int,
+ 'comment_count': int,
'age_limit': 18,
- }
+ 'categories': list,
+ 'tags': list,
+ },
}, {
# download URL pattern: */mp4_<format_id>_<video_id>.mp4
'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
@@ -45,83 +58,125 @@ class SpankwireIE(InfoExtractor):
'upload_date': '20150822',
'age_limit': 18,
},
+ 'params': {
+ 'proxy': '127.0.0.1:8118'
+ },
+ 'skip': 'removed',
+ }, {
+ 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
+ 'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
+ webpage)
+
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- req = sanitized_Request('http://www.' + mobj.group('url'))
- req.add_header('Cookie', 'age_verified=1')
- webpage = self._download_webpage(req, video_id)
-
- title = self._html_search_regex(
- r'<h1>([^<]+)', webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
- webpage, 'description', fatal=False)
- thumbnail = self._html_search_regex(
- r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
- webpage, 'thumbnail', fatal=False)
-
- uploader = self._html_search_regex(
- r'by:\s*<a [^>]*>(.+?)</a>',
- webpage, 'uploader', fatal=False)
- uploader_id = self._html_search_regex(
- r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
- webpage, 'uploader id', fatal=False)
- upload_date = unified_strdate(self._html_search_regex(
- r'</a> on (.+?) at \d+:\d+',
- webpage, 'upload date', fatal=False))
-
- view_count = str_to_int(self._html_search_regex(
- r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
- webpage, 'view count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
- webpage, 'comment count', fatal=False))
-
- videos = re.findall(
- r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
- heights = [int(video[0]) for video in videos]
- video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
- if webpage.find(r'flashvars\.encrypted = "true"') != -1:
- password = self._search_regex(
- r'flashvars\.video_title = "([^"]+)',
- webpage, 'password').replace('+', ' ')
- video_urls = list(map(
- lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'),
- video_urls))
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)
+
+ title = video['title']
formats = []
- for height, video_url in zip(heights, video_urls):
- path = compat_urllib_parse_urlparse(video_url).path
- m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
- if m:
- tbr = int(m.group('tbr'))
- height = int(m.group('height'))
- else:
- tbr = None
- formats.append({
- 'url': video_url,
- 'format_id': '%dp' % height,
- 'height': height,
- 'tbr': tbr,
+ videos = video.get('videos')
+ if isinstance(videos, dict):
+ for format_id, format_url in videos.items():
+ video_url = url_or_none(format_url)
+ if not video_url:
+ continue
+ height = int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id, 'height', default=None))
+ m = re.search(
+ r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
+ if m:
+ tbr = int(m.group('tbr'))
+ height = height or int(m.group('height'))
+ else:
+ tbr = None
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%dp' % height if height else format_id,
+ 'height': height,
+ 'tbr': tbr,
+ })
+ m3u8_url = url_or_none(video.get('HLS'))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id'))
+
+ view_count = str_to_int(video.get('viewed'))
+
+ thumbnails = []
+ for preference, t in enumerate(('', '2x'), start=0):
+ thumbnail_url = url_or_none(video.get('poster%s' % t))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
})
- self._sort_formats(formats)
- age_limit = self._rta_search(webpage)
+ def extract_names(key):
+ entries_list = video.get(key)
+ if not isinstance(entries_list, list):
+ return
+ entries = []
+ for entry in entries_list:
+ name = str_or_none(entry.get('name'))
+ if name:
+ entries.append(name)
+ return entries
+
+ categories = extract_names('categories')
+ tags = extract_names('tags')
- return {
+ uploader = None
+ info = {}
+
+ webpage = self._download_webpage(
+ 'https://www.spankwire.com/_/video%s/' % video_id, video_id,
+ fatal=False)
+ if webpage:
+ info = self._search_json_ld(webpage, video_id, default={})
+ thumbnail_url = None
+ if 'thumbnail' in info:
+ thumbnail_url = url_or_none(info['thumbnail'])
+ del info['thumbnail']
+ if not thumbnail_url:
+ thumbnail_url = self._og_search_thumbnail(webpage)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': 10,
+ })
+ uploader = self._html_search_regex(
+ r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+ if not view_count:
+ view_count = str_to_int(self._search_regex(
+ r'data-views=["\']([\d,.]+)', webpage, 'view count',
+ fatal=False))
+
+ return merge_dicts({
'id': video_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'thumbnails': thumbnails,
'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
+ 'uploader_id': str_or_none(video.get('userId')),
+ 'timestamp': int_or_none(video.get('time_approved_on')),
+ 'average_rating': float_or_none(video.get('rating')),
'view_count': view_count,
- 'comment_count': comment_count,
+ 'comment_count': int_or_none(video.get('comments')),
+ 'age_limit': 18,
+ 'categories': categories,
+ 'tags': tags,
'formats': formats,
- 'age_limit': age_limit,
- }
+ }, info)
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
index e76522b45..aabff7a3c 100644
--- a/youtube_dl/extractor/spike.py
+++ b/youtube_dl/extractor/spike.py
@@ -8,23 +8,21 @@ class BellatorIE(MTVServicesInfoExtractor):
_TESTS = [{
'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
'info_dict': {
- 'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4',
- 'ext': 'mp4',
- 'title': 'Douglas Lima vs. Paul Daley - Round 1',
- 'description': 'md5:805a8dd29310fd611d32baba2f767885',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
+ 'title': 'Michael Page vs. Evangelista Cyborg',
+ 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05',
},
+ 'playlist_count': 3,
}, {
'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
'only_matching': True,
}]
- _FEED_URL = 'http://www.spike.com/feeds/mrss/'
+ _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
+ def _extract_mgid(self, webpage):
+ return self._extract_triforce_mgid(webpage)
+
class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@@ -44,3 +42,14 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
+
+ def _extract_mgid(self, webpage):
+ root_data = self._parse_json(self._search_regex(
+ r'window\.__DATA__\s*=\s*({.+})',
+ webpage, 'data'), None)
+
+ def find_sub_data(data, data_type):
+ return next(c for c in data['children'] if c.get('type') == data_type)
+
+ c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
+ return c['props']['media']['video']['config']['uri']
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py
index 54497c880..b9017fd2a 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dl/extractor/sportbox.py
@@ -8,20 +8,24 @@ from ..utils import (
determine_ext,
int_or_none,
js_to_json,
+ merge_dicts,
)
-class SportBoxEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
+class SportBoxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
_TESTS = [{
'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
'info_dict': {
- 'id': '211355',
+ 'id': '109158',
'ext': 'mp4',
- 'title': '211355',
+ 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
+ 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 292,
'view_count': int,
+ 'timestamp': 1426237001,
+ 'upload_date': '20150313',
},
'params': {
# m3u8 download
@@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor):
}, {
'url': 'https://news.sportbox.ru/vdl/player/media/193095',
'only_matching': True,
+ }, {
+ 'url': 'https://news.sportbox.ru/vdl/player/media/109158',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://matchtv.ru/vdl/player/media/109158',
+ 'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
return re.findall(
- r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
+ r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
webpage)
def _real_extract(self, url):
@@ -46,13 +56,14 @@ class SportBoxEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
- wjplayer_data = self._parse_json(
+ sources = self._parse_json(
self._search_regex(
- r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
+ r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
+ webpage, 'sources'),
video_id, transform_source=js_to_json)
formats = []
- for source in wjplayer_data['sources']:
+ for source in sources:
src = source.get('src')
if not src:
continue
@@ -66,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor):
})
self._sort_formats(formats)
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'player options', default='{}'),
+ video_id, transform_source=js_to_json)
+ media_id = player['mediaId']
+
+ info = self._search_json_ld(webpage, media_id, default={})
+
view_count = int_or_none(self._search_regex(
r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
- return {
- 'id': video_id,
- 'title': video_id,
- 'thumbnail': wjplayer_data.get('poster'),
- 'duration': int_or_none(wjplayer_data.get('duration')),
+ return merge_dicts(info, {
+ 'id': media_id,
+ 'title': self._og_search_title(webpage, default=None) or media_id,
+ 'thumbnail': player.get('poster'),
+ 'duration': int_or_none(player.get('duration')),
'view_count': view_count,
'formats': formats,
- }
+ })
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
index a3c35a899..378fc7568 100644
--- a/youtube_dl/extractor/sportdeutschland.py
+++ b/youtube_dl/extractor/sportdeutschland.py
@@ -13,36 +13,18 @@ from ..utils import (
class SportDeutschlandIE(InfoExtractor):
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
_TESTS = [{
- 'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+ 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
'info_dict': {
- 'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
+ 'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
'ext': 'mp4',
- 'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
- 'categories': ['Badminton'],
+ 'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
+ 'categories': ['Badminton-Deutschland'],
'view_count': int,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
'timestamp': int,
- 'upload_date': 're:^201408[23][0-9]$',
+ 'upload_date': '20200201',
+ 'description': 're:.*', # meaningless description for THIS video
},
- 'params': {
- 'skip_download': 'Live stream',
- },
- }, {
- 'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
- 'info_dict': {
- 'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
- 'ext': 'mp4',
- 'upload_date': '20140825',
- 'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
- 'timestamp': 1408976060,
- 'duration': 2732,
- 'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'view_count': int,
- 'categories': ['Li-Ning Badminton WM 2014'],
-
- }
}]
def _real_extract(self, url):
@@ -50,7 +32,7 @@ class SportDeutschlandIE(InfoExtractor):
video_id = mobj.group('id')
sport_id = mobj.group('sport')
- api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
+ api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
sport_id, video_id)
req = sanitized_Request(api_url, headers={
'Accept': 'application/vnd.vidibus.v2.html+json',
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index bb73eb1d5..170dce87f 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -106,7 +106,16 @@ class SRGSSRIE(InfoExtractor):
class SRGSSRPlayIE(InfoExtractor):
IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
- _VALID_URL = r'https?://(?:(?:www|play)\.)?(?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/[^/]+/(?P<type>video|audio)/[^?]+\?id=(?P<id>[0-9a-f\-]{36}|\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|play)\.)?
+ (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
+ (?:
+ [^/]+/(?P<type>video|audio)/[^?]+|
+ popup(?P<type_2>video|audio)player
+ )
+ \?id=(?P<id>[0-9a-f\-]{36}|\d+)
+ '''
_TESTS = [{
'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
@@ -163,9 +172,15 @@ class SRGSSRPlayIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
+ }, {
+ 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
+ mobj = re.match(self._VALID_URL, url)
+ bu = mobj.group('bu')
+ media_type = mobj.group('type') or mobj.group('type_2')
+ media_id = mobj.group('id')
# other info can be extracted from url + '&layout=json'
return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
index 28baf901c..359dadaa3 100644
--- a/youtube_dl/extractor/srmediathek.py
+++ b/youtube_dl/extractor/srmediathek.py
@@ -1,14 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
-from .ard import ARDMediathekIE
+from .ard import ARDMediathekBaseIE
from ..utils import (
ExtractorError,
get_element_by_attribute,
)
-class SRMediathekIE(ARDMediathekIE):
+class SRMediathekIE(ARDMediathekBaseIE):
IE_NAME = 'sr:mediathek'
IE_DESC = 'Saarländischer Rundfunk'
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py
deleted file mode 100644
index fcaa5ac0b..000000000
--- a/youtube_dl/extractor/streamango.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- js_to_json,
-)
-
-
-class StreamangoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?streamango\.com/(?:f|embed)/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
- 'md5': 'e992787515a182f55e38fc97588d802a',
- 'info_dict': {
- 'id': 'clapasobsptpkdfe',
- 'ext': 'mp4',
- 'title': '20170315_150006.mp4',
- }
- }, {
- # no og:title
- 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
- 'info_dict': {
- 'id': 'foqebrpftarclpob',
- 'ext': 'mp4',
- 'title': 'foqebrpftarclpob',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'gone',
- }, {
- 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- def decrypt_src(encoded, val):
- ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
- encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
- decoded = ''
- sm = [None] * 4
- i = 0
- str_len = len(encoded)
- while i < str_len:
- for j in range(4):
- sm[j % 4] = ALPHABET.index(encoded[i])
- i += 1
- char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
- decoded += compat_chr(char_code)
- if sm[2] != 0x40:
- char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
- decoded += compat_chr(char_code)
- if sm[3] != 0x40:
- char_code = ((sm[2] & 0x3) << 0x6) | sm[3]
- decoded += compat_chr(char_code)
- return decoded
-
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._og_search_title(webpage, default=video_id)
-
- formats = []
- for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
- mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
- if mobj is None:
- continue
-
- format_ = format_.replace(mobj.group(0), '')
-
- video = self._parse_json(
- format_, video_id, transform_source=js_to_json,
- fatal=False) or {}
-
- mobj = re.search(
- r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
- mobj.group(1))
- if mobj is None:
- continue
-
- src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
- if not src:
- continue
-
- ext = determine_ext(src, default_ext=None)
- if video.get('type') == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'url': src,
- 'ext': ext or 'mp4',
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- 'tbr': int_or_none(video.get('bitrate')),
- })
-
- if not formats:
- error = self._search_regex(
- r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
- 'error', default=None)
- if not error and '>Sorry' in webpage:
- error = 'Video %s is not available' % video_id
- if error:
- raise ExtractorError(error, expected=True)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'url': url,
- 'title': title,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
index 4a410611d..b97bb4374 100644
--- a/youtube_dl/extractor/streamcloud.py
+++ b/youtube_dl/extractor/streamcloud.py
@@ -45,7 +45,7 @@ class StreamcloudIE(InfoExtractor):
value="([^"]*)"
''', orig_webpage)
- self._sleep(12, video_id)
+ self._sleep(6, video_id)
webpage = self._download_webpage(
url, video_id, data=urlencode_postdata(fields), headers={
diff --git a/youtube_dl/extractor/stretchinternet.py b/youtube_dl/extractor/stretchinternet.py
index ae2ac1b42..4dbead2ba 100644
--- a/youtube_dl/extractor/stretchinternet.py
+++ b/youtube_dl/extractor/stretchinternet.py
@@ -5,44 +5,28 @@ from ..utils import int_or_none
class StretchInternetIE(InfoExtractor):
- _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/portal\.htm\?.*?\beventId=(?P<id>\d+)'
+ _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
_TEST = {
- 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=313900&streamType=video',
+ 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
'info_dict': {
- 'id': '313900',
+ 'id': '573272',
'ext': 'mp4',
- 'title': 'Augustana (S.D.) Baseball vs University of Mary',
- 'description': 'md5:7578478614aae3bdd4a90f578f787438',
- 'timestamp': 1490468400,
- 'upload_date': '20170325',
+ 'title': 'University of Mary Wrestling vs. Upper Iowa',
+ 'timestamp': 1575668361,
+ 'upload_date': '20191206',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
- stream = self._download_json(
- 'https://neo-client.stretchinternet.com/streamservice/v1/media/stream/v%s'
- % video_id, video_id)
-
- video_url = 'https://%s' % stream['source']
-
event = self._download_json(
- 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
- video_id, query={
- 'clientID': 99997,
- 'eventID': video_id,
- 'token': 'asdf',
- })['event']
-
- title = event.get('title') or event['mobileTitle']
- description = event.get('customText')
- timestamp = int_or_none(event.get('longtime'))
+ 'https://api.stretchinternet.com/trinity/event/tcg/' + video_id,
+ video_id)[0]
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'url': video_url,
+ 'title': event['title'],
+ 'timestamp': int_or_none(event.get('dateCreated'), 1000),
+ 'url': 'https://' + event['media'][0]['url'],
}
diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py
new file mode 100644
index 000000000..bae8b71f4
--- /dev/null
+++ b/youtube_dl/extractor/stv.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_str,
+ float_or_none,
+ int_or_none,
+)
+
+
+class STVPlayerIE(InfoExtractor):
+ IE_NAME = 'stv:player'
+ _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
+ _TEST = {
+ 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
+ 'md5': '5adf9439c31d554f8be0707c7abe7e0a',
+ 'info_dict': {
+ 'id': '5333973339001',
+ 'ext': 'mp4',
+ 'upload_date': '20170301',
+ 'title': '60 seconds on set with Laura Norton',
+ 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!",
+ 'timestamp': 1488388054,
+ 'uploader_id': '1486976045',
+ },
+ 'skip': 'this resource is unavailable outside of the UK',
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
+ _PTYPE_MAP = {
+ 'episode': 'episodes',
+ 'video': 'shortform',
+ }
+
+ def _real_extract(self, url):
+ ptype, video_id = re.match(self._VALID_URL, url).groups()
+ resp = self._download_json(
+ 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
+ video_id)
+
+ result = resp['results']
+ video = result['video']
+ video_id = compat_str(video['id'])
+
+ subtitles = {}
+ _subtitles = result.get('_subtitles') or {}
+ for ext, sub_url in _subtitles.items():
+ subtitles.setdefault('en', []).append({
+ 'ext': 'vtt' if ext == 'webvtt' else ext,
+ 'url': sub_url,
+ })
+
+ programme = result.get('programme') or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id,
+ 'description': result.get('summary'),
+ 'duration': float_or_none(video.get('length'), 1000),
+ 'subtitles': subtitles,
+ 'view_count': int_or_none(result.get('views')),
+ 'series': programme.get('name') or programme.get('shortName'),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/youtube_dl/extractor/sverigesradio.py b/youtube_dl/extractor/sverigesradio.py
new file mode 100644
index 000000000..aa0691f0d
--- /dev/null
+++ b/youtube_dl/extractor/sverigesradio.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ str_or_none,
+)
+
+
+class SverigesRadioBaseIE(InfoExtractor):
+ _BASE_URL = 'https://sverigesradio.se/sida/playerajax/'
+ _QUALITIES = ['low', 'medium', 'high']
+ _EXT_TO_CODEC_MAP = {
+ 'mp3': 'mp3',
+ 'm4a': 'aac',
+ }
+ _CODING_FORMAT_TO_ABR_MAP = {
+ 5: 128,
+ 11: 192,
+ 12: 32,
+ 13: 96,
+ }
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+ query = {
+ 'id': audio_id,
+ 'type': self._AUDIO_TYPE,
+ }
+
+ item = self._download_json(
+ self._BASE_URL + 'audiometadata', audio_id,
+ 'Downloading audio JSON metadata', query=query)['items'][0]
+ title = item['subtitle']
+
+ query['format'] = 'iis'
+ urls = []
+ formats = []
+ for quality in self._QUALITIES:
+ query['quality'] = quality
+ audio_url_data = self._download_json(
+ self._BASE_URL + 'getaudiourl', audio_id,
+ 'Downloading %s format JSON metadata' % quality,
+ fatal=False, query=query) or {}
+ audio_url = audio_url_data.get('audioUrl')
+ if not audio_url or audio_url in urls:
+ continue
+ urls.append(audio_url)
+ ext = determine_ext(audio_url)
+ coding_format = audio_url_data.get('codingFormat')
+ abr = int_or_none(self._search_regex(
+ r'_a(\d+)\.m4a', audio_url, 'audio bitrate',
+ default=None)) or self._CODING_FORMAT_TO_ABR_MAP.get(coding_format)
+ formats.append({
+ 'abr': abr,
+ 'acodec': self._EXT_TO_CODEC_MAP.get(ext),
+ 'ext': ext,
+ 'format_id': str_or_none(coding_format),
+ 'vcodec': 'none',
+ 'url': audio_url,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': audio_id,
+ 'title': title,
+ 'formats': formats,
+ 'series': item.get('title'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('displayimageurl'),
+ 'description': item.get('description'),
+ }
+
+
+class SverigesRadioPublicationIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:publication'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/sida/(?:artikel|gruppsida)\.aspx\?.*?\bartikel=(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://sverigesradio.se/sida/artikel.aspx?programid=83&artikel=7038546',
+ 'md5': '6a4917e1923fccb080e5a206a5afa542',
+ 'info_dict': {
+ 'id': '7038546',
+ 'ext': 'm4a',
+ 'duration': 132,
+ 'series': 'Nyheter (Ekot)',
+ 'title': 'Esa Teittinen: Sanningen har inte kommit fram',
+ 'description': 'md5:daf7ce66a8f0a53d5465a5984d3839df',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ },
+ }, {
+ 'url': 'https://sverigesradio.se/sida/gruppsida.aspx?programid=3304&grupp=6247&artikel=7146887',
+ 'only_matching': True,
+ }]
+ _AUDIO_TYPE = 'publication'
+
+
+class SverigesRadioEpisodeIE(SverigesRadioBaseIE):
+ IE_NAME = 'sverigesradio:episode'
+ _VALID_URL = r'https?://(?:www\.)?sverigesradio\.se/(?:sida/)?avsnitt/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://sverigesradio.se/avsnitt/1140922?programid=1300',
+ 'md5': '20dc4d8db24228f846be390b0c59a07c',
+ 'info_dict': {
+ 'id': '1140922',
+ 'ext': 'mp3',
+ 'duration': 3307,
+ 'series': 'Konflikt',
+ 'title': 'Metoo och valen',
+ 'description': 'md5:fcb5c1f667f00badcc702b196f10a27e',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }
+ _AUDIO_TYPE = 'episode'
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index 0901c3163..e12389cad 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -4,19 +4,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse,
-)
+from ..compat import compat_str
from ..utils import (
determine_ext,
dict_get,
int_or_none,
- orderedSet,
+ str_or_none,
strip_or_none,
try_get,
- urljoin,
- compat_str,
)
@@ -237,23 +232,23 @@ class SVTPlayIE(SVTPlayBaseIE):
class SVTSeriesIE(SVTPlayBaseIE):
- _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
+ _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
_TESTS = [{
'url': 'https://www.svtplay.se/rederiet',
'info_dict': {
- 'id': 'rederiet',
+ 'id': '14445680',
'title': 'Rederiet',
- 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
},
'playlist_mincount': 318,
}, {
- 'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
+ 'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
'info_dict': {
- 'id': 'rederiet-sasong2',
+ 'id': 'season-2-14445680',
'title': 'Rederiet - Säsong 2',
- 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
},
- 'playlist_count': 12,
+ 'playlist_mincount': 12,
}]
@classmethod
@@ -261,83 +256,87 @@ class SVTSeriesIE(SVTPlayBaseIE):
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
def _real_extract(self, url):
- series_id = self._match_id(url)
-
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
- season_slug = qs.get('tab', [None])[0]
-
- if season_slug:
- series_id += '-%s' % season_slug
-
- webpage = self._download_webpage(
- url, series_id, 'Downloading series page')
-
- root = self._parse_json(
- self._search_regex(
- self._SVTPLAY_RE, webpage, 'content', group='json'),
- series_id)
+ series_slug, season_id = re.match(self._VALID_URL, url).groups()
+
+ series = self._download_json(
+ 'https://api.svt.se/contento/graphql', series_slug,
+ 'Downloading series page', query={
+ 'query': '''{
+ listablesBySlug(slugs: ["%s"]) {
+ associatedContent(include: [productionPeriod, season]) {
+ items {
+ item {
+ ... on Episode {
+ videoSvtId
+ }
+ }
+ }
+ id
+ name
+ }
+ id
+ longDescription
+ name
+ shortDescription
+ }
+}''' % series_slug,
+ })['data']['listablesBySlug'][0]
season_name = None
entries = []
- for season in root['relatedVideoContent']['relatedVideosAccordion']:
+ for season in series['associatedContent']:
if not isinstance(season, dict):
continue
- if season_slug:
- if season.get('slug') != season_slug:
+ if season_id:
+ if season.get('id') != season_id:
continue
season_name = season.get('name')
- videos = season.get('videos')
- if not isinstance(videos, list):
+ items = season.get('items')
+ if not isinstance(items, list):
continue
- for video in videos:
- content_url = video.get('contentUrl')
- if not content_url or not isinstance(content_url, compat_str):
+ for item in items:
+ video = item.get('item') or {}
+ content_id = video.get('videoSvtId')
+ if not content_id or not isinstance(content_id, compat_str):
continue
- entries.append(
- self.url_result(
- urljoin(url, content_url),
- ie=SVTPlayIE.ie_key(),
- video_title=video.get('title')
- ))
-
- metadata = root.get('metaData')
- if not isinstance(metadata, dict):
- metadata = {}
+ entries.append(self.url_result(
+ 'svt:' + content_id, SVTPlayIE.ie_key(), content_id))
- title = metadata.get('title')
- season_name = season_name or season_slug
+ title = series.get('name')
+ season_name = season_name or season_id
if title and season_name:
title = '%s - %s' % (title, season_name)
- elif season_slug:
- title = season_slug
+ elif season_id:
+ title = season_id
return self.playlist_result(
- entries, series_id, title, metadata.get('description'))
+ entries, season_id or series.get('id'), title,
+ dict_get(series, ('longDescription', 'shortDescription')))
class SVTPageIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
_TESTS = [{
- 'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
+ 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
'info_dict': {
- 'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
- 'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
+ 'id': '25298267',
+ 'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
},
- 'playlist_count': 7,
+ 'playlist_count': 4,
}, {
- 'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
+ 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
'info_dict': {
- 'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
- 'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
+ 'id': '24243746',
+ 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
},
- 'playlist_count': 1,
+ 'playlist_count': 2,
}, {
# only programTitle
'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
'info_dict': {
- 'id': '2900353',
+ 'id': '8439V2K',
'ext': 'mp4',
'title': 'Stjärnorna skojar till det - under SVT-intervjun',
'duration': 27,
@@ -356,16 +355,26 @@ class SVTPageIE(InfoExtractor):
return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
def _real_extract(self, url):
- playlist_id = self._match_id(url)
+ path, display_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, playlist_id)
+ article = self._download_json(
+ 'https://api.svt.se/nss-api/page/' + path, display_id,
+ query={'q': 'articles'})['articles']['content'][0]
- entries = [
- self.url_result(
- 'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
- for video_id in orderedSet(re.findall(
- r'data-video-id=["\'](\d+)', webpage))]
+ entries = []
- title = strip_or_none(self._og_search_title(webpage, default=None))
+ def _process_content(content):
+ if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
+ video_id = compat_str(content['image']['svtId'])
+ entries.append(self.url_result(
+ 'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
- return self.playlist_result(entries, playlist_id, title)
+ for media in article.get('media', []):
+ _process_content(media)
+
+ for obj in article.get('structuredBody', []):
+ _process_content(obj.get('content') or {})
+
+ return self.playlist_result(
+ entries, str_or_none(article.get('id')),
+ strip_or_none(article.get('title')))
diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py
index 784f8ed66..e8a7c65e0 100644
--- a/youtube_dl/extractor/tbs.py
+++ b/youtube_dl/extractor/tbs.py
@@ -16,7 +16,7 @@ from ..utils import (
class TBSIE(TurnerBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))'
_TESTS = [{
'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
'info_dict': {
@@ -40,12 +40,12 @@ class TBSIE(TurnerBaseIE):
}]
def _real_extract(self, url):
- site, display_id = re.match(self._VALID_URL, url).groups()
+ site, path, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
webpage, 'drupal setting'), display_id)
- video_data = drupal_settings['turner_playlist'][0]
+ video_data = next(v for v in drupal_settings['turner_playlist'] if v.get('url') == path)
media_id = video_data['mediaID']
title = video_data['title']
diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py
new file mode 100644
index 000000000..a75369dbe
--- /dev/null
+++ b/youtube_dl/extractor/teachable.py
@@ -0,0 +1,298 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .wistia import WistiaIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ get_element_by_class,
+ strip_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeachableBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'teachable'
+ _URL_PREFIX = 'teachable:'
+
+ _SITES = {
+ # Only notable ones here
+ 'v1.upskillcourses.com': 'upskill',
+ 'gns3.teachable.com': 'gns3',
+ 'academyhacker.com': 'academyhacker',
+ 'stackskills.com': 'stackskills',
+ 'market.saleshacker.com': 'saleshacker',
+ 'learnability.org': 'learnability',
+ 'edurila.com': 'edurila',
+ 'courses.workitdaily.com': 'workitdaily',
+ }
+
+ _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, site):
+ if self._logged_in:
+ return
+
+ username, password = self._get_login_info(
+ netrc_machine=self._SITES.get(site, site))
+ if username is None:
+ return
+
+ login_page, urlh = self._download_webpage_handle(
+ 'https://%s/sign_in' % site, None,
+ 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']user-signout',
+ r'<a[^>]+\bhref=["\']/sign_out',
+ r'Log\s+[Oo]ut\s*<'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_url = urlh.geturl()
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
+ 'post url', default=login_url, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = urljoin(login_url, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': login_url,
+ })
+
+ if '>I accept the new Privacy Policy<' in response:
+ raise ExtractorError(
+ 'Unable to login: %s asks you to accept new Privacy Policy. '
+ 'Go to https://%s/ and accept.' % (site, site), expected=True)
+
+ # Successful login
+ if is_logged(response):
+ self._logged_in = True
+ return
+
+ message = get_element_by_class('alert', response)
+ if message is not None:
+ raise ExtractorError(
+ 'Unable to login: %s' % clean_html(message), expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+
+class TeachableIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /courses/[^/]+/lectures/(?P<id>\d+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+
+ _TESTS = [{
+ 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
+ 'info_dict': {
+ 'id': 'untlgzk1v7',
+ 'ext': 'bin',
+ 'title': 'Overview',
+ 'description': 'md5:071463ff08b86c208811130ea1c2464c',
+ 'duration': 736.4,
+ 'timestamp': 1542315762,
+ 'upload_date': '20181115',
+ 'chapter': 'Welcome',
+ 'chapter_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _is_teachable(webpage):
+ return 'teachableTracker.linker:autoLink' in webpage and re.search(
+ r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
+ webpage)
+
+ @staticmethod
+ def _extract_url(webpage, source_url):
+ if not TeachableIE._is_teachable(webpage):
+ return
+ if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
+ return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site') or mobj.group('site_t')
+ video_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ url = url[len(self._URL_PREFIX):]
+
+ webpage = self._download_webpage(url, video_id)
+
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if not wistia_urls:
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']lecture-contents-locked',
+ r'>\s*Lecture contents locked',
+ r'id=["\']lecture-locked',
+ # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
+ r'class=["\'](?:inner-)?lesson-locked',
+ r'>LESSON LOCKED<')):
+ self.raise_login_required('Lecture contents locked')
+ raise ExtractorError('Unable to find video URL')
+
+ title = self._og_search_title(webpage, default=None)
+
+ chapter = None
+ chapter_number = None
+ section_item = self._search_regex(
+ r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
+ webpage, 'section item', default=None, group='li')
+ if section_item:
+ chapter_number = int_or_none(self._search_regex(
+ r'data-ss-position=["\'](\d+)', section_item, 'section id',
+ default=None))
+ if chapter_number is not None:
+ sections = []
+ for s in re.findall(
+ r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
+ section = strip_or_none(clean_html(s))
+ if not section:
+ sections = []
+ break
+ sections.append(section)
+ if chapter_number <= len(sections):
+ chapter = sections[chapter_number - 1]
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': wistia_url,
+ 'ie_key': WistiaIE.ie_key(),
+ 'title': title,
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ } for wistia_url in wistia_urls]
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class TeachableCourseIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+ _TESTS = [{
+ 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
+ 'info_dict': {
+ 'id': 'essential-web-developer-course',
+ 'title': 'The Essential Web Developer Course (Free)',
+ },
+ 'playlist_count': 192,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/enrolled/423415',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TeachableIE.suitable(url) else super(
+ TeachableCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site') or mobj.group('site_t')
+ course_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ prefix = self._URL_PREFIX
+ url = url[len(prefix):]
+
+ webpage = self._download_webpage(url, course_id)
+
+ url_base = 'https://%s/' % site
+
+ entries = []
+
+ for mobj in re.finditer(
+ r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
+ webpage):
+ li = mobj.group('li')
+ if 'fa-youtube-play' not in li:
+ continue
+ lecture_url = self._search_regex(
+ r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
+ 'lecture url', default=None, group='url')
+ if not lecture_url:
+ continue
+ lecture_id = self._search_regex(
+ r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
+ title = self._html_search_regex(
+ r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
+ 'title', default=None)
+ entry_url = urljoin(url_base, lecture_url)
+ if prefixed:
+ entry_url = self._URL_PREFIX + entry_url
+ entries.append(
+ self.url_result(
+ entry_url,
+ ie=TeachableIE.ie_key(), video_id=lecture_id,
+ video_title=clean_html(title)))
+
+ course_title = self._html_search_regex(
+ (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
+ r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
+ webpage, 'course title', fatal=False)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
index e89759714..624cdb3ad 100644
--- a/youtube_dl/extractor/teachingchannel.py
+++ b/youtube_dl/extractor/teachingchannel.py
@@ -1,35 +1,33 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from .ooyala import OoyalaIE
class TeachingChannelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)'
+ _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
- 'md5': '3d6361864d7cac20b57c8784da17166f',
'info_dict': {
- 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
+ 'id': '3swwlzkT',
'ext': 'mp4',
'title': 'A History of Teaming',
'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
- 'duration': 422.255,
+ 'duration': 422,
+ 'upload_date': '20170316',
+ 'timestamp': 1489691297,
},
'params': {
'skip_download': True,
},
- 'add_ie': ['Ooyala'],
+ 'add_ie': ['JWPlatform'],
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- title = mobj.group('title')
- webpage = self._download_webpage(url, title)
- ooyala_code = self._search_regex(
- r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ mid = self._search_regex(
+ r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})',
+ webpage, 'media id')
- return OoyalaIE._build_url_result(ooyala_code)
+ return self.url_result('jwplatform:' + mid, 'JWPlatform', mid)
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 73469cc5d..5793b711f 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -16,7 +16,7 @@ from ..utils import (
class TeamcocoIE(TurnerBaseIE):
- _VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+ _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [
{
'url': 'http://teamcoco.com/video/mary-kay-remote',
@@ -79,50 +79,68 @@ class TeamcocoIE(TurnerBaseIE):
}, {
'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
'only_matching': True,
+ }, {
+ 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+ 'only_matching': True,
}
]
+ _RECORD_TEMPL = '''id
+ title
+ teaser
+ publishOn
+ thumb {
+ preview
+ }
+ tags {
+ name
+ }
+ duration
+ turnerMediaId
+ turnerMediaAuthToken'''
def _graphql_call(self, query_template, object_type, object_id):
find_object = 'find' + object_type
return self._download_json(
- 'http://teamcoco.com/graphql/', object_id, data=json.dumps({
+ 'https://teamcoco.com/graphql', object_id, data=json.dumps({
'query': query_template % (find_object, object_id)
- }))['data'][find_object]
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['data'][find_object]
def _real_extract(self, url):
display_id = self._match_id(url)
response = self._graphql_call('''{
- %s(slug: "%s") {
+ %%s(slug: "%%s") {
... on RecordSlug {
record {
+ %s
+ }
+ }
+ ... on PageSlug {
+ child {
id
- title
- teaser
- publishOn
- thumb {
- preview
- }
- file {
- url
- }
- tags {
- name
- }
- duration
- turnerMediaId
- turnerMediaAuthToken
}
}
... on NotFoundSlug {
status
}
}
-}''', 'Slug', display_id)
+}''' % self._RECORD_TEMPL, 'Slug', display_id)
if response.get('status'):
raise ExtractorError('This video is no longer available.', expected=True)
- record = response['record']
+ child = response.get('child')
+ if child:
+ record = self._graphql_call('''{
+ %%s(id: "%%s") {
+ ... on Video {
+ %s
+ }
+ }
+}''' % self._RECORD_TEMPL, 'Record', child['id'])
+ else:
+ record = response['record']
video_id = record['id']
info = {
@@ -145,20 +163,21 @@ class TeamcocoIE(TurnerBaseIE):
'accessTokenType': 'jws',
}))
else:
- video_sources = self._graphql_call('''{
- %s(id: "%s") {
- src
- }
-}''', 'RecordVideoSource', video_id) or {}
+ video_sources = self._download_json(
+ 'https://teamcoco.com/_truman/d/' + video_id,
+ video_id)['meta']['src']
+ if isinstance(video_sources, dict):
+ video_sources = video_sources.values()
formats = []
get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
- for format_id, src in video_sources.get('src', {}).items():
+ for src in video_sources:
if not isinstance(src, dict):
continue
src_url = src.get('src')
if not src_url:
continue
+ format_id = src.get('label')
ext = determine_ext(src_url, mimetype2ext(src.get('type')))
if format_id == 'hls' or ext == 'm3u8':
# compat_urllib_parse.urljoin does not work here
@@ -180,9 +199,6 @@ class TeamcocoIE(TurnerBaseIE):
'format_id': format_id,
'quality': get_quality(format_id),
})
- if not formats:
- formats = self._extract_m3u8_formats(
- record['file']['url'], video_id, 'mp4', fatal=False)
self._sort_formats(formats)
info['formats'] = formats
diff --git a/youtube_dl/extractor/teamtreehouse.py b/youtube_dl/extractor/teamtreehouse.py
new file mode 100644
index 000000000..d347e97ef
--- /dev/null
+++ b/youtube_dl/extractor/teamtreehouse.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ get_element_by_id,
+ parse_duration,
+ remove_end,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeamTreeHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
+ _TESTS = [{
+ # Course
+ 'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
+ 'info_dict': {
+ 'id': 'introduction-to-user-authentication-in-php',
+ 'title': 'Introduction to User Authentication in PHP',
+ 'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # WorkShop
+ 'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
+ 'info_dict': {
+ 'id': 'deploying-a-react-app',
+ 'title': 'Deploying a React App',
+ 'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ # Video
+ 'url': 'https://teamtreehouse.com/library/application-overview-2',
+ 'info_dict': {
+ 'id': 'application-overview-2',
+ 'ext': 'mp4',
+ 'title': 'Application Overview',
+ 'description': 'md5:4b0a234385c27140a4378de5f1e15127',
+ },
+ 'expected_warnings': ['This is just a preview'],
+ }]
+ _NETRC_MACHINE = 'teamtreehouse'
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ signin_page = self._download_webpage(
+ 'https://teamtreehouse.com/signin',
+ None, 'Downloading signin page')
+ data = self._form_hidden_inputs('new_user_session', signin_page)
+ data.update({
+ 'user_session[email]': email,
+ 'user_session[password]': password,
+ })
+ error_message = get_element_by_class('error-message', self._download_webpage(
+ 'https://teamtreehouse.com/person_session',
+ None, 'Logging in', data=urlencode_postdata(data)))
+ if error_message:
+ raise ExtractorError(clean_html(error_message), expected=True)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
+ description = self._html_search_meta(
+ ['description', 'og:description', 'twitter:description'], webpage)
+ entries = self._parse_html5_media_entries(url, webpage, display_id)
+ if entries:
+ info = entries[0]
+
+ for subtitles in info.get('subtitles', {}).values():
+ for subtitle in subtitles:
+ subtitle['ext'] = determine_ext(subtitle['url'], 'srt')
+
+ is_preview = 'data-preview="true"' in webpage
+ if is_preview:
+ self.report_warning(
+ 'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id)
+ duration = 30
+ else:
+ duration = float_or_none(self._search_regex(
+ r'data-duration="(\d+)"', webpage, 'duration'), 1000)
+ if not duration:
+ duration = parse_duration(get_element_by_id(
+ 'video-duration', webpage))
+
+ info.update({
+ 'id': display_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ })
+ return info
+ else:
+ def extract_urls(html, extract_info=None):
+ for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
+ page_url = urljoin(url, path)
+ entry = {
+ '_type': 'url_transparent',
+ 'id': self._match_id(page_url),
+ 'url': page_url,
+ 'id_key': self.ie_key(),
+ }
+ if extract_info:
+ entry.update(extract_info)
+ entries.append(entry)
+
+ workshop_videos = self._search_regex(
+ r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
+ webpage, 'workshop videos', default=None)
+ if workshop_videos:
+ extract_urls(workshop_videos)
+ else:
+ stages_path = self._search_regex(
+ r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
+ webpage, 'stages path')
+ if stages_path:
+ stages_page = self._download_webpage(
+ urljoin(url, stages_path), display_id, 'Downloading stages page')
+ for chapter_number, (chapter, steps_list) in enumerate(re.findall(r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>', stages_page), 1):
+ extract_urls(steps_list, {
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ })
+ title = remove_end(title, ' Course')
+
+ return self.playlist_result(
+ entries, display_id, title, description)
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 212ac80ab..63e2455b2 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -5,8 +5,12 @@ import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+ compat_str,
+ compat_urlparse
+)
from ..utils import (
+ extract_attributes,
float_or_none,
int_or_none,
try_get,
@@ -20,7 +24,7 @@ class TEDIE(InfoExtractor):
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
(
- (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
+ (?P<type_playlist>playlists(?:/(?P<playlist_id>\d+))?) # We have a playlist
|
((?P<type_talk>talks)) # We have a simple talk
|
@@ -84,6 +88,7 @@ class TEDIE(InfoExtractor):
'info_dict': {
'id': '10',
'title': 'Who are the hackers?',
+ 'description': 'md5:49a0dbe8fb76d81a0e64b4a80af7f15a'
},
'playlist_mincount': 6,
}, {
@@ -128,7 +133,7 @@ class TEDIE(InfoExtractor):
def _extract_info(self, webpage):
info_json = self._search_regex(
- r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
+ r'(?s)q\(\s*"\w+.init"\s*,\s*({.+?})\)\s*</script>',
webpage, 'info json')
return json.loads(info_json)
@@ -150,22 +155,22 @@ class TEDIE(InfoExtractor):
webpage = self._download_webpage(url, name,
'Downloading playlist webpage')
- info = self._extract_info(webpage)
- playlist_info = try_get(
- info, lambda x: x['__INITIAL_DATA__']['playlist'],
- dict) or info['playlist']
+ playlist_entries = []
+ for entry in re.findall(r'(?s)<[^>]+data-ga-context=["\']playlist["\'][^>]*>', webpage):
+ attrs = extract_attributes(entry)
+ entry_url = compat_urlparse.urljoin(url, attrs['href'])
+ playlist_entries.append(self.url_result(entry_url, self.ie_key()))
+
+ final_url = self._og_search_url(webpage, fatal=False)
+ playlist_id = (
+ re.match(self._VALID_URL, final_url).group('playlist_id')
+ if final_url else None)
- playlist_entries = [
- self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
- for talk in try_get(
- info, lambda x: x['__INITIAL_DATA__']['talks'],
- dict) or info['talks']
- ]
return self.playlist_result(
- playlist_entries,
- playlist_id=compat_str(playlist_info['id']),
- playlist_title=playlist_info['title'])
+ playlist_entries, playlist_id=playlist_id,
+ playlist_title=self._og_search_title(webpage, fatal=False),
+ playlist_description=self._og_search_description(webpage))
def _talk_info(self, url, video_name):
webpage = self._download_webpage(url, video_name)
@@ -177,20 +182,29 @@ class TEDIE(InfoExtractor):
title = talk_info['title'].strip()
- native_downloads = try_get(
- talk_info,
- (lambda x: x['downloads']['nativeDownloads'],
- lambda x: x['nativeDownloads']),
- dict) or {}
+ downloads = talk_info.get('downloads') or {}
+ native_downloads = downloads.get('nativeDownloads') or talk_info.get('nativeDownloads') or {}
formats = [{
'url': format_url,
'format_id': format_id,
- 'format': format_id,
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
+
+ subtitled_downloads = downloads.get('subtitledDownloads') or {}
+ for lang, subtitled_download in subtitled_downloads.items():
+ for q in self._NATIVE_FORMATS:
+ q_url = subtitled_download.get(q)
+ if not q_url:
+ continue
+ formats.append({
+ 'url': q_url,
+ 'format_id': '%s-%s' % (q, lang),
+ 'language': lang,
+ })
+
if formats:
for f in formats:
- finfo = self._NATIVE_FORMATS.get(f['format_id'])
+ finfo = self._NATIVE_FORMATS.get(f['format_id'].split('-')[0])
if finfo:
f.update(finfo)
@@ -203,51 +217,52 @@ class TEDIE(InfoExtractor):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
- return {
- '_type': 'url',
- 'url': ext_url or external['uri'],
- }
+
+ return self.url_result(ext_url or external['uri'])
resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None
for format_id, resources in resources_.items():
- if not isinstance(resources, dict):
- continue
- if format_id == 'h264':
- for resource in resources:
- h264_url = resource.get('file')
- if not h264_url:
- continue
- bitrate = int_or_none(resource.get('bitrate'))
- formats.append({
- 'url': h264_url,
- 'format_id': '%s-%sk' % (format_id, bitrate),
- 'tbr': bitrate,
- })
- if re.search(r'\d+k', h264_url):
- http_url = h264_url
- elif format_id == 'rtmp':
- streamer = talk_info.get('streamer')
- if not streamer:
+ if format_id == 'hls':
+ if not isinstance(resources, dict):
continue
- for resource in resources:
- formats.append({
- 'format_id': '%s-%s' % (format_id, resource.get('name')),
- 'url': streamer,
- 'play_path': resource['file'],
- 'ext': 'flv',
- 'width': int_or_none(resource.get('width')),
- 'height': int_or_none(resource.get('height')),
- 'tbr': int_or_none(resource.get('bitrate')),
- })
- elif format_id == 'hls':
stream_url = url_or_none(resources.get('stream'))
if not stream_url:
continue
formats.extend(self._extract_m3u8_formats(
stream_url, video_name, 'mp4', m3u8_id=format_id,
fatal=False))
+ else:
+ if not isinstance(resources, list):
+ continue
+ if format_id == 'h264':
+ for resource in resources:
+ h264_url = resource.get('file')
+ if not h264_url:
+ continue
+ bitrate = int_or_none(resource.get('bitrate'))
+ formats.append({
+ 'url': h264_url,
+ 'format_id': '%s-%sk' % (format_id, bitrate),
+ 'tbr': bitrate,
+ })
+ if re.search(r'\d+k', h264_url):
+ http_url = h264_url
+ elif format_id == 'rtmp':
+ streamer = talk_info.get('streamer')
+ if not streamer:
+ continue
+ for resource in resources:
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, resource.get('name')),
+ 'url': streamer,
+ 'play_path': resource['file'],
+ 'ext': 'flv',
+ 'width': int_or_none(resource.get('width')),
+ 'height': int_or_none(resource.get('height')),
+ 'tbr': int_or_none(resource.get('bitrate')),
+ })
m3u8_formats = list(filter(
lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none',
@@ -267,6 +282,8 @@ class TEDIE(InfoExtractor):
'format_id': m3u8_format['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
+ if f.get('acodec') == 'none':
+ del f['acodec']
formats.append(f)
audio_download = talk_info.get('audioDownload')
diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py
index 25573e49f..3e1a7a9e6 100644
--- a/youtube_dl/extractor/tele5.py
+++ b/youtube_dl/extractor/tele5.py
@@ -1,13 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
from .nexx import NexxIE
from ..compat import compat_urlparse
+from ..utils import (
+ NO_DEFAULT,
+ smuggle_url,
+)
class Tele5IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
'info_dict': {
@@ -21,10 +29,37 @@ class Tele5IE(InfoExtractor):
'skip_download': True,
},
}, {
- 'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+ # jwplatform, nexx unavailable
+ 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ 'info_dict': {
+ 'id': 'WJuiOlUp',
+ 'ext': 'mp4',
+ 'upload_date': '20200603',
+ 'timestamp': 1591214400,
+ 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
+ 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ }, {
+ 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
'only_matching': True,
}, {
- 'url': 'https://www.tele5.de/tv/dark-matter/videos',
+ 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/anders-ist-sevda/',
'only_matching': True,
}]
@@ -32,13 +67,42 @@ class Tele5IE(InfoExtractor):
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
- if not video_id:
+ NEXX_ID_RE = r'\d{6,}'
+ JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
+
+ def nexx_result(nexx_id):
+ return self.url_result(
+ 'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
+ ie=NexxIE.ie_key(), video_id=nexx_id)
+
+ nexx_id = jwplatform_id = None
+
+ if video_id:
+ if re.match(NEXX_ID_RE, video_id):
+ return nexx_result(video_id)
+ elif re.match(JWPLATFORM_ID_RE, video_id):
+ jwplatform_id = video_id
+
+ if not nexx_id:
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- video_id = self._html_search_regex(
- r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
- webpage, 'video id')
+
+ def extract_id(pattern, name, default=NO_DEFAULT):
+ return self._html_search_regex(
+ (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
+ r'\s+id\s*=\s*["\']player_(%s)' % pattern,
+ r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
+ default=default)
+
+ nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
+ if nexx_id:
+ return nexx_result(nexx_id)
+
+ if not jwplatform_id:
+ jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
return self.url_result(
- 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
- ie=NexxIE.ie_key(), video_id=video_id)
+ smuggle_url(
+ 'jwplatform:%s' % jwplatform_id,
+ {'geo_countries': self._GEO_COUNTRIES}),
+ ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
index d37e1b055..9ba3da341 100644
--- a/youtube_dl/extractor/telecinco.py
+++ b/youtube_dl/extractor/telecinco.py
@@ -11,6 +11,7 @@ from ..utils import (
determine_ext,
int_or_none,
str_or_none,
+ try_get,
urljoin,
)
@@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
'info_dict': {
'id': '1876350223',
'title': 'Bacalao con kokotxas al pil-pil',
- 'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
+ 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
},
'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7',
@@ -56,6 +57,26 @@ class TelecincoIE(InfoExtractor):
'duration': 50,
},
}, {
+ # video in opening's content
+ 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
+ 'info_dict': {
+ 'id': '2907195140',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'description': 'md5:73f340a7320143d37ab895375b2bf13a',
+ },
+ 'playlist': [{
+ 'md5': 'adb28c37238b675dad0f042292f209a7',
+ 'info_dict': {
+ 'id': 'TpI2EttSDAReWpJ1o0NVh2',
+ 'ext': 'mp4',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'duration': 1015,
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
'only_matching': True,
}, {
@@ -135,17 +156,28 @@ class TelecincoIE(InfoExtractor):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
article = self._parse_json(self._search_regex(
- r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
+ r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
webpage, 'article'), display_id)['article']
title = article.get('title')
- description = clean_html(article.get('leadParagraph'))
+ description = clean_html(article.get('leadParagraph')) or ''
if article.get('editorialType') != 'VID':
entries = []
- for p in article.get('body', []):
+ body = [article.get('opening')]
+ body.extend(try_get(article, lambda x: x['body'], list) or [])
+ for p in body:
+ if not isinstance(p, dict):
+ continue
content = p.get('content')
- if p.get('type') != 'video' or not content:
+ if not content:
+ continue
+ type_ = p.get('type')
+ if type_ == 'paragraph':
+ content_str = str_or_none(content)
+ if content_str:
+ description += content_str
continue
- entries.append(self._parse_content(content, url))
+ if type_ == 'video' and isinstance(content, dict):
+ entries.append(self._parse_content(content, url))
return self.playlist_result(
entries, str_or_none(article.get('id')), title, description)
content = article['opening']['content']
diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py
index 0f576c1ab..2dc020537 100644
--- a/youtube_dl/extractor/telegraaf.py
+++ b/youtube_dl/extractor/telegraaf.py
@@ -4,21 +4,25 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
- remove_end,
+ int_or_none,
+ parse_iso8601,
+ try_get,
)
class TelegraafIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
+ _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
_TEST = {
- 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
+ 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
'info_dict': {
- 'id': '24353229',
+ 'id': 'gaMItuoSeUg2',
'ext': 'mp4',
- 'title': 'Tikibad ontruimd wegens brand',
- 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 33,
+ 'title': 'Historisch scheepswrak slaat na 100 jaar los',
+ 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 55,
+ 'timestamp': 1572805527,
+ 'upload_date': '20191103',
},
'params': {
# m3u8 download
@@ -27,23 +31,30 @@ class TelegraafIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id = self._match_id(url)
+ article_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ video_id = self._download_json(
+ 'https://www.telegraaf.nl/graphql', article_id, query={
+ 'query': '''{
+ article(uid: %s) {
+ videos {
+ videoId
+ }
+ }
+}''' % article_id,
+ })['data']['article']['videos'][0]['videoId']
- player_url = self._html_search_regex(
- r'<iframe[^>]+src="([^"]+")', webpage, 'player URL')
- player_page = self._download_webpage(
- player_url, video_id, note='Download player webpage')
- playlist_url = self._search_regex(
- r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
- playlist_data = self._download_json(playlist_url, video_id)
+ item = self._download_json(
+ 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
+ video_id)['items'][0]
+ title = item['title']
- item = playlist_data['items'][0]
formats = []
- locations = item['locations']
+ locations = item.get('locations') or {}
for location in locations.get('adaptive', []):
- manifest_url = location['src']
+ manifest_url = location.get('src')
+ if not manifest_url:
+ continue
ext = determine_ext(manifest_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
@@ -54,25 +65,25 @@ class TelegraafIE(InfoExtractor):
else:
self.report_warning('Unknown adaptive format %s' % ext)
for location in locations.get('progressive', []):
+ src = try_get(location, lambda x: x['sources'][0]['src'])
+ if not src:
+ continue
+ label = location.get('label')
formats.append({
- 'url': location['sources'][0]['src'],
- 'width': location.get('width'),
- 'height': location.get('height'),
- 'format_id': 'http-%s' % location['label'],
+ 'url': src,
+ 'width': int_or_none(location.get('width')),
+ 'height': int_or_none(location.get('height')),
+ 'format_id': 'http' + ('-%s' % label if label else ''),
})
self._sort_formats(formats)
- title = remove_end(self._og_search_title(webpage), ' - VIDEO')
- description = self._og_search_description(webpage)
- duration = item.get('duration')
- thumbnail = item.get('poster')
-
return {
'id': video_id,
'title': title,
- 'description': description,
+ 'description': item.get('description'),
'formats': formats,
- 'duration': duration,
- 'thumbnail': thumbnail,
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('poster'),
+ 'timestamp': parse_iso8601(item.get('datecreated'), ' '),
}
diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py
index 6965c127b..c82c94b3a 100644
--- a/youtube_dl/extractor/telequebec.py
+++ b/youtube_dl/extractor/telequebec.py
@@ -7,6 +7,7 @@ from ..utils import (
int_or_none,
smuggle_url,
try_get,
+ unified_timestamp,
)
@@ -22,7 +23,13 @@ class TeleQuebecBaseIE(InfoExtractor):
class TeleQuebecIE(TeleQuebecBaseIE):
- _VALID_URL = r'https?://zonevideo\.telequebec\.tv/media/(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ zonevideo\.telequebec\.tv/media|
+ coucou\.telequebec\.tv/videos
+ )/(?P<id>\d+)
+ '''
_TESTS = [{
# available till 01.01.2023
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
@@ -31,8 +38,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
'ext': 'mp4',
'title': 'Un petit choc et puis repart!',
'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
- 'upload_date': '20180222',
- 'timestamp': 1519326631,
},
'params': {
'skip_download': True,
@@ -41,6 +46,9 @@ class TeleQuebecIE(TeleQuebecBaseIE):
# no description
'url': 'http://zonevideo.telequebec.tv/media/30261',
'only_matching': True,
+ }, {
+ 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -61,6 +69,52 @@ class TeleQuebecIE(TeleQuebecBaseIE):
return info
+class TeleQuebecSquatIE(InfoExtractor):
+ _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://squat.telequebec.tv/videos/9314',
+ 'info_dict': {
+ 'id': 'd59ae78112d542e793d83cc9d3a5b530',
+ 'ext': 'mp4',
+ 'title': 'Poupeflekta',
+ 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
+ 'duration': 1351,
+ 'timestamp': 1569057600,
+ 'upload_date': '20190921',
+ 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
+ 'season': 'Saison 3',
+ 'season_number': 3,
+ 'episode_number': 57,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
+ video_id)
+
+ media_id = video['sourceId']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
+ 'ie_key': TeleQuebecIE.ie_key(),
+ 'id': media_id,
+ 'title': video.get('titre'),
+ 'description': video.get('description'),
+ 'timestamp': unified_timestamp(video.get('datePublication')),
+ 'series': video.get('container'),
+ 'season': video.get('saison'),
+ 'season_number': int_or_none(video.get('noSaison')),
+ 'episode_number': int_or_none(video.get('episode')),
+ }
+
+
class TeleQuebecEmissionIE(TeleQuebecBaseIE):
_VALID_URL = r'''(?x)
https?://
diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py
new file mode 100644
index 000000000..af325fea8
--- /dev/null
+++ b/youtube_dl/extractor/tenplay.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_age_limit,
+ parse_iso8601,
+ smuggle_url,
+)
+
+
+class TenPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
+ _TESTS = [{
+ 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
+ 'info_dict': {
+ 'id': '6060533435001',
+ 'ext': 'mp4',
+ 'title': 'MasterChef - S1 Ep. 1',
+ 'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c',
+ 'age_limit': 10,
+ 'timestamp': 1240828200,
+ 'upload_date': '20090427',
+ 'uploader_id': '2199827728001',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ data = self._download_json(
+ 'https://10play.com.au/api/video/' + content_id, content_id)
+ video = data.get('video') or {}
+ metadata = data.get('metaData') or {}
+ brightcove_id = video.get('videoId') or metadata['showContentVideoId']
+ brightcove_url = smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['AU']})
+
+ return {
+ '_type': 'url_transparent',
+ 'url': brightcove_url,
+ 'id': content_id,
+ 'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
+ 'description': video.get('description'),
+ 'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
+ 'series': metadata.get('showName'),
+ 'season': metadata.get('showContentSeason'),
+ 'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/youtube_dl/extractor/testurl.py b/youtube_dl/extractor/testurl.py
index 46918adb0..84a14a0bd 100644
--- a/youtube_dl/extractor/testurl.py
+++ b/youtube_dl/extractor/testurl.py
@@ -61,8 +61,4 @@ class TestURLIE(InfoExtractor):
self.to_screen('Test URL: %s' % tc['url'])
- return {
- '_type': 'url',
- 'url': tc['url'],
- 'id': video_id,
- }
+ return self.url_result(tc['url'], video_id=video_id)
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py
index 903f47380..55e2a0721 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dl/extractor/tf1.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
class TF1IE(InfoExtractor):
@@ -43,12 +44,49 @@ class TF1IE(InfoExtractor):
}, {
'url': 'http://www.tf1.fr/hd1/documentaire/videos/mylene-farmer-d-une-icone.html',
'only_matching': True,
+ }, {
+ 'url': 'https://www.tf1.fr/tmc/quotidien-avec-yann-barthes/videos/quotidien-premiere-partie-11-juin-2019.html',
+ 'info_dict': {
+ 'id': '13641379',
+ 'ext': 'mp4',
+ 'title': 'md5:f392bc52245dc5ad43771650c96fb620',
+ 'description': 'md5:44bc54f0a21322f5b91d68e76a544eae',
+ 'upload_date': '20190611',
+ },
+ 'params': {
+ # Sometimes wat serves the whole file with the --test option
+ 'skip_download': True,
+ },
}]
def _real_extract(self, url):
video_id = self._match_id(url)
+
webpage = self._download_webpage(url, video_id)
- wat_id = self._html_search_regex(
- r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
- webpage, 'wat id', group='id')
+
+ wat_id = None
+
+ data = self._parse_json(
+ self._search_regex(
+ r'__APOLLO_STATE__\s*=\s*({.+?})\s*(?:;|</script>)', webpage,
+ 'data', default='{}'), video_id, fatal=False)
+
+ if data:
+ try:
+ wat_id = next(
+ video.get('streamId')
+ for key, video in data.items()
+ if isinstance(video, dict)
+ and video.get('slug') == video_id)
+ if not isinstance(wat_id, compat_str) or not wat_id.isdigit():
+ wat_id = None
+ except StopIteration:
+ pass
+
+ if not wat_id:
+ wat_id = self._html_search_regex(
+ (r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1',
+ r'(["\']?)streamId\1\s*:\s*(["\']?)(?P<id>\d+)\2'),
+ webpage, 'wat id', group='id')
+
return self.url_result('wat:%s' % wat_id, 'Wat')
diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py
index 0e2370cd8..0631cb7ab 100644
--- a/youtube_dl/extractor/tfo.py
+++ b/youtube_dl/extractor/tfo.py
@@ -17,14 +17,12 @@ class TFOIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
_TEST = {
'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
- 'md5': '47c987d0515561114cf03d1226a9d4c7',
+ 'md5': 'cafbe4f47a8dae0ca0159937878100d6',
'info_dict': {
- 'id': '100463871',
+ 'id': '7da3d50e495c406b8fc0b997659cc075',
'ext': 'mp4',
'title': 'Video Game Hackathon',
'description': 'md5:558afeba217c6c8d96c60e5421795c07',
- 'upload_date': '20160212',
- 'timestamp': 1455310233,
}
}
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index ffef5bf06..07055513a 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE):
smil_url, video_id, note=note, query={'format': 'SMIL'},
headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
- if error_element is not None and error_element.attrib['src'].startswith(
- 'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
- raise ExtractorError(error_element.attrib['abstract'], expected=True)
+ if error_element is not None:
+ exception = find_xpath_attr(
+ error_element, _x('.//smil:param'), 'name', 'exception')
+ if exception is not None:
+ if exception.get('value') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(error_element.attrib['abstract'])
+ elif error_element.attrib['src'].startswith(
+ 'http://link.theplatform.%s/s/errorFiles/Unavailable.'
+ % self._TP_TLD):
+ raise ExtractorError(
+ error_element.attrib['abstract'], expected=True)
smil_formats = self._parse_smil_formats(
meta, smil_url, video_id, namespace=default_ns,
@@ -201,7 +209,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
return [m.group('url')]
# Are whitesapces ignored in URLs?
- # https://github.com/rg3/youtube-dl/issues/12044
+ # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
@@ -263,7 +271,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if smuggled_data.get('force_smil_url', False):
smil_url = url
- # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+ # Explicitly specified SMIL (see https://github.com/ytdl-org/youtube-dl/issues/7385)
elif '/guid/' in url:
headers = {}
source_url = smuggled_data.get('source_url')
@@ -335,7 +343,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
- main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
formats = []
subtitles = {}
@@ -348,7 +356,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))
- for asset_type in item['plfile$assetTypes']:
+ file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+ for asset_type in file_asset_types:
if asset_type in asset_types:
continue
asset_types.append(asset_type)
diff --git a/youtube_dl/extractor/thesun.py b/youtube_dl/extractor/thesun.py
index 22d003776..15d4a6932 100644
--- a/youtube_dl/extractor/thesun.py
+++ b/youtube_dl/extractor/thesun.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from .ooyala import OoyalaIE
+from ..utils import extract_attributes
class TheSunIE(InfoExtractor):
@@ -16,6 +16,7 @@ class TheSunIE(InfoExtractor):
},
'playlist_count': 2,
}
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url):
article_id = self._match_id(url)
@@ -23,10 +24,15 @@ class TheSunIE(InfoExtractor):
webpage = self._download_webpage(url, article_id)
entries = []
- for ooyala_id in re.findall(
- r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
+ for video in re.findall(
+ r'<video[^>]+data-video-id-pending=[^>]+>',
webpage):
- entries.append(OoyalaIE._build_url_result(ooyala_id))
+ attrs = extract_attributes(video)
+ video_id = attrs['data-video-id-pending']
+ account_id = attrs.get('data-account', '5067014667001')
+ entries.append(self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
+ 'BrightcoveNew', video_id))
return self.playlist_result(
entries, article_id, self._og_search_title(webpage, fatal=False))
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
index 6ab147ad7..a3d9b4017 100644
--- a/youtube_dl/extractor/thisoldhouse.py
+++ b/youtube_dl/extractor/thisoldhouse.py
@@ -2,43 +2,46 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import try_get
class ThisOldHouseIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
- 'md5': '568acf9ca25a639f0c4ff905826b662f',
'info_dict': {
- 'id': '2REGtUDQ',
+ 'id': '5dcdddf673c3f956ef5db202',
'ext': 'mp4',
'title': 'How to Build a Storage Bench',
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
'timestamp': 1442548800,
'upload_date': '20150918',
- }
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True,
}, {
'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
+ 'only_matching': True,
+ }, {
+ # iframe www.thisoldhouse.com
+ 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+ 'only_matching': True,
}]
+ _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
- (r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
- r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
- webpage, 'video id', default=None, group='id')
- if not video_id:
- drupal_settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), display_id)
- video_id = try_get(
- drupal_settings, lambda x: x['jwplatform']['video_id'],
- compat_str) or list(drupal_settings['comScore'])[0]
- return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
+ r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
+ webpage, 'video id')
+ return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py
new file mode 100644
index 000000000..66088b9ab
--- /dev/null
+++ b/youtube_dl/extractor/tiktok.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_str,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class TikTokBaseIE(InfoExtractor):
+ def _extract_aweme(self, data):
+ video = data['video']
+ description = str_or_none(try_get(data, lambda x: x['desc']))
+ width = int_or_none(try_get(data, lambda x: video['width']))
+ height = int_or_none(try_get(data, lambda x: video['height']))
+
+ format_urls = set()
+ formats = []
+ for format_id in (
+ 'play_addr_lowbr', 'play_addr', 'play_addr_h264',
+ 'download_addr'):
+ for format in try_get(
+ video, lambda x: x[format_id]['url_list'], list) or []:
+ format_url = url_or_none(format)
+ if not format_url:
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'height': height,
+ 'width': width,
+ })
+ self._sort_formats(formats)
+
+ thumbnail = url_or_none(try_get(
+ video, lambda x: x['cover']['url_list'][0], compat_str))
+ uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
+ timestamp = int_or_none(data.get('create_time'))
+ comment_count = int_or_none(data.get('comment_count')) or int_or_none(
+ try_get(data, lambda x: x['statistics']['comment_count']))
+ repost_count = int_or_none(try_get(
+ data, lambda x: x['statistics']['share_count']))
+
+ aweme_id = data['aweme_id']
+
+ return {
+ 'id': aweme_id,
+ 'title': uploader or aweme_id,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'comment_count': comment_count,
+ 'repost_count': repost_count,
+ 'formats': formats,
+ }
+
+
+class TikTokIE(TikTokBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?tiktok\.com/v|
+ (?:www\.)?tiktok\.com/share/video
+ )
+ /(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://m.tiktok.com/v/6606727368545406213.html',
+ 'md5': 'd584b572e92fcd48888051f238022420',
+ 'info_dict': {
+ 'id': '6606727368545406213',
+ 'ext': 'mp4',
+ 'title': 'Zureeal',
+ 'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
+ 'thumbnail': r're:^https?://.*~noop.image',
+ 'uploader': 'Zureeal',
+ 'timestamp': 1538248586,
+ 'upload_date': '20180929',
+ 'comment_count': int,
+ 'repost_count': int,
+ }
+ }, {
+ 'url': 'https://www.tiktok.com/share/video/6606727368545406213',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://m.tiktok.com/v/%s.html' % video_id, video_id)
+ data = self._parse_json(self._search_regex(
+ r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
+ return self._extract_aweme(data)
+
+
+class TikTokUserIE(TikTokBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:m\.)?tiktok\.com/h5/share/usr|
+ (?:www\.)?tiktok\.com/share/user
+ )
+ /(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
+ 'info_dict': {
+ 'id': '188294915489964032',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ 'url': 'https://www.tiktok.com/share/user/188294915489964032',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ data = self._download_json(
+ 'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
+ query={'_signature': '_'})
+ entries = []
+ for aweme in data['aweme_list']:
+ try:
+ entry = self._extract_aweme(aweme)
+ except ExtractorError:
+ continue
+ entry['extractor_key'] = TikTokIE.ie_key()
+ entries.append(entry)
+ return self.playlist_result(entries, user_id)
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dl/extractor/tnaflix.py
index 0c2f8f119..b3573c6e0 100644
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@@ -18,8 +18,9 @@ from ..utils import (
class TNAFlixNetworkBaseIE(InfoExtractor):
# May be overridden in descendants if necessary
_CONFIG_REGEX = [
- r'flashvars\.config\s*=\s*escape\("([^"]+)"',
- r'<input[^>]+name="config\d?" value="([^"]+)"',
+ r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
+ r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
+ r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
]
_HOST = 'tna'
_VKEY_SUFFIX = ''
@@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
cfg_url = self._proto_relative_url(self._html_search_regex(
- self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
+ self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
+ group='url'), 'http:')
if not cfg_url:
inputs = self._hidden_inputs(webpage)
@@ -94,7 +96,7 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
cfg_xml = self._download_xml(
cfg_url, display_id, 'Downloading metadata',
- transform_source=fix_xml_ampersands)
+ transform_source=fix_xml_ampersands, headers={'Referer': url})
formats = []
diff --git a/youtube_dl/extractor/toggle.py b/youtube_dl/extractor/toggle.py
index 5e5efda0f..ca2e36efe 100644
--- a/youtube_dl/extractor/toggle.py
+++ b/youtube_dl/extractor/toggle.py
@@ -17,9 +17,9 @@ from ..utils import (
class ToggleIE(InfoExtractor):
IE_NAME = 'toggle'
- _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
+ 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': {
'id': '343115',
'ext': 'mp4',
@@ -33,7 +33,7 @@ class ToggleIE(InfoExtractor):
}
}, {
'note': 'DRM-protected video',
- 'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
+ 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
'info_dict': {
'id': '341413',
'ext': 'wvm',
@@ -48,7 +48,7 @@ class ToggleIE(InfoExtractor):
}, {
# this also tests correct video id extraction
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
- 'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
+ 'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
'info_dict': {
'id': '332861',
'ext': 'mp4',
@@ -65,19 +65,22 @@ class ToggleIE(InfoExtractor):
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
- 'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
+ 'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
'only_matching': True,
}, {
- 'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
+ 'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
'only_matching': True,
}, {
- 'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
+ 'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
'only_matching': True,
}, {
- 'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+ 'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
'only_matching': True,
}, {
- 'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
+ 'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
'only_matching': True,
}]
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
index 2e7876cc5..44b022fca 100644
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -1,24 +1,21 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
+import json
-from .common import InfoExtractor
+from .radiocanada import RadioCanadaIE
+from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
int_or_none,
- js_to_json,
- urlencode_postdata,
- extract_attributes,
- smuggle_url,
+ merge_dicts,
)
-class TouTvIE(InfoExtractor):
+class TouTvIE(RadioCanadaIE):
_NETRC_MACHINE = 'toutv'
IE_NAME = 'tou.tv'
_VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/S[0-9]+[EC][0-9]+)?)'
- _access_token = None
- _claims = None
_TESTS = [{
'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17',
@@ -41,80 +38,56 @@ class TouTvIE(InfoExtractor):
'url': 'https://ici.tou.tv/l-age-adulte/S01C501',
'only_matching': True,
}]
+ _CLIENT_KEY = '90505c8d-9c34-4f34-8da1-3a85bdc6d4f4'
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
- state = 'http://ici.tou.tv/'
- webpage = self._download_webpage(state, None, 'Downloading homepage')
- toutvlogin = self._parse_json(self._search_regex(
- r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json)
- authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize'
- login_webpage = self._download_webpage(
- authorize_url, None, 'Downloading login page', query={
- 'client_id': toutvlogin['clientId'],
- 'redirect_uri': 'https://ici.tou.tv/login/loginCallback',
- 'response_type': 'token',
- 'scope': 'media-drmt openid profile email id.write media-validation.read.privileged',
- 'state': state,
- })
-
- def extract_form_url_and_data(wp, default_form_url, form_spec_re=''):
- form, form_elem = re.search(
- r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups()
- form_data = self._hidden_inputs(form)
- form_url = extract_attributes(form_elem).get('action') or default_form_url
- return form_url, form_data
-
- post_url, form_data = extract_form_url_and_data(
- login_webpage,
- 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login',
- r'(?:id|name)="Form-login"')
- form_data.update({
- 'login-email': email,
- 'login-password': password,
- })
- consent_webpage = self._download_webpage(
- post_url, None, 'Logging in', data=urlencode_postdata(form_data))
- post_url, form_data = extract_form_url_and_data(
- consent_webpage,
- 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent')
- _, urlh = self._download_webpage_handle(
- post_url, None, 'Following Redirection',
- data=urlencode_postdata(form_data))
- self._access_token = self._search_regex(
- r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
- urlh.geturl(), 'access token')
- self._claims = self._download_json(
- 'https://services.radio-canada.ca/media/validation/v2/getClaims',
- None, 'Extracting Claims', query={
- 'token': self._access_token,
- 'access_token': self._access_token,
- })['claims']
+ try:
+ self._access_token = self._download_json(
+ 'https://services.radio-canada.ca/toutv/profiling/accounts/login',
+ None, 'Logging in', data=json.dumps({
+ 'ClientId': self._CLIENT_KEY,
+ 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20',
+ 'Email': email,
+ 'Password': password,
+ 'Scope': 'id.write media-validation.read',
+ }).encode(), headers={
+ 'Authorization': 'client-key ' + self._CLIENT_KEY,
+ 'Content-Type': 'application/json;charset=utf-8',
+ })['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), None)['Message']
+ raise ExtractorError(error, expected=True)
+ raise
+ self._claims = self._call_api('validation/v2/getClaims')['claims']
def _real_extract(self, url):
path = self._match_id(url)
- metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path)
+ metadata = self._download_json(
+ 'https://services.radio-canada.ca/toutv/presentation/%s' % path, path, query={
+ 'client_key': self._CLIENT_KEY,
+ 'device': 'web',
+ 'version': 4,
+ })
# IsDrm does not necessarily mean the video is DRM protected (see
- # https://github.com/rg3/youtube-dl/issues/13994).
+ # https://github.com/ytdl-org/youtube-dl/issues/13994).
if metadata.get('IsDrm'):
self.report_warning('This video is probably DRM protected.', path)
video_id = metadata['IdMedia']
details = metadata['Details']
- title = details['OriginalTitle']
- video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id)
- if self._access_token and self._claims:
- video_url = smuggle_url(video_url, {
- 'access_token': self._access_token,
- 'claims': self._claims,
- })
- return {
- '_type': 'url_transparent',
- 'url': video_url,
+ return merge_dicts({
'id': video_id,
- 'title': title,
+ 'title': details.get('OriginalTitle'),
+ 'description': details.get('Description'),
'thumbnail': details.get('ImageUrl'),
'duration': int_or_none(details.get('LengthInSeconds')),
- }
+ 'series': metadata.get('ProgramTitle'),
+ 'season_number': int_or_none(metadata.get('SeasonNumber')),
+ 'season': metadata.get('SeasonTitle'),
+ 'episode_number': int_or_none(metadata.get('EpisodeNumber')),
+ 'episode': metadata.get('EpisodeTitle'),
+ }, self._extract_info(metadata.get('AppCode', 'toutv'), video_id))
diff --git a/youtube_dl/extractor/trunews.py b/youtube_dl/extractor/trunews.py
new file mode 100644
index 000000000..cca5b5ceb
--- /dev/null
+++ b/youtube_dl/extractor/trunews.py
@@ -0,0 +1,34 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TruNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'info_dict': {
+ 'id': '5c5a21e65d3c196e1c0020cc',
+ 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'ext': 'mp4',
+ 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
+ 'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
+ 'duration': 3685,
+ 'timestamp': 1549411440,
+ 'upload_date': '20190206',
+ },
+ 'add_ie': ['Zype'],
+ }
+ _ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ zype_id = self._download_json(
+ 'https://api.zype.com/videos', display_id, query={
+ 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
+ 'per_page': 1,
+ 'active': 'true',
+ 'friendly_title': display_id,
+ })['response'][0]['_id']
+ return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id)
diff --git a/youtube_dl/extractor/trutv.py b/youtube_dl/extractor/trutv.py
index 3a5782525..ce892c8c5 100644
--- a/youtube_dl/extractor/trutv.py
+++ b/youtube_dl/extractor/trutv.py
@@ -4,44 +4,72 @@ from __future__ import unicode_literals
import re
from .turner import TurnerBaseIE
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
class TruTVIE(TurnerBaseIE):
- _VALID_URL = r'https?://(?:www\.)?trutv\.com(?:(?P<path>/shows/[^/]+/videos/[^/?#]+?)\.html|/full-episodes/[^/]+/(?P<id>\d+))'
+ _VALID_URL = r'https?://(?:www\.)?trutv\.com/(?:shows|full-episodes)/(?P<series_slug>[0-9A-Za-z-]+)/(?:videos/(?P<clip_slug>[0-9A-Za-z-]+)|(?P<id>\d+))'
_TEST = {
- 'url': 'http://www.trutv.com/shows/10-things/videos/you-wont-believe-these-sports-bets.html',
- 'md5': '2cdc844f317579fed1a7251b087ff417',
+ 'url': 'https://www.trutv.com/shows/the-carbonaro-effect/videos/sunlight-activated-flower.html',
'info_dict': {
- 'id': '/shows/10-things/videos/you-wont-believe-these-sports-bets',
+ 'id': 'f16c03beec1e84cd7d1a51f11d8fcc29124cc7f1',
'ext': 'mp4',
- 'title': 'You Won\'t Believe These Sports Bets',
- 'description': 'Jamie Lee sits down with a bookie to discuss the bizarre world of illegal sports betting.',
- 'upload_date': '20130305',
- }
+ 'title': 'Sunlight-Activated Flower',
+ 'description': "A customer is stunned when he sees Michael's sunlight-activated flower.",
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
}
def _real_extract(self, url):
- path, video_id = re.match(self._VALID_URL, url).groups()
- auth_required = False
- if path:
- data_src = 'http://www.trutv.com/video/cvp/v2/xml/content.xml?id=%s.xml' % path
+ series_slug, clip_slug, video_id = re.match(self._VALID_URL, url).groups()
+
+ if video_id:
+ path = 'episode'
+ display_id = video_id
else:
- webpage = self._download_webpage(url, video_id)
- video_id = self._search_regex(
- r"TTV\.TVE\.episodeId\s*=\s*'([^']+)';",
- webpage, 'video id', default=video_id)
- auth_required = self._search_regex(
- r'TTV\.TVE\.authRequired\s*=\s*(true|false);',
- webpage, 'auth required', default='false') == 'true'
- data_src = 'http://www.trutv.com/tveverywhere/services/cvpXML.do?titleId=' + video_id
- return self._extract_cvp_info(
- data_src, path, {
- 'secure': {
- 'media_src': 'http://androidhls-secure.cdn.turner.com/trutv/big',
- 'tokenizer_src': 'http://www.trutv.com/tveverywhere/processors/services/token_ipadAdobe.do',
- },
- }, {
+ path = 'series/clip'
+ display_id = clip_slug
+
+ data = self._download_json(
+ 'https://api.trutv.com/v2/web/%s/%s/%s' % (path, series_slug, display_id),
+ display_id)
+ video_data = data['episode'] if video_id else data['info']
+ media_id = video_data['mediaId']
+ title = video_data['title'].strip()
+
+ info = self._extract_ngtv_info(
+ media_id, {}, {
'url': url,
'site_name': 'truTV',
- 'auth_required': auth_required,
+ 'auth_required': video_data.get('isAuthRequired'),
})
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('srcUrl')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'id': media_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(video_data.get('publicationDate')),
+ 'series': video_data.get('showTitle'),
+ 'season_number': int_or_none(video_data.get('seasonNum')),
+ 'episode_number': int_or_none(video_data.get('episodeNum')),
+ })
+ return info
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
index edbb0aa69..ae584ad69 100644
--- a/youtube_dl/extractor/tumblr.py
+++ b/youtube_dl/extractor/tumblr.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
@@ -151,7 +150,7 @@ class TumblrIE(InfoExtractor):
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
webpage, urlh = self._download_webpage_handle(url, video_id)
- redirect_url = compat_str(urlh.geturl())
+ redirect_url = urlh.geturl()
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
raise ExtractorError(
'This Tumblr may contain sensitive media. '
diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py
deleted file mode 100644
index 362318b24..000000000
--- a/youtube_dl/extractor/tutv.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_parse_qs,
-)
-
-
-class TutvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
- _TEST = {
- 'url': 'http://tu.tv/videos/robots-futbolistas',
- 'md5': '0cd9e28ad270488911b0d2a72323395d',
- 'info_dict': {
- 'id': '2973058',
- 'ext': 'mp4',
- 'title': 'Robots futbolistas',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
-
- data_content = self._download_webpage(
- 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
- video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
-
- return {
- 'id': internal_id,
- 'url': video_url,
- 'title': self._og_search_title(webpage),
- }
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
index d5071e8a5..4a19b9be6 100644
--- a/youtube_dl/extractor/tv2.py
+++ b/youtube_dl/extractor/tv2.py
@@ -4,13 +4,17 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
float_or_none,
js_to_json,
parse_iso8601,
remove_end,
+ strip_or_none,
+ try_get,
)
@@ -20,7 +24,7 @@ class TV2IE(InfoExtractor):
'url': 'http://www.tv2.no/v/916509/',
'info_dict': {
'id': '916509',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
'timestamp': 1431715610,
@@ -29,22 +33,40 @@ class TV2IE(InfoExtractor):
'view_count': int,
'categories': list,
},
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
}
+ _API_DOMAIN = 'sumo.tv2.no'
+ _PROTOCOLS = ('HDS', 'HLS', 'DASH')
+ _GEO_COUNTRIES = ['NO']
def _real_extract(self, url):
video_id = self._match_id(url)
+ api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
formats = []
format_urls = []
- for protocol in ('HDS', 'HLS'):
- data = self._download_json(
- 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
- video_id, 'Downloading play JSON')['playback']
- for item in data['items']['item']:
+ for protocol in self._PROTOCOLS:
+ try:
+ data = self._download_json(
+ api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
+ video_id, 'Downloading play JSON')['playback']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), video_id)['error']
+ error_code = error.get('code')
+ if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif error_code == 'SESSION_NOT_AUTHENTICATED':
+ self.raise_login_required()
+ raise ExtractorError(error['description'])
+ raise
+ items = try_get(data, lambda x: x['items']['item'])
+ if not items:
+ continue
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ if not isinstance(item, dict):
+ continue
video_url = item.get('url')
if not video_url or video_url in format_urls:
continue
@@ -57,9 +79,13 @@ class TV2IE(InfoExtractor):
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False))
+ if not data.get('drmProtected'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, format_id, fatal=False))
elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
pass
else:
@@ -69,34 +95,30 @@ class TV2IE(InfoExtractor):
'tbr': int_or_none(item.get('bitrate')),
'filesize': int_or_none(item.get('fileSize')),
})
+ if not formats and data.get('drmProtected'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats)
asset = self._download_json(
- 'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
- video_id, 'Downloading metadata JSON')['asset']
-
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
title = asset['title']
- description = asset.get('description')
- timestamp = parse_iso8601(asset.get('createTime'))
- duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
- view_count = int_or_none(asset.get('views'))
- categories = asset.get('keywords', '').split(',')
thumbnails = [{
'id': thumbnail.get('@type'),
'url': thumbnail.get('url'),
- } for _, thumbnail in asset.get('imageVersions', {}).items()]
+ } for _, thumbnail in (asset.get('imageVersions') or {}).items()]
return {
'id': video_id,
'url': video_url,
'title': title,
- 'description': description,
+ 'description': strip_or_none(asset.get('description')),
'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'duration': duration,
- 'view_count': view_count,
- 'categories': categories,
+ 'timestamp': parse_iso8601(asset.get('createTime')),
+ 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
+ 'view_count': int_or_none(asset.get('views')),
+ 'categories': asset.get('keywords', '').split(','),
'formats': formats,
}
@@ -108,7 +130,7 @@ class TV2ArticleIE(InfoExtractor):
'info_dict': {
'id': '6930542',
'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret',
- 'description': 'md5:339573779d3eea3542ffe12006190954',
+ 'description': 'De fire siktede nekter fortsatt for å ha stjålet pingvinbabyene, men innrømmer å ha åpnet luken til de små kyllingene.',
},
'playlist_count': 2,
}, {
@@ -126,7 +148,7 @@ class TV2ArticleIE(InfoExtractor):
if not assets:
# New embed pattern
- for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage):
+ for v in re.findall(r'(?s)TV2ContentboxVideo\(({.+?})\)', webpage):
video = self._parse_json(
v, playlist_id, transform_source=js_to_json, fatal=False)
if not video:
@@ -143,3 +165,28 @@ class TV2ArticleIE(InfoExtractor):
description = remove_end(self._og_search_description(webpage), ' - TV2.no')
return self.playlist_result(entries, playlist_id, title, description)
+
+
+class KatsomoIE(TV2IE):
+ _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
+ 'info_dict': {
+ 'id': '1181321',
+ 'ext': 'mp4',
+ 'title': 'MTV Uutiset Live',
+ 'description': 'Päätöksen teki Pelicansin hallitus.',
+ 'timestamp': 1575116484,
+ 'upload_date': '20191130',
+ 'duration': 37.12,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ _API_DOMAIN = 'api.katsomo.fi'
+ _PROTOCOLS = ('HLS', 'MPD')
+ _GEO_COUNTRIES = ['FI']
diff --git a/youtube_dl/extractor/tv2dk.py b/youtube_dl/extractor/tv2dk.py
new file mode 100644
index 000000000..8bda9348d
--- /dev/null
+++ b/youtube_dl/extractor/tv2dk.py
@@ -0,0 +1,154 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ js_to_json,
+ url_or_none,
+)
+
+
+class TV2DKIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ tvsyd|
+ tv2ostjylland|
+ tvmidtvest|
+ tv2fyn|
+ tv2east|
+ tv2lorry|
+ tv2nord
+ )\.dk/
+ (?:[^/]+/)*
+ (?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
+ 'info_dict': {
+ 'id': '0_52jmwa0p',
+ 'ext': 'mp4',
+ 'title': '19:30 - 28. okt. 2019',
+ 'timestamp': 1572290248,
+ 'upload_date': '20191028',
+ 'uploader_id': 'tvsyd',
+ 'duration': 1347,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ entries = []
+ for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
+ video = extract_attributes(video_el)
+ kaltura_id = video.get('data-entryid')
+ if not kaltura_id:
+ continue
+ partner_id = video.get('data-partnerid')
+ if not partner_id:
+ continue
+ entries.append(self.url_result(
+ 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
+ video_id=kaltura_id))
+ return self.playlist_result(entries)
+
+
+class TV2DKBornholmPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
+ 'info_dict': {
+ 'id': '781021',
+ 'ext': 'mp4',
+ 'title': '12Nyheder-27.11.19',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
+ data=json.dumps({
+ 'playlist_id': video_id,
+ 'serienavn': '',
+ }).encode(), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['d']
+
+ # TODO: generalize flowplayer
+ title = self._search_regex(
+ r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
+ group='value')
+ sources = self._parse_json(self._search_regex(
+ r'(?s)sources:\s*(\[.+?\]),', video, 'sources'),
+ video_id, js_to_json)
+
+ formats = []
+ srcs = set()
+ for source in sources:
+ src = url_or_none(source.get('src'))
+ if not src:
+ continue
+ if src in srcs:
+ continue
+ srcs.add(src)
+ ext = determine_ext(src)
+ src_type = source.get('type')
+ if src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tv3.py b/youtube_dl/extractor/tv3.py
deleted file mode 100644
index 3867ec90d..000000000
--- a/youtube_dl/extractor/tv3.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class TV3IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
- _TEST = {
- 'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
- 'info_dict': {
- 'id': '4659127992001',
- 'ext': 'mp4',
- 'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
- 'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
- 'uploader_id': '3812193411001',
- 'upload_date': '20151213',
- 'timestamp': 1449975272,
- },
- 'expected_warnings': [
- 'Failed to download MPD manifest'
- ],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
- return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py
index 51923e44a..c498b0191 100644
--- a/youtube_dl/extractor/tv4.py
+++ b/youtube_dl/extractor/tv4.py
@@ -72,8 +72,13 @@ class TV4IE(InfoExtractor):
video_id = self._match_id(url)
info = self._download_json(
- 'http://www.tv4play.se/player/assets/%s.json' % video_id,
- video_id, 'Downloading video info JSON')
+ 'https://playback-api.b17g.net/asset/%s' % video_id,
+ video_id, 'Downloading video info JSON', query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls,dash',
+ 'drm': 'widevine',
+ })['metadata']
title = info['title']
@@ -94,7 +99,7 @@ class TV4IE(InfoExtractor):
manifest_url.replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
formats.extend(self._extract_ism_formats(
- re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
+ re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
video_id, ism_id='mss', fatal=False))
if not formats and info.get('is_geo_restricted'):
@@ -111,5 +116,9 @@ class TV4IE(InfoExtractor):
'timestamp': parse_iso8601(info.get('broadcast_date_time')),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('image'),
- 'is_live': info.get('is_live') is True,
+ 'is_live': info.get('isLive') is True,
+ 'series': info.get('seriesTitle'),
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode': info.get('episodeTitle'),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
}
diff --git a/youtube_dl/extractor/tv5mondeplus.py b/youtube_dl/extractor/tv5mondeplus.py
index 88b6baa31..b7fe082b9 100644
--- a/youtube_dl/extractor/tv5mondeplus.py
+++ b/youtube_dl/extractor/tv5mondeplus.py
@@ -3,31 +3,51 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- clean_html,
determine_ext,
extract_attributes,
- get_element_by_class,
int_or_none,
parse_duration,
- parse_iso8601,
)
class TV5MondePlusIE(InfoExtractor):
IE_DESC = 'TV5MONDE+'
- _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
- _TEST = {
- 'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
- 'md5': '12130fc199f020673138a83466542ec6',
+ _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # movie
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
+ 'md5': '8cbde5ea7b296cf635073e27895e227f',
'info_dict': {
- 'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
+ 'id': '822a4756-0712-7329-1859-a13ac7fd1407',
+ 'display_id': 'rendez-vous-a-atlit',
'ext': 'mp4',
- 'title': 'Tdah, mon amour - Enfants',
- 'description': 'md5:230e3aca23115afcf8006d1bece6df74',
- 'upload_date': '20170401',
- 'timestamp': 1491022860,
- }
- }
+ 'title': 'Rendez-vous à Atlit',
+ 'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
+ 'upload_date': '20200130',
+ },
+ }, {
+ # series episode
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
+ 'info_dict': {
+ 'id': '0df7007c-4900-3936-c601-87a13a93a068',
+ 'display_id': 'c-est-la-vie-ennemie-juree',
+ 'ext': 'mp4',
+ 'title': "C'est la vie - Ennemie jurée",
+ 'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
+ 'upload_date': '20200130',
+ 'series': "C'est la vie",
+ 'episode': 'Ennemie jurée',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
+ 'only_matching': True,
+ }]
_GEO_BYPASS = False
def _real_extract(self, url):
@@ -37,11 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
- series = get_element_by_class('video-detail__title', webpage)
- title = episode = get_element_by_class(
- 'video-detail__subtitle', webpage) or series
- if series and series != title:
- title = '%s - %s' % (series, title)
+ title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
vpl_data = extract_attributes(self._search_regex(
r'(<[^>]+class="video_player_loader"[^>]+>)',
webpage, 'video player loader'))
@@ -65,15 +81,37 @@ class TV5MondePlusIE(InfoExtractor):
})
self._sort_formats(formats)
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ series = self._html_search_regex(
+ r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
+ 'series', default=None)
+
+ if series and series != title:
+ title = '%s - %s' % (series, title)
+
+ upload_date = self._search_regex(
+ r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
+ webpage, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('_', '')
+
+ video_id = self._search_regex(
+ (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ r'id_contenu["\']\s*:\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+
return {
- 'id': display_id,
+ 'id': video_id,
'display_id': display_id,
'title': title,
- 'description': clean_html(get_element_by_class('video-detail__description', webpage)),
+ 'description': description,
'thumbnail': vpl_data.get('data-image'),
'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
- 'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
+ 'upload_date': upload_date,
'formats': formats,
- 'episode': episode,
'series': series,
+ 'episode': episode,
}
diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py
index 0b863df2f..443f46e8a 100644
--- a/youtube_dl/extractor/tva.py
+++ b/youtube_dl/extractor/tva.py
@@ -9,8 +9,8 @@ from ..utils import (
class TVAIE(InfoExtractor):
- _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
- _TEST = {
+ _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
+ _TESTS = [{
'url': 'https://videos.tva.ca/details/_5596811470001',
'info_dict': {
'id': '5596811470001',
@@ -24,7 +24,10 @@ class TVAIE(InfoExtractor):
# m3u8 download
'skip_download': True,
}
- }
+ }, {
+ 'url': 'https://video.tva.ca/details/_5596811470001',
+ 'only_matching': True,
+ }]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dl/extractor/tvigle.py
index 3475ef4c3..180259aba 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dl/extractor/tvigle.py
@@ -9,6 +9,8 @@ from ..utils import (
float_or_none,
int_or_none,
parse_age_limit,
+ try_get,
+ url_or_none,
)
@@ -23,11 +25,10 @@ class TvigleIE(InfoExtractor):
_TESTS = [
{
'url': 'http://www.tvigle.ru/video/sokrat/',
- 'md5': '36514aed3657d4f70b4b2cef8eb520cd',
'info_dict': {
'id': '1848932',
'display_id': 'sokrat',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Сократ',
'description': 'md5:d6b92ffb7217b4b8ebad2e7665253c17',
'duration': 6586,
@@ -37,7 +38,6 @@ class TvigleIE(InfoExtractor):
},
{
'url': 'http://www.tvigle.ru/video/vladimir-vysotskii/vedushchii-teleprogrammy-60-minut-ssha-o-vladimire-vysotskom/',
- 'md5': 'e7efe5350dd5011d0de6550b53c3ba7b',
'info_dict': {
'id': '5142516',
'ext': 'flv',
@@ -62,7 +62,7 @@ class TvigleIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_regex(
(r'<div[^>]+class=["\']player["\'][^>]+id=["\'](\d+)',
- r'var\s+cloudId\s*=\s*["\'](\d+)',
+ r'cloudId\s*=\s*["\'](\d+)',
r'class="video-preview current_playing" id="(\d+)"'),
webpage, 'video id')
@@ -90,21 +90,40 @@ class TvigleIE(InfoExtractor):
age_limit = parse_age_limit(item.get('ageRestrictions'))
formats = []
- for vcodec, fmts in item['videos'].items():
+ for vcodec, url_or_fmts in item['videos'].items():
if vcodec == 'hls':
- continue
- for format_id, video_url in fmts.items():
- if format_id == 'm3u8':
+ m3u8_url = url_or_none(url_or_fmts)
+ if not m3u8_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif vcodec == 'dash':
+ mpd_url = url_or_none(url_or_fmts)
+ if not mpd_url:
+ continue
+ formats.extend(self._extract_mpd_formats(
+ mpd_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ if not isinstance(url_or_fmts, dict):
continue
- height = self._search_regex(
- r'^(\d+)[pP]$', format_id, 'height', default=None)
- formats.append({
- 'url': video_url,
- 'format_id': '%s-%s' % (vcodec, format_id),
- 'vcodec': vcodec,
- 'height': int_or_none(height),
- 'filesize': int_or_none(item.get('video_files_size', {}).get(vcodec, {}).get(format_id)),
- })
+ for format_id, video_url in url_or_fmts.items():
+ if format_id == 'm3u8':
+ continue
+ video_url = url_or_none(video_url)
+ if not video_url:
+ continue
+ height = self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)
+ filesize = int_or_none(try_get(
+ item, lambda x: x['video_files_size'][vcodec][format_id]))
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%s-%s' % (vcodec, format_id),
+ 'vcodec': vcodec,
+ 'height': int_or_none(height),
+ 'filesize': filesize,
+ })
self._sort_formats(formats)
return {
diff --git a/youtube_dl/extractor/tvland.py b/youtube_dl/extractor/tvland.py
index 957cf1ea2..791144128 100644
--- a/youtube_dl/extractor/tvland.py
+++ b/youtube_dl/extractor/tvland.py
@@ -1,32 +1,35 @@
# coding: utf-8
from __future__ import unicode_literals
-from .mtv import MTVServicesInfoExtractor
+from .spike import ParamountNetworkIE
-class TVLandIE(MTVServicesInfoExtractor):
+class TVLandIE(ParamountNetworkIE):
IE_NAME = 'tvland.com'
_VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
_FEED_URL = 'http://www.tvland.com/feeds/mrss/'
_TESTS = [{
# Geo-restricted. Without a proxy metadata are still there. With a
# proxy it redirects to http://m.tvland.com/app/
- 'url': 'http://www.tvland.com/episodes/hqhps2/everybody-loves-raymond-the-invasion-ep-048',
+ 'url': 'https://www.tvland.com/episodes/s04pzf/everybody-loves-raymond-the-dog-season-1-ep-19',
'info_dict': {
- 'description': 'md5:80973e81b916a324e05c14a3fb506d29',
- 'title': 'The Invasion',
+ 'description': 'md5:84928e7a8ad6649371fbf5da5e1ad75a',
+ 'title': 'The Dog',
},
- 'playlist': [],
+ 'playlist_mincount': 5,
}, {
- 'url': 'http://www.tvland.com/video-clips/zea2ev/younger-younger--hilary-duff---little-lies',
+ 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6',
'md5': 'e2c6389401cf485df26c79c247b08713',
'info_dict': {
- 'id': 'b8697515-4bbe-4e01-83d5-fa705ce5fa88',
+ 'id': '891f7d3c-5b5b-4753-b879-b7ba1a601757',
'ext': 'mp4',
- 'title': 'Younger|December 28, 2015|2|NO-EPISODE#|Younger: Hilary Duff - Little Lies',
- 'description': 'md5:7d192f56ca8d958645c83f0de8ef0269',
- 'upload_date': '20151228',
- 'timestamp': 1451289600,
+ 'title': 'Younger|April 30, 2019|6|NO-EPISODE#|A First Look at Younger Season 6',
+ 'description': 'md5:595ea74578d3a888ae878dfd1c7d4ab2',
+ 'upload_date': '20190430',
+ 'timestamp': 1556658000,
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
'url': 'http://www.tvland.com/full-episodes/iu0hz6/younger-a-kiss-is-just-a-kiss-season-3-ep-301',
diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dl/extractor/tvn24.py
index 6590e1fd0..de0fb5063 100644
--- a/youtube_dl/extractor/tvn24.py
+++ b/youtube_dl/extractor/tvn24.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
int_or_none,
+ NO_DEFAULT,
unescapeHTML,
)
@@ -17,10 +18,22 @@ class TVN24IE(InfoExtractor):
'id': '1584444',
'ext': 'mp4',
'title': '"Święta mają być wesołe, dlatego, ludziska, wszyscy pod jemiołę"',
- 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości "Szkła kontaktowego".',
+ 'description': 'Wyjątkowe orędzie Artura Andrusa, jednego z gości Szkła kontaktowego.',
'thumbnail': 're:https?://.*[.]jpeg',
}
}, {
+ # different layout
+ 'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
+ 'info_dict': {
+ 'id': '1771763',
+ 'ext': 'mp4',
+ 'title': 'Pnącza, ptaki i iglaki (odc. 691 /HGTV odc. 29)',
+ 'thumbnail': 're:https?://.*',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
'only_matching': True,
}, {
@@ -35,18 +48,21 @@ class TVN24IE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ display_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(url, display_id)
- title = self._og_search_title(webpage)
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
+ webpage, 'title')
- def extract_json(attr, name, fatal=True):
+ def extract_json(attr, name, default=NO_DEFAULT, fatal=True):
return self._parse_json(
self._search_regex(
r'\b%s=(["\'])(?P<json>(?!\1).+?)\1' % attr, webpage,
- name, group='json', fatal=fatal) or '{}',
- video_id, transform_source=unescapeHTML, fatal=fatal)
+ name, group='json', default=default, fatal=fatal) or '{}',
+ display_id, transform_source=unescapeHTML, fatal=fatal)
quality_data = extract_json('data-quality', 'formats')
@@ -59,16 +75,24 @@ class TVN24IE(InfoExtractor):
})
self._sort_formats(formats)
- description = self._og_search_description(webpage)
+ description = self._og_search_description(webpage, default=None)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_regex(
r'\bdata-poster=(["\'])(?P<url>(?!\1).+?)\1', webpage,
'thumbnail', group='url')
+ video_id = None
+
share_params = extract_json(
- 'data-share-params', 'share params', fatal=False)
+ 'data-share-params', 'share params', default=None)
if isinstance(share_params, dict):
- video_id = share_params.get('id') or video_id
+ video_id = share_params.get('id')
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-vid-id=["\'](\d+)', webpage, 'video id',
+ default=None) or self._search_regex(
+ r',(\d+)\.html', url, 'video id', default=display_id)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py
index 60937616f..9c8a8a0dc 100644
--- a/youtube_dl/extractor/tvnow.py
+++ b/youtube_dl/extractor/tvnow.py
@@ -10,8 +10,9 @@ from ..utils import (
int_or_none,
parse_iso8601,
parse_duration,
- try_get,
+ str_or_none,
update_url_query,
+ urljoin,
)
@@ -24,8 +25,7 @@ class TVNowBaseIE(InfoExtractor):
def _call_api(self, path, video_id, query):
return self._download_json(
- 'https://api.tvnow.de/v3/' + path,
- video_id, query=query)
+ 'https://api.tvnow.de/v3/' + path, video_id, query=query)
def _extract_video(self, info, display_id):
video_id = compat_str(info['id'])
@@ -47,15 +47,23 @@ class TVNowBaseIE(InfoExtractor):
r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
'.ism/' + suffix, manifest_url))
- formats = self._extract_mpd_formats(
- url_repl('dash', '.mpd'), video_id,
- mpd_id='dash', fatal=False)
- formats.extend(self._extract_ism_formats(
- url_repl('hss', 'Manifest'),
- video_id, ism_id='mss', fatal=False))
- formats.extend(self._extract_m3u8_formats(
- url_repl('hls', '.m3u8'), video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
+ def make_urls(proto, suffix):
+ urls = [url_repl(proto, suffix)]
+ hd_url = urls[0].replace('/manifest/', '/ngvod/')
+ if hd_url != urls[0]:
+ urls.append(hd_url)
+ return urls
+
+ for man_url in make_urls('dash', '.mpd'):
+ formats = self._extract_mpd_formats(
+ man_url, video_id, mpd_id='dash', fatal=False)
+ for man_url in make_urls('hss', 'Manifest'):
+ formats.extend(self._extract_ism_formats(
+ man_url, video_id, ism_id='mss', fatal=False))
+ for man_url in make_urls('hls', '.m3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+ fatal=False))
if formats:
break
else:
@@ -108,6 +116,11 @@ class TVNowIE(TVNowBaseIE):
(?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
'''
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
+ else super(TVNowIE, cls).suitable(url))
+
_TESTS = [{
'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
'info_dict': {
@@ -116,7 +129,6 @@ class TVNowIE(TVNowBaseIE):
'ext': 'mp4',
'title': 'Der neue Porsche 911 GT 3',
'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
- 'thumbnail': r're:^https?://.*\.jpg$',
'timestamp': 1495994400,
'upload_date': '20170528',
'duration': 5283,
@@ -161,136 +173,314 @@ class TVNowIE(TVNowBaseIE):
info = self._call_api(
'movies/' + display_id, display_id, query={
'fields': ','.join(self._VIDEO_FIELDS),
- 'station': mobj.group(1),
})
return self._extract_video(info, display_id)
-class TVNowListBaseIE(TVNowBaseIE):
- _SHOW_VALID_URL = r'''(?x)
- (?P<base_url>
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/[^/]+/
- (?P<show_id>[^/]+)
- )
+class TVNowNewIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:shows|serien))/
+ (?P<show>[^/]+)-\d+/
+ [^/]+/
+ episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
'''
- def _extract_list_info(self, display_id, show_id):
- fields = list(self._SHOW_FIELDS)
- fields.extend('formatTabs.%s' % field for field in self._SEASON_FIELDS)
- fields.extend(
- 'formatTabs.formatTabPages.container.movies.%s' % field
- for field in self._VIDEO_FIELDS)
- return self._call_api(
- 'formats/seo', display_id, query={
- 'fields': ','.join(fields),
- 'name': show_id + '.php'
- })
-
-
-class TVNowListIE(TVNowListBaseIE):
- _VALID_URL = r'%s/(?:list|jahr)/(?P<id>[^?\#&]+)' % TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'only_matching': True,
+ }]
- _SHOW_FIELDS = ('title', )
- _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
- _VIDEO_FIELDS = ('id', 'headline', 'seoUrl', )
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
+ show, episode = mobj.group('show', 'episode')
+ return self.url_result(
+ # Rewrite new URLs to the old format and use extraction via old API
+ # at api.tvnow.de as a loophole for bypassing premium content checks
+ '%s/%s/%s' % (base_url, show, episode),
+ ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
+
+
+class TVNowNewBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, query={}):
+ result = self._download_json(
+ 'https://apigw.tvnow.de/module/' + path, video_id, query=query)
+ error = result.get('error')
+ if error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ return result
+
+
+r"""
+TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
+when api.tvnow.de is shut down. This version can't bypass premium checks though.
+class TVNowIE(TVNowNewBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:shows|serien)/[^/]+/
+ (?:[^/]+/)+
+ (?P<display_id>[^/?$&]+)-(?P<id>\d+)
+ '''
_TESTS = [{
- 'url': 'https://www.tvnow.de/rtl/30-minuten-deutschland/list/aktuell',
+ # episode with annual navigation
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
'info_dict': {
- 'id': '28296',
- 'title': '30 Minuten Deutschland - Aktuell',
+ 'id': '331082',
+ 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'ext': 'mp4',
+ 'title': 'Der neue Porsche 911 GT 3',
+ 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1495994400,
+ 'upload_date': '20170528',
+ 'duration': 5283,
+ 'series': 'GRIP - Das Motormagazin',
+ 'season_number': 14,
+ 'episode_number': 405,
+ 'episode': 'Der neue Porsche 911 GT 3',
},
- 'playlist_mincount': 1,
}, {
- 'url': 'https://www.tvnow.de/vox/ab-ins-beet/list/staffel-14',
+ # rtl2, episode with season navigation
+ 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
+ 'only_matching': True,
+ }, {
+ # rtlnitro
+ 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
'only_matching': True,
}, {
- 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/2018/3',
+ # superrtl
+ 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
+ 'only_matching': True,
+ }, {
+ # ntv
+ 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
+ 'only_matching': True,
+ }, {
+ # vox
+ 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
'only_matching': True,
}]
- @classmethod
- def suitable(cls, url):
- return (False if TVNowIE.suitable(url)
- else super(TVNowListIE, cls).suitable(url))
+ def _extract_video(self, info, url, display_id):
+ config = info['config']
+ source = config['source']
- def _real_extract(self, url):
- base_url, show_id, season_id = re.match(self._VALID_URL, url).groups()
+ video_id = compat_str(info.get('id') or source['videoId'])
+ title = source['title'].strip()
- list_info = self._extract_list_info(season_id, show_id)
+ paths = []
+ for manifest_url in (info.get('manifest') or {}).values():
+ if not manifest_url:
+ continue
+ manifest_url = update_url_query(manifest_url, {'filter': ''})
+ path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+ if path in paths:
+ continue
+ paths.append(path)
- season = next(
- season for season in list_info['formatTabs']['items']
- if season.get('seoheadline') == season_id)
+ def url_repl(proto, suffix):
+ return re.sub(
+ r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+ r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+ '.ism/' + suffix, manifest_url))
- title = list_info.get('title')
- headline = season.get('headline')
- if title and headline:
- title = '%s - %s' % (title, headline)
+ formats = self._extract_mpd_formats(
+ url_repl('dash', '.mpd'), video_id,
+ mpd_id='dash', fatal=False)
+ formats.extend(self._extract_ism_formats(
+ url_repl('hss', 'Manifest'),
+ video_id, ism_id='mss', fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ url_repl('hls', '.m3u8'), video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ if formats:
+ break
else:
- title = headline or title
+ if try_get(info, lambda x: x['rights']['isDrm']):
+ raise ExtractorError(
+ 'Video %s is DRM protected' % video_id, expected=True)
+ if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
+ raise self.raise_geo_restricted()
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+ self._sort_formats(formats)
+
+ description = source.get('description')
+ thumbnail = url_or_none(source.get('poster'))
+ timestamp = unified_timestamp(source.get('previewStart'))
+ duration = parse_duration(source.get('length'))
+
+ series = source.get('format')
+ season_number = int_or_none(self._search_regex(
+ r'staffel-(\d+)', url, 'season number', default=None))
+ episode_number = int_or_none(self._search_regex(
+ r'episode-(\d+)', url, 'episode number', default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'episode': title,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+ info = self._call_api('player/' + video_id, video_id)
+ return self._extract_video(info, video_id, display_id)
+"""
+
+
+class TVNowListBaseIE(TVNowNewBaseIE):
+ _SHOW_VALID_URL = r'''(?x)
+ (?P<base_url>
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
+ [^/?#&]+-(?P<show_id>\d+)
+ )
+ '''
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url)
+ else super(TVNowListBaseIE, cls).suitable(url))
+
+ def _extract_items(self, url, show_id, list_id, query):
+ items = self._call_api(
+ 'teaserrow/format/episode/' + show_id, list_id,
+ query=query)['items']
entries = []
- for container in season['formatTabPages']['items']:
- items = try_get(
- container, lambda x: x['container']['movies']['items'],
- list) or []
- for info in items:
- seo_url = info.get('seoUrl')
- if not seo_url:
- continue
- video_id = info.get('id')
- entries.append(self.url_result(
- '%s/%s/player' % (base_url, seo_url), TVNowIE.ie_key(),
- compat_str(video_id) if video_id else None))
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_url = urljoin(url, item.get('url'))
+ if not item_url:
+ continue
+ video_id = str_or_none(item.get('id') or item.get('videoId'))
+ item_title = item.get('subheadline') or item.get('text')
+ entries.append(self.url_result(
+ item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
+ video_title=item_title))
- return self.playlist_result(
- entries, compat_str(season.get('id') or season_id), title)
+ return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
-class TVNowShowIE(TVNowListBaseIE):
- _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+class TVNowSeasonIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
+ 'info_dict': {
+ 'id': '1815/13',
+ },
+ 'playlist_mincount': 22,
+ }]
+
+ def _real_extract(self, url):
+ _, show_id, season_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_items(
+ url, show_id, season_id, {'season': season_id})
- _SHOW_FIELDS = ('id', 'title', )
- _SEASON_FIELDS = ('id', 'headline', 'seoheadline', )
- _VIDEO_FIELDS = ()
+class TVNowAnnualIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
_TESTS = [{
- 'url': 'https://www.tvnow.at/vox/ab-ins-beet',
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
'info_dict': {
- 'id': 'ab-ins-beet',
- 'title': 'Ab ins Beet!',
+ 'id': '1669/2017-05',
},
- 'playlist_mincount': 7,
- }, {
- 'url': 'https://www.tvnow.at/vox/ab-ins-beet/list',
- 'only_matching': True,
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ _, show_id, year, month = re.match(self._VALID_URL, url).groups()
+ return self._extract_items(
+ url, show_id, '%s-%s' % (year, month), {
+ 'year': int(year),
+ 'month': int(month),
+ })
+
+
+class TVNowShowIE(TVNowListBaseIE):
+ _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ # annual navigationType
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
+ 'info_dict': {
+ 'id': '1669',
+ },
+ 'playlist_mincount': 73,
}, {
- 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/jahr/',
- 'only_matching': True,
+ # season navigationType
+ 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
+ 'info_dict': {
+ 'id': '11471',
+ },
+ 'playlist_mincount': 3,
}]
@classmethod
def suitable(cls, url):
- return (False if TVNowIE.suitable(url) or TVNowListIE.suitable(url)
+ return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
else super(TVNowShowIE, cls).suitable(url))
def _real_extract(self, url):
base_url, show_id = re.match(self._VALID_URL, url).groups()
- list_info = self._extract_list_info(show_id, show_id)
+ result = self._call_api(
+ 'teaserrow/format/navigation/' + show_id, show_id)
+
+ items = result['items']
entries = []
- for season_info in list_info['formatTabs']['items']:
- season_url = season_info.get('seoheadline')
- if not season_url:
- continue
- season_id = season_info.get('id')
- entries.append(self.url_result(
- '%s/list/%s' % (base_url, season_url), TVNowListIE.ie_key(),
- compat_str(season_id) if season_id else None,
- season_info.get('headline')))
+ navigation = result.get('navigationType')
+ if navigation == 'annual':
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ year = int_or_none(item.get('year'))
+ if year is None:
+ continue
+ months = item.get('months')
+ if not isinstance(months, list):
+ continue
+ for month_dict in months:
+ if not isinstance(month_dict, dict) or not month_dict:
+ continue
+ month_number = int_or_none(list(month_dict.keys())[0])
+ if month_number is None:
+ continue
+ entries.append(self.url_result(
+ '%s/%04d-%02d' % (base_url, year, month_number),
+ ie=TVNowAnnualIE.ie_key()))
+ elif navigation == 'season':
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ season_number = int_or_none(item.get('season'))
+ if season_number is None:
+ continue
+ entries.append(self.url_result(
+ '%s/staffel-%d' % (base_url, season_number),
+ ie=TVNowSeasonIE.ie_key()))
+ else:
+ raise ExtractorError('Unknown navigationType')
- return self.playlist_result(entries, show_id, list_info.get('title'))
+ return self.playlist_result(entries, show_id)
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dl/extractor/tvp.py
index 3954f0b93..accff75b5 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dl/extractor/tvp.py
@@ -1,14 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
clean_html,
- get_element_by_attribute,
+ determine_ext,
ExtractorError,
+ get_element_by_attribute,
+ orderedSet,
)
@@ -19,12 +21,12 @@ class TVPIE(InfoExtractor):
_TESTS = [{
'url': 'https://vod.tvp.pl/video/czas-honoru,i-seria-odc-13,194536',
- 'md5': '8aa518c15e5cc32dfe8db400dc921fbb',
+ 'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
'info_dict': {
'id': '194536',
'ext': 'mp4',
- 'title': 'Czas honoru, I seria – odc. 13',
- 'description': 'md5:381afa5bca72655fe94b05cfe82bf53d',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ 'description': 'md5:437f48b93558370b031740546b696e24',
},
}, {
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
@@ -45,6 +47,7 @@ class TVPIE(InfoExtractor):
'title': 'Wiadomości, 28.09.2017, 19:30',
'description': 'Wydanie główne codziennego serwisu informacyjnego.'
},
+ 'skip': 'HTTP Error 404: Not Found',
}, {
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
'only_matching': True,
@@ -75,8 +78,10 @@ class TVPIE(InfoExtractor):
return {
'_type': 'url_transparent',
'url': 'tvp:' + video_id,
- 'description': self._og_search_description(webpage, default=None),
- 'thumbnail': self._og_search_thumbnail(webpage),
+ 'description': self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'description', webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
'ie_key': 'TVPEmbed',
}
@@ -87,6 +92,15 @@ class TVPEmbedIE(InfoExtractor):
_VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)'
_TESTS = [{
+ 'url': 'tvp:194536',
+ 'md5': 'a21eb0aa862f25414430f15fdfb9e76c',
+ 'info_dict': {
+ 'id': '194536',
+ 'ext': 'mp4',
+ 'title': 'Czas honoru, odc. 13 – Władek',
+ },
+ }, {
+ # not available
'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268',
'md5': '8c9cd59d16edabf39331f93bf8a766c7',
'info_dict': {
@@ -94,6 +108,7 @@ class TVPEmbedIE(InfoExtractor):
'ext': 'mp4',
'title': 'Panorama, 07.12.2015, 15:40',
},
+ 'skip': 'Transmisja została zakończona lub materiał niedostępny',
}, {
'url': 'tvp:22670268',
'only_matching': True,
@@ -105,10 +120,13 @@ class TVPEmbedIE(InfoExtractor):
webpage = self._download_webpage(
'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id)
- error_massage = get_element_by_attribute('class', 'msg error', webpage)
- if error_massage:
+ error = self._html_search_regex(
+ r'(?s)<p[^>]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)</p>',
+ webpage, 'error', default=None) or clean_html(
+ get_element_by_attribute('class', 'msg error', webpage))
+ if error:
raise ExtractorError('%s said: %s' % (
- self.IE_NAME, clean_html(error_massage)), expected=True)
+ self.IE_NAME, clean_html(error)), expected=True)
title = self._search_regex(
r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1',
@@ -180,48 +198,55 @@ class TVPEmbedIE(InfoExtractor):
}
-class TVPSeriesIE(InfoExtractor):
+class TVPWebsiteIE(InfoExtractor):
IE_NAME = 'tvp:series'
- _VALID_URL = r'https?://vod\.tvp\.pl/(?:[^/]+/){2}(?P<id>[^/]+)/?$'
+ _VALID_URL = r'https?://vod\.tvp\.pl/website/(?P<display_id>[^,]+),(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://vod.tvp.pl/filmy-fabularne/filmy-za-darmo/ogniem-i-mieczem',
+ # series
+ 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video',
'info_dict': {
- 'title': 'Ogniem i mieczem',
- 'id': '4278026',
+ 'id': '38678312',
},
- 'playlist_count': 4,
+ 'playlist_count': 115,
}, {
- 'url': 'http://vod.tvp.pl/audycje/podroze/boso-przez-swiat',
+ # film
+ 'url': 'https://vod.tvp.pl/website/gloria,35139666',
'info_dict': {
- 'title': 'Boso przez świat',
- 'id': '9329207',
+ 'id': '36637049',
+ 'ext': 'mp4',
+ 'title': 'Gloria, Gloria',
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'playlist_count': 86,
+ 'add_ie': ['TVPEmbed'],
+ }, {
+ 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id, tries=5)
-
- title = self._html_search_regex(
- r'(?s) id=[\'"]path[\'"]>(?:.*? / ){2}(.*?)</span>', webpage, 'series')
- playlist_id = self._search_regex(r'nodeId:\s*(\d+)', webpage, 'playlist id')
- playlist = self._download_webpage(
- 'http://vod.tvp.pl/vod/seriesAjax?type=series&nodeId=%s&recommend'
- 'edId=0&sort=&page=0&pageSize=10000' % playlist_id, display_id, tries=5,
- note='Downloading playlist')
-
- videos_paths = re.findall(
- '(?s)class="shortTitle">.*?href="(/[^"]+)', playlist)
- entries = [
- self.url_result('http://vod.tvp.pl%s' % v_path, ie=TVPIE.ie_key())
- for v_path in videos_paths]
+ def _entries(self, display_id, playlist_id):
+ url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
+ for page_num in itertools.count(1):
+ page = self._download_webpage(
+ url, display_id, 'Downloading page %d' % page_num,
+ query={'page': page_num})
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'display_id': display_id,
- 'title': title,
- 'entries': entries,
- }
+ video_ids = orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\']/video/%s,[^,]+,(\d+)' % display_id,
+ page))
+
+ if not video_ids:
+ break
+
+ for video_id in video_ids:
+ yield self.url_result(
+ 'tvp:%s' % video_id, ie=TVPEmbedIE.ie_key(),
+ video_id=video_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id, playlist_id = mobj.group('display_id', 'id')
+ return self.playlist_result(
+ self._entries(display_id, playlist_id), playlist_id)
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
index 8f1ff3b76..3c2450dd0 100644
--- a/youtube_dl/extractor/tvplay.py
+++ b/youtube_dl/extractor/tvplay.py
@@ -6,7 +6,6 @@ import re
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
- compat_str,
compat_urlparse,
)
from ..utils import (
@@ -15,9 +14,7 @@ from ..utils import (
int_or_none,
parse_iso8601,
qualities,
- smuggle_url,
try_get,
- unsmuggle_url,
update_url_query,
url_or_none,
)
@@ -235,11 +232,6 @@ class TVPlayIE(InfoExtractor):
]
def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass({
- 'countries': smuggled_data.get('geo_countries'),
- })
-
video_id = self._match_id(url)
geo_country = self._search_regex(
r'https?://[^/]+\.([a-z]{2})', url,
@@ -285,8 +277,6 @@ class TVPlayIE(InfoExtractor):
'ext': ext,
}
if video_url.startswith('rtmp'):
- if smuggled_data.get('skip_rtmp'):
- continue
m = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
if not m:
@@ -347,115 +337,80 @@ class ViafreeIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
- viafree\.
- (?:
- (?:dk|no)/programmer|
- se/program
- )
- /(?:[^/]+/)+(?P<id>[^/?#&]+)
+ viafree\.(?P<country>dk|no|se)
+ /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+)
'''
_TESTS = [{
- 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
+ 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
'info_dict': {
- 'id': '395375',
+ 'id': '757786',
'ext': 'mp4',
- 'title': 'Husräddarna S02E02',
- 'description': 'md5:4db5c933e37db629b5a2f75dfb34829e',
- 'series': 'Husräddarna',
- 'season': 'Säsong 2',
+ 'title': 'Det beste vorspielet - Sesong 2 - Episode 1',
+ 'description': 'md5:b632cb848331404ccacd8cd03e83b4c3',
+ 'series': 'Det beste vorspielet',
'season_number': 2,
- 'duration': 2576,
- 'timestamp': 1400596321,
- 'upload_date': '20140520',
+ 'duration': 1116,
+ 'timestamp': 1471200600,
+ 'upload_date': '20160814',
},
'params': {
'skip_download': True,
},
- 'add_ie': [TVPlayIE.ie_key()],
}, {
# with relatedClips
'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
- 'info_dict': {
- 'id': '758770',
- 'ext': 'mp4',
- 'title': 'Sommaren med YouTube-stjärnorna S01E01',
- 'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f',
- 'series': 'Sommaren med YouTube-stjärnorna',
- 'season': 'Säsong 1',
- 'season_number': 1,
- 'duration': 1326,
- 'timestamp': 1470905572,
- 'upload_date': '20160811',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [TVPlayIE.ie_key()],
+ 'only_matching': True,
}, {
# Different og:image URL schema
'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
'only_matching': True,
}, {
- 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
+ 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
'only_matching': True,
}, {
'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
'only_matching': True,
}]
+ _GEO_BYPASS = False
@classmethod
def suitable(cls, url):
return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
def _real_extract(self, url):
- video_id = self._match_id(url)
+ country, path = re.match(self._VALID_URL, url).groups()
+ content = self._download_json(
+ 'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path)
+ program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program']
+ guid = program['guid']
+ meta = content['meta']
+ title = meta['title']
- webpage = self._download_webpage(url, video_id)
+ try:
+ stream_href = self._download_json(
+ program['_links']['streamLink']['href'], guid,
+ headers=self.geo_verification_headers())['embedded']['prioritizedStreams'][0]['links']['stream']['href']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_geo_restricted(countries=[country])
+ raise
+
+ formats = self._extract_m3u8_formats(stream_href, guid, 'mp4')
+ self._sort_formats(formats)
+ episode = program.get('episode') or {}
- data = self._parse_json(
- self._search_regex(
- r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script',
- webpage, 'data', default='{}'),
- video_id, transform_source=lambda x: re.sub(
- r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*',
- 'null', x), fatal=False)
-
- video_id = None
-
- if data:
- video_id = try_get(
- data, lambda x: x['context']['dispatcher']['stores'][
- 'ContentPageProgramStore']['currentVideo']['id'],
- compat_str)
-
- # Fallback #1 (extract from og:image URL schema)
- if not video_id:
- thumbnail = self._og_search_thumbnail(webpage, default=None)
- if thumbnail:
- video_id = self._search_regex(
- # Patterns seen:
- # http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg
- # http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg
- r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/',
- thumbnail, 'video id', default=None)
-
- # Fallback #2. Extract from raw JSON string.
- # May extract wrong video id if relatedClips is present.
- if not video_id:
- video_id = self._search_regex(
- r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
- webpage, 'video id')
-
- return self.url_result(
- smuggle_url(
- 'mtg:%s' % video_id,
- {
- 'geo_countries': [
- compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]],
- # rtmp host mtgfs.fplive.net for viafree is unresolvable
- 'skip_rtmp': True,
- }),
- ie=TVPlayIE.ie_key(), video_id=video_id)
+ return {
+ 'id': guid,
+ 'title': title,
+ 'thumbnail': meta.get('image'),
+ 'description': meta.get('description'),
+ 'series': episode.get('seriesTitle'),
+ 'episode_number': int_or_none(episode.get('episodeNumber')),
+ 'season_number': int_or_none(episode.get('seasonNumber')),
+ 'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000),
+ 'timestamp': parse_iso8601(try_get(program, lambda x: x['availability']['start'])),
+ 'formats': formats,
+ }
class TVPlayHomeIE(InfoExtractor):
@@ -493,10 +448,9 @@ class TVPlayHomeIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
video_id = self._search_regex(
- r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
- default=None)
+ r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
- if video_id:
+ if len(video_id) < 8:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
@@ -537,8 +491,9 @@ class TVPlayHomeIE(InfoExtractor):
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
- r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
- default=None, group='value')
+ (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'episode', default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
index 4b3b3e705..74d14049b 100644
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ b/youtube_dl/extractor/twentyfourvideo.py
@@ -14,7 +14,18 @@ from ..utils import (
class TwentyFourVideoIE(InfoExtractor):
IE_NAME = '24video'
- _VALID_URL = r'https?://(?P<host>(?:www\.)?24video\.(?:net|me|xxx|sexy?|tube|adult))/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<host>
+ (?:(?:www|porno?)\.)?24video\.
+ (?:net|me|xxx|sexy?|tube|adult|site|vip)
+ )/
+ (?:
+ video/(?:(?:view|xml)/)?|
+ player/new24_play\.swf\?id=
+ )
+ (?P<id>\d+)
+ '''
_TESTS = [{
'url': 'http://www.24video.net/video/view/1044982',
@@ -42,6 +53,18 @@ class TwentyFourVideoIE(InfoExtractor):
}, {
'url': 'http://www.24video.tube/video/view/2363750',
'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.site/video/view/2640421',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.vip/video/view/1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porn.24video.net/video/2640421-vsya-takay',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py
new file mode 100644
index 000000000..2dbe89f5b
--- /dev/null
+++ b/youtube_dl/extractor/twitcasting.py
@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import urlencode_postdata
+
+import re
+
+
+class TwitCastingIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
+ 'md5': '745243cad58c4681dc752490f7540d7f',
+ 'info_dict': {
+ 'id': '2357609',
+ 'ext': 'mp4',
+ 'title': 'Live #2357609',
+ 'uploader_id': 'ivetesangalo',
+ 'description': "Moi! I'm live on TwitCasting from my iPhone.",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
+ 'info_dict': {
+ 'id': '3689740',
+ 'ext': 'mp4',
+ 'title': 'Live playing something #3689740',
+ 'uploader_id': 'mttbernardini',
+ 'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'videopassword': 'abc',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ uploader_id = mobj.group('uploader_id')
+
+ video_password = self._downloader.params.get('videopassword')
+ request_data = None
+ if video_password:
+ request_data = urlencode_postdata({
+ 'password': video_password,
+ })
+ webpage = self._download_webpage(url, video_id, data=request_data)
+
+ title = self._html_search_regex(
+ r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
+ webpage, 'title', default=None) or self._html_search_meta(
+ 'twitter:title', webpage, fatal=True)
+
+ m3u8_url = self._search_regex(
+ (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
+ webpage, 'm3u8 url', group='url')
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:description', webpage)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index 401615683..e211cd4c8 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -17,12 +17,12 @@ from ..compat import (
from ..utils import (
clean_html,
ExtractorError,
- float_or_none,
int_or_none,
orderedSet,
parse_duration,
parse_iso8601,
qualities,
+ str_or_none,
try_get,
unified_timestamp,
update_url_query,
@@ -52,8 +52,14 @@ class TwitchBaseIE(InfoExtractor):
def _call_api(self, path, item_id, *args, **kwargs):
headers = kwargs.get('headers', {}).copy()
- headers['Client-ID'] = self._CLIENT_ID
- kwargs['headers'] = headers
+ headers.update({
+ 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
+ 'Client-ID': self._CLIENT_ID,
+ })
+ kwargs.update({
+ 'headers': headers,
+ 'expected_status': (400, 410),
+ })
response = self._download_json(
'%s/%s' % (self._API_BASE, path), item_id,
*args, **compat_kwargs(kwargs))
@@ -134,9 +140,14 @@ class TwitchBaseIE(InfoExtractor):
def _prefer_source(self, formats):
try:
source = next(f for f in formats if f['format_id'] == 'Source')
- source['preference'] = 10
+ source['quality'] = 10
except StopIteration:
- pass # No Source stream present
+ for f in formats:
+ if '/chunked/' in f['url']:
+ f.update({
+ 'quality': 10,
+ 'format_note': 'Source',
+ })
self._sort_formats(formats)
@@ -183,12 +194,27 @@ class TwitchItemBaseIE(TwitchBaseIE):
is_live = False
else:
is_live = None
+ _QUALITIES = ('small', 'medium', 'large')
+ quality_key = qualities(_QUALITIES)
+ thumbnails = []
+ preview = info.get('preview')
+ if isinstance(preview, dict):
+ for thumbnail_id, thumbnail_url in preview.items():
+ thumbnail_url = url_or_none(thumbnail_url)
+ if not thumbnail_url:
+ continue
+ if thumbnail_id not in _QUALITIES:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail_id),
+ })
return {
'id': info['_id'],
'title': info.get('title') or 'Untitled Broadcast',
'description': info.get('description'),
'duration': int_or_none(info.get('length')),
- 'thumbnail': info.get('preview'),
+ 'thumbnails': thumbnails,
'uploader': info.get('channel', {}).get('display_name'),
'uploader_id': info.get('channel', {}).get('name'),
'timestamp': parse_iso8601(info.get('recorded_at')),
@@ -243,7 +269,7 @@ class TwitchVodIE(TwitchItemBaseIE):
https?://
(?:
(?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
- player\.twitch\.tv/\?.*?\bvideo=v
+ player\.twitch\.tv/\?.*?\bvideo=v?
)
(?P<id>\d+)
'''
@@ -301,6 +327,9 @@ class TwitchVodIE(TwitchItemBaseIE):
}, {
'url': 'https://www.twitch.tv/northernlion/video/291940395',
'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?video=480452374',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -312,13 +341,14 @@ class TwitchVodIE(TwitchItemBaseIE):
'Downloading %s access token' % self._ITEM_TYPE)
formats = self._extract_m3u8_formats(
- '%s/vod/%s?%s' % (
+ '%s/vod/%s.m3u8?%s' % (
self._USHER_BASE, item_id,
compat_urllib_parse_urlencode({
'allow_source': 'true',
'allow_audio_only': 'true',
'allow_spectre': 'true',
'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
'nauth': access_token['token'],
'nauthsig': access_token['sig'],
})),
@@ -336,9 +366,8 @@ class TwitchVodIE(TwitchItemBaseIE):
info['subtitles'] = {
'rechat': [{
'url': update_url_query(
- 'https://rechat.twitch.tv/rechat-messages', {
- 'video_id': 'v%s' % item_id,
- 'start': info['timestamp'],
+ 'https://api.twitch.tv/v5/videos/%s/comments' % item_id, {
+ 'client_id': self._CLIENT_ID,
}),
'ext': 'json',
}],
@@ -566,11 +595,19 @@ class TwitchStreamIE(TwitchBaseIE):
else super(TwitchStreamIE, cls).suitable(url))
def _real_extract(self, url):
- channel_id = self._match_id(url)
+ channel_name = self._match_id(url)
+
+ access_token = self._call_api(
+ 'api/channels/%s/access_token' % channel_name, channel_name,
+ 'Downloading access token JSON')
+
+ token = access_token['token']
+ channel_id = compat_str(self._parse_json(
+ token, channel_name)['channel_id'])
stream = self._call_api(
- 'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
- 'Downloading stream JSON').get('stream')
+ 'kraken/streams/%s?stream_type=all' % channel_id,
+ channel_id, 'Downloading stream JSON').get('stream')
if not stream:
raise ExtractorError('%s is offline' % channel_id, expected=True)
@@ -579,11 +616,9 @@ class TwitchStreamIE(TwitchBaseIE):
# (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
# an invalid m3u8 URL. Working around by use of original channel name from stream
# JSON and fallback to lowercase if it's not available.
- channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
-
- access_token = self._call_api(
- 'api/channels/%s/access_token' % channel_id, channel_id,
- 'Downloading channel access token')
+ channel_name = try_get(
+ stream, lambda x: x['channel']['name'],
+ compat_str) or channel_name.lower()
query = {
'allow_source': 'true',
@@ -591,13 +626,14 @@ class TwitchStreamIE(TwitchBaseIE):
'allow_spectre': 'true',
'p': random.randint(1000000, 10000000),
'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
'segment_preference': '4',
'sig': access_token['sig'].encode('utf-8'),
- 'token': access_token['token'].encode('utf-8'),
+ 'token': token.encode('utf-8'),
}
formats = self._extract_m3u8_formats(
'%s/api/channel/hls/%s.m3u8?%s'
- % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)),
+ % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)),
channel_id, 'mp4')
self._prefer_source(formats)
@@ -620,8 +656,8 @@ class TwitchStreamIE(TwitchBaseIE):
})
return {
- 'id': compat_str(stream['_id']),
- 'display_id': channel_id,
+ 'id': str_or_none(stream.get('_id')) or channel_id,
+ 'display_id': channel_name,
'title': title,
'description': description,
'thumbnails': thumbnails,
@@ -636,7 +672,14 @@ class TwitchStreamIE(TwitchBaseIE):
class TwitchClipsIE(TwitchBaseIE):
IE_NAME = 'twitch:clips'
- _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
+ (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
+ )
+ (?P<id>[^/?#&]+)
+ '''
_TESTS = [{
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
@@ -659,68 +702,95 @@ class TwitchClipsIE(TwitchBaseIE):
}, {
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
'only_matching': True,
+ }, {
+ 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- status = self._download_json(
- 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
- video_id)
+ clip = self._download_json(
+ 'https://gql.twitch.tv/gql', video_id, data=json.dumps({
+ 'query': '''{
+ clip(slug: "%s") {
+ broadcaster {
+ displayName
+ }
+ createdAt
+ curator {
+ displayName
+ id
+ }
+ durationSeconds
+ id
+ tiny: thumbnailURL(width: 86, height: 45)
+ small: thumbnailURL(width: 260, height: 147)
+ medium: thumbnailURL(width: 480, height: 272)
+ title
+ videoQualities {
+ frameRate
+ quality
+ sourceURL
+ }
+ viewCount
+ }
+}''' % video_id,
+ }).encode(), headers={
+ 'Client-ID': self._CLIENT_ID,
+ })['data']['clip']
+
+ if not clip:
+ raise ExtractorError(
+ 'This clip is no longer available', expected=True)
formats = []
-
- for option in status['quality_options']:
+ for option in clip.get('videoQualities', []):
if not isinstance(option, dict):
continue
- source = url_or_none(option.get('source'))
+ source = url_or_none(option.get('sourceURL'))
if not source:
continue
formats.append({
'url': source,
'format_id': option.get('quality'),
'height': int_or_none(option.get('quality')),
- 'fps': int_or_none(option.get('frame_rate')),
+ 'fps': int_or_none(option.get('frameRate')),
})
-
self._sort_formats(formats)
- info = {
+ thumbnails = []
+ for thumbnail_id in ('tiny', 'small', 'medium'):
+ thumbnail_url = clip.get(thumbnail_id)
+ if not thumbnail_url:
+ continue
+ thumb = {
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ }
+ mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
+ if mobj:
+ thumb.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
+ thumbnails.append(thumb)
+
+ return {
+ 'id': clip.get('id') or video_id,
+ 'title': clip.get('title') or video_id,
'formats': formats,
+ 'duration': int_or_none(clip.get('durationSeconds')),
+ 'views': int_or_none(clip.get('viewCount')),
+ 'timestamp': unified_timestamp(clip.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
+ 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
+ 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
}
-
- clip = self._call_api(
- 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
- 'Accept': 'application/vnd.twitchtv.v5+json',
- })
-
- if clip:
- quality_key = qualities(('tiny', 'small', 'medium'))
- thumbnails = []
- thumbnails_dict = clip.get('thumbnails')
- if isinstance(thumbnails_dict, dict):
- for thumbnail_id, thumbnail_url in thumbnails_dict.items():
- thumbnails.append({
- 'id': thumbnail_id,
- 'url': thumbnail_url,
- 'preference': quality_key(thumbnail_id),
- })
-
- info.update({
- 'id': clip.get('tracking_id') or video_id,
- 'title': clip.get('title') or video_id,
- 'duration': float_or_none(clip.get('duration')),
- 'views': int_or_none(clip.get('views')),
- 'timestamp': unified_timestamp(clip.get('created_at')),
- 'thumbnails': thumbnails,
- 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
- 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
- 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
- })
- else:
- info.update({
- 'title': video_id,
- 'id': video_id,
- })
-
- return info
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index de41065d6..4284487db 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -4,32 +4,67 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
- determine_ext,
dict_get,
ExtractorError,
float_or_none,
int_or_none,
- remove_end,
try_get,
+ strip_or_none,
+ unified_timestamp,
+ update_url_query,
xpath_text,
)
-from .periscope import PeriscopeIE
+from .periscope import (
+ PeriscopeBaseIE,
+ PeriscopeIE,
+)
class TwitterBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.twitter.com/1.1/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
+ _GUEST_TOKEN = None
+
+ def _extract_variant_formats(self, variant, video_id):
+ variant_url = variant.get('url')
+ if not variant_url:
+ return []
+ elif '.m3u8' in variant_url:
+ return self._extract_m3u8_formats(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ else:
+ tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
+ f = {
+ 'url': variant_url,
+ 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'tbr': tbr,
+ }
+ self._search_dimensions_in_video_url(f, variant_url)
+ return [f]
+
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_data = self._download_xml(vmap_url, video_id)
- video_url = xpath_text(vmap_data, './/MediaFile').strip()
- if determine_ext(video_url) == 'm3u8':
- return self._extract_m3u8_formats(
- video_url, video_id, ext='mp4', m3u8_id='hls',
- entry_protocol='m3u8_native')
- return [{
- 'url': video_url,
- }]
+ formats = []
+ urls = []
+ for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
+ video_variant.attrib['url'] = compat_urllib_parse_unquote(
+ video_variant.attrib['url'])
+ urls.append(video_variant.attrib['url'])
+ formats.extend(self._extract_variant_formats(
+ video_variant.attrib, video_id))
+ video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
+ if video_url not in urls:
+ formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
+ return formats
@staticmethod
def _search_dimensions_in_video_url(a_format, video_url):
@@ -40,10 +75,30 @@ class TwitterBaseIE(InfoExtractor):
'height': int(m.group('height')),
})
-
-class TwitterCardIE(TwitterBaseIE):
+ def _call_api(self, path, video_id, query={}):
+ headers = {
+ 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
+ }
+ if not self._GUEST_TOKEN:
+ self._GUEST_TOKEN = self._download_json(
+ self._API_BASE + 'guest/activate.json', video_id,
+ 'Downloading guest token', data=b'',
+ headers=headers)['guest_token']
+ headers['x-guest-token'] = self._GUEST_TOKEN
+ try:
+ return self._download_json(
+ self._API_BASE + path, video_id, headers=headers, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(),
+ video_id)['errors'][0]['message'], expected=True)
+ raise
+
+
+class TwitterCardIE(InfoExtractor):
IE_NAME = 'twitter:card'
- _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
_TESTS = [
{
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -51,19 +106,28 @@ class TwitterCardIE(TwitterBaseIE):
'info_dict': {
'id': '560070183650213889',
'ext': 'mp4',
- 'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*\.jpg$',
+ 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
+ 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'thumbnail': r're:^https?://.*\.jpg',
'duration': 30.033,
+ 'timestamp': 1422366112,
+ 'upload_date': '20150127',
},
},
{
'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
- 'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
+ 'md5': '7137eca597f72b9abbe61e5ae0161399',
'info_dict': {
'id': '623160978427936768',
'ext': 'mp4',
- 'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*$',
+ 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
+ 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
+ 'uploader': 'NASA',
+ 'uploader_id': 'NASA',
+ 'timestamp': 1437408129,
+ 'upload_date': '20150720',
},
},
{
@@ -75,7 +139,7 @@ class TwitterCardIE(TwitterBaseIE):
'title': 'Ubuntu 11.10 Overview',
'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
'upload_date': '20111013',
- 'uploader': 'OMG! Ubuntu!',
+ 'uploader': 'OMG! UBUNTU!',
'uploader_id': 'omgubuntu',
},
'add_ie': ['Youtube'],
@@ -99,189 +163,30 @@ class TwitterCardIE(TwitterBaseIE):
'info_dict': {
'id': '705235433198714880',
'ext': 'mp4',
- 'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader': 'Brent Yarina',
+ 'uploader_id': 'BTNBrentYarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
},
+ 'skip': 'This content is no longer available.',
}, {
'url': 'https://twitter.com/i/videos/752274308186120192',
'only_matching': True,
},
]
- _API_BASE = 'https://api.twitter.com/1.1'
-
- def _parse_media_info(self, media_info, video_id):
- formats = []
- for media_variant in media_info.get('variants', []):
- media_url = media_variant['url']
- if media_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
- elif media_url.endswith('.mpd'):
- formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
- else:
- tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
- a_format = {
- 'url': media_url,
- 'format_id': 'http-%d' % tbr if tbr else 'http',
- 'tbr': tbr,
- }
- # Reported bitRate may be zero
- if not a_format['tbr']:
- del a_format['tbr']
-
- self._search_dimensions_in_video_url(a_format, media_url)
-
- formats.append(a_format)
- return formats
-
- def _extract_mobile_formats(self, username, video_id):
- webpage = self._download_webpage(
- 'https://mobile.twitter.com/%s/status/%s' % (username, video_id),
- video_id, 'Downloading mobile webpage',
- headers={
- # A recent mobile UA is necessary for `gt` cookie
- 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0',
- })
- main_script_url = self._html_search_regex(
- r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL')
- main_script = self._download_webpage(
- main_script_url, video_id, 'Downloading main script')
- bearer_token = self._search_regex(
- r'BEARER_TOKEN\s*:\s*"([^"]+)"',
- main_script, 'bearer token')
- # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
- api_data = self._download_json(
- '%s/statuses/show/%s.json' % (self._API_BASE, video_id),
- video_id, 'Downloading API data',
- headers={
- 'Authorization': 'Bearer ' + bearer_token,
- })
- media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {}
- return self._parse_media_info(media_info, video_id)
-
def _real_extract(self, url):
- path, video_id = re.search(self._VALID_URL, url).groups()
-
- config = None
- formats = []
- duration = None
+ status_id = self._match_id(url)
+ return self.url_result(
+ 'https://twitter.com/statuses/' + status_id,
+ TwitterIE.ie_key(), status_id)
- urls = [url]
- if path.startswith('cards/'):
- urls.append('https://twitter.com/i/videos/' + video_id)
-
- for u in urls:
- webpage = self._download_webpage(u, video_id)
-
- iframe_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
- webpage, 'video iframe', default=None)
- if iframe_url:
- return self.url_result(iframe_url)
-
- config = self._parse_json(self._html_search_regex(
- r'data-(?:player-)?config="([^"]+)"', webpage,
- 'data player config', default='{}'),
- video_id)
-
- if config.get('source_type') == 'vine':
- return self.url_result(config['player_url'], 'Vine')
-
- periscope_url = PeriscopeIE._extract_url(webpage)
- if periscope_url:
- return self.url_result(periscope_url, PeriscopeIE.ie_key())
-
- video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
-
- if video_url:
- if determine_ext(video_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
- else:
- f = {
- 'url': video_url,
- }
-
- self._search_dimensions_in_video_url(f, video_url)
-
- formats.append(f)
-
- vmap_url = config.get('vmapUrl') or config.get('vmap_url')
- if vmap_url:
- formats.extend(
- self._extract_formats_from_vmap_url(vmap_url, video_id))
-
- media_info = None
-
- for entity in config.get('status', {}).get('entities', []):
- if 'mediaInfo' in entity:
- media_info = entity['mediaInfo']
-
- if media_info:
- formats.extend(self._parse_media_info(media_info, video_id))
- duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
-
- username = config.get('user', {}).get('screen_name')
- if username:
- formats.extend(self._extract_mobile_formats(username, video_id))
-
- if formats:
- title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
- thumbnail = config.get('posterImageUrl') or config.get('image_src')
- duration = float_or_none(config.get('duration'), scale=1000) or duration
- break
-
- if not formats:
- headers = {
- 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
- 'Referer': url,
- }
- ct0 = self._get_cookies(url).get('ct0')
- if ct0:
- headers['csrf_token'] = ct0.value
- guest_token = self._download_json(
- '%s/guest/activate.json' % self._API_BASE, video_id,
- 'Downloading guest token', data=b'',
- headers=headers)['guest_token']
- headers['x-guest-token'] = guest_token
- self._set_cookie('api.twitter.com', 'gt', guest_token)
- config = self._download_json(
- '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
- video_id, headers=headers)
- track = config['track']
- vmap_url = track.get('vmapUrl')
- if vmap_url:
- formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
- else:
- playback_url = track['playbackUrl']
- if determine_ext(playback_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- playback_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
- else:
- formats = [{
- 'url': playback_url,
- }]
- title = 'Twitter web player'
- thumbnail = config.get('posterImage')
- duration = float_or_none(track.get('durationMs'), scale=1000)
- self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class TwitterIE(InfoExtractor):
+class TwitterIE(TwitterBaseIE):
IE_NAME = 'twitter'
- _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
- _TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
- _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@@ -290,10 +195,13 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4',
'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
+ 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
'uploader': 'FREE THE NIPPLE',
'uploader_id': 'freethenipple',
'duration': 12.922,
+ 'timestamp': 1442188653,
+ 'upload_date': '20150913',
+ 'age_limit': 18,
},
}, {
'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
@@ -315,19 +223,23 @@ class TwitterIE(InfoExtractor):
'id': '665052190608723968',
'ext': 'mp4',
'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
- 'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."',
+ 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
'uploader_id': 'starwars',
'uploader': 'Star Wars',
+ 'timestamp': 1447395772,
+ 'upload_date': '20151113',
},
}, {
'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
'info_dict': {
'id': '705235433198714880',
'ext': 'mp4',
- 'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
- 'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
'uploader_id': 'BTNBrentYarina',
'uploader': 'Brent Yarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
},
'params': {
# The same video as https://twitter.com/i/videos/tweet/705235433198714880
@@ -339,12 +251,14 @@ class TwitterIE(InfoExtractor):
'info_dict': {
'id': '700207533655363584',
'ext': 'mp4',
- 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
- 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
+ 'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel',
+ 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
'thumbnail': r're:^https?://.*\.jpg',
- 'uploader': 'JG',
- 'uploader_id': 'jaydingeer',
+ 'uploader': 'simon vetugo',
+ 'uploader_id': 'simonvertugo',
'duration': 30.0,
+ 'timestamp': 1455777459,
+ 'upload_date': '20160218',
},
}, {
'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
@@ -352,10 +266,9 @@ class TwitterIE(InfoExtractor):
'info_dict': {
'id': 'MIOxnrUteUd',
'ext': 'mp4',
- 'title': 'Vince Mancini - Vine of the day',
- 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
- 'uploader': 'Vince Mancini',
- 'uploader_id': 'Filmdrunk',
+ 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
+ 'uploader': 'TAKUMA',
+ 'uploader_id': '1004126642786242560',
'timestamp': 1402826626,
'upload_date': '20140615',
},
@@ -366,21 +279,22 @@ class TwitterIE(InfoExtractor):
'id': '719944021058060289',
'ext': 'mp4',
'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
- 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
- 'uploader_id': 'captainamerica',
+ 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
+ 'uploader_id': 'CaptainAmerica',
'uploader': 'Captain America',
'duration': 3.17,
+ 'timestamp': 1460483005,
+ 'upload_date': '20160412',
},
}, {
'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
'info_dict': {
'id': '1zqKVVlkqLaKB',
'ext': 'mp4',
- 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence',
- 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"',
+ 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
'upload_date': '20160923',
- 'uploader_id': 'OPP_HSD',
- 'uploader': 'Sgt Kerry Schmidt',
+ 'uploader_id': '1PmKqpJdOJQoY',
+ 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
'timestamp': 1474613214,
},
'add_ie': ['Periscope'],
@@ -391,10 +305,12 @@ class TwitterIE(InfoExtractor):
'id': '852138619213144067',
'ext': 'mp4',
'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
- 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
+ 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
'uploader': 'عالم الأخبار',
'uploader_id': 'news_al3alm',
'duration': 277.4,
+ 'timestamp': 1492000653,
+ 'upload_date': '20170412',
},
}, {
'url': 'https://twitter.com/i/web/status/910031516746514432',
@@ -403,10 +319,12 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4',
'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
+ 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
'uploader': 'Préfet de Guadeloupe',
'uploader_id': 'Prefet971',
'duration': 47.48,
+ 'timestamp': 1505803395,
+ 'upload_date': '20170919',
},
'params': {
'skip_download': True, # requires ffmpeg
@@ -419,87 +337,185 @@ class TwitterIE(InfoExtractor):
'ext': 'mp4',
'title': 're:.*?Shep is on a roll today.*?',
'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
+ 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
'uploader': 'Lis Power',
'uploader_id': 'LisPower1',
'duration': 111.278,
+ 'timestamp': 1527623489,
+ 'upload_date': '20180529',
},
'params': {
'skip_download': True, # requires ffmpeg
},
+ }, {
+ 'url': 'https://twitter.com/foobar/status/1087791357756956680',
+ 'info_dict': {
+ 'id': '1087791357756956680',
+ 'ext': 'mp4',
+ 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'duration': 61.567,
+ 'timestamp': 1548184644,
+ 'upload_date': '20190122',
+ },
+ }, {
+ # not available in Periscope
+ 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
+ 'info_dict': {
+ 'id': '1vOGwqejwoWxB',
+ 'ext': 'mp4',
+ 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
+ 'uploader': 'Vivi',
+ 'uploader_id': '1eVjYOLGkGrQL',
+ },
+ 'add_ie': ['TwitterBroadcast'],
+ }, {
+ # Twitch Clip Embed
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }, {
+ # promo_video_website card
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user_id = mobj.group('user_id')
- twid = mobj.group('id')
-
- webpage, urlh = self._download_webpage_handle(
- self._TEMPLATE_STATUSES_URL % twid, twid)
-
- if 'twitter.com/account/suspended' in urlh.geturl():
- raise ExtractorError('Account suspended by Twitter.', expected=True)
-
- if user_id is None:
- mobj = re.match(self._VALID_URL, urlh.geturl())
- user_id = mobj.group('user_id')
-
- username = remove_end(self._og_search_title(webpage), ' on Twitter')
-
- title = description = self._og_search_description(webpage).strip('').replace('\n', ' ').strip('“”')
+ twid = self._match_id(url)
+ status = self._call_api(
+ 'statuses/show/%s.json' % twid, twid, {
+ 'cards_platform': 'Web-12',
+ 'include_cards': 1,
+ 'include_reply_count': 1,
+ 'include_user_entities': 0,
+ 'tweet_mode': 'extended',
+ })
+ title = description = status['full_text'].replace('\n', ' ')
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
title = re.sub(r'\s+(https?://[^ ]+)', '', title)
+ user = status.get('user') or {}
+ uploader = user.get('name')
+ if uploader:
+ title = '%s - %s' % (uploader, title)
+ uploader_id = user.get('screen_name')
+
+ tags = []
+ for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
+ hashtag_text = hashtag.get('text')
+ if not hashtag_text:
+ continue
+ tags.append(hashtag_text)
info = {
- 'uploader_id': user_id,
- 'uploader': username,
- 'webpage_url': url,
- 'description': '%s on Twitter: "%s"' % (username, description),
- 'title': username + ' - ' + title,
+ 'id': twid,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': unified_timestamp(status.get('created_at')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None,
+ 'like_count': int_or_none(status.get('favorite_count')),
+ 'repost_count': int_or_none(status.get('retweet_count')),
+ 'comment_count': int_or_none(status.get('reply_count')),
+ 'age_limit': 18 if status.get('possibly_sensitive') else 0,
+ 'tags': tags,
}
- mobj = re.search(r'''(?x)
- <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
- <source[^>]+video-src="(?P<url>[^"]+)"
- ''', webpage)
-
- if mobj:
- more_info = mobj.group('more_info')
- height = int_or_none(self._search_regex(
- r'data-height="(\d+)"', more_info, 'height', fatal=False))
- width = int_or_none(self._search_regex(
- r'data-width="(\d+)"', more_info, 'width', fatal=False))
- thumbnail = self._search_regex(
- r'poster="([^"]+)"', more_info, 'poster', fatal=False)
- info.update({
- 'id': twid,
- 'url': mobj.group('url'),
- 'height': height,
- 'width': width,
- 'thumbnail': thumbnail,
- })
- return info
+ media = try_get(status, lambda x: x['extended_entities']['media'][0])
+ if media and media.get('type') != 'photo':
+ video_info = media.get('video_info') or {}
+
+ formats = []
+ for variant in video_info.get('variants', []):
+ formats.extend(self._extract_variant_formats(variant, twid))
+ self._sort_formats(formats)
+
+ thumbnails = []
+ media_url = media.get('media_url_https') or media.get('media_url')
+ if media_url:
+ def add_thumbnail(name, size):
+ thumbnails.append({
+ 'id': name,
+ 'url': update_url_query(media_url, {'name': name}),
+ 'width': int_or_none(size.get('w') or size.get('width')),
+ 'height': int_or_none(size.get('h') or size.get('height')),
+ })
+ for name, size in media.get('sizes', {}).items():
+ add_thumbnail(name, size)
+ add_thumbnail('orig', media.get('original_info') or {})
- twitter_card_url = None
- if 'class="PlayableMedia' in webpage:
- twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid)
- else:
- twitter_card_iframe_url = self._search_regex(
- r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'Twitter card iframe URL', default=None, group='url')
- if twitter_card_iframe_url:
- twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url)
-
- if twitter_card_url:
info.update({
- '_type': 'url_transparent',
- 'ie_key': 'TwitterCard',
- 'url': twitter_card_url,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(video_info.get('duration_millis'), 1000),
})
- return info
-
- raise ExtractorError('There\'s no video in this tweet.')
+ else:
+ card = status.get('card')
+ if card:
+ binding_values = card['binding_values']
+
+ def get_binding_value(k):
+ o = binding_values.get(k) or {}
+ return try_get(o, lambda x: x[x['type'].lower() + '_value'])
+
+ card_name = card['name'].split(':')[-1]
+ if card_name in ('amplify', 'promo_video_website'):
+ is_amplify = card_name == 'amplify'
+ vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
+ content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
+ formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for suffix in ('_small', '', '_large', '_x_large', '_original'):
+ image = get_binding_value('player_image' + suffix) or {}
+ image_url = image.get('url')
+ if not image_url or '/player-placeholder' in image_url:
+ continue
+ thumbnails.append({
+ 'id': suffix[1:] if suffix else 'medium',
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(get_binding_value(
+ 'content_duration_seconds')),
+ })
+ elif card_name == 'player':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('player_url'),
+ })
+ elif card_name == 'periscope_broadcast':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('url') or get_binding_value('player_url'),
+ 'ie_key': PeriscopeIE.ie_key(),
+ })
+ elif card_name == 'broadcast':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('broadcast_url'),
+ 'ie_key': TwitterBroadcastIE.ie_key(),
+ })
+ else:
+ raise ExtractorError('Unsupported Twitter Card.')
+ else:
+ expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
+ if not expanded_url:
+ raise ExtractorError("There's no video in this tweet.")
+ info.update({
+ '_type': 'url',
+ 'url': expanded_url,
+ })
+ return info
class TwitterAmplifyIE(TwitterBaseIE):
@@ -556,3 +572,39 @@ class TwitterAmplifyIE(TwitterBaseIE):
'formats': formats,
'thumbnails': thumbnails,
}
+
+
+class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
+ IE_NAME = 'twitter:broadcast'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
+
+ _TEST = {
+ # untitled Periscope video
+ 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
+ 'info_dict': {
+ 'id': '1yNGaQLWpejGj',
+ 'ext': 'mp4',
+ 'title': 'Andrea May Sahouri - Periscope Broadcast',
+ 'uploader': 'Andrea May Sahouri',
+ 'uploader_id': '1PXEdBZWpGwKe',
+ },
+ }
+
+ def _real_extract(self, url):
+ broadcast_id = self._match_id(url)
+ broadcast = self._call_api(
+ 'broadcasts/show.json', broadcast_id,
+ {'ids': broadcast_id})['broadcasts'][broadcast_id]
+ info = self._parse_broadcast_data(broadcast, broadcast_id)
+ media_key = broadcast['media_key']
+ source = self._call_api(
+ 'live_video_stream/status/' + media_key, media_key)['source']
+ m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
+ if '/live_video_stream/geoblocked/' in m3u8_url:
+ self.raise_geo_restricted()
+ m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
+ m3u8_url).query).get('type', [None])[0]
+ state, width, height = self._extract_common_format_info(broadcast)
+ info['formats'] = self._extract_pscp_m3u8_formats(
+ m3u8_url, broadcast_id, m3u8_id, state, width, height)
+ return info
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
index 105826e9b..2a4faecef 100644
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -29,7 +29,7 @@ class UdemyIE(InfoExtractor):
IE_NAME = 'udemy'
_VALID_URL = r'''(?x)
https?://
- www\.udemy\.com/
+ (?:[^/]+\.)?udemy\.com/
(?:
[^#]+\#/lecture/|
lecture/view/?\?lectureId=|
@@ -64,6 +64,9 @@ class UdemyIE(InfoExtractor):
# only outputs rendition
'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
+ 'only_matching': True,
}]
def _extract_course_info(self, webpage, video_id):
@@ -73,7 +76,10 @@ class UdemyIE(InfoExtractor):
webpage, 'course', default='{}')),
video_id, fatal=False) or {}
course_id = course.get('id') or self._search_regex(
- r'data-course-id=["\'](\d+)', webpage, 'course id')
+ [
+ r'data-course-id=["\'](\d+)',
+ r'&quot;courseId&quot;\s*:\s*(\d+)'
+ ], webpage, 'course id')
return course_id, course.get('title')
def _enroll_course(self, base_url, webpage, course_id):
@@ -123,10 +129,22 @@ class UdemyIE(InfoExtractor):
def _download_webpage_handle(self, *args, **kwargs):
headers = kwargs.get('headers', {}).copy()
- headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+ headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
kwargs['headers'] = headers
- return super(UdemyIE, self)._download_webpage_handle(
+ ret = super(UdemyIE, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
+ if not ret:
+ return ret
+ webpage, _ = ret
+ if any(p in webpage for p in (
+ '>Please verify you are a human',
+ 'Access to this page has been denied because we believe you are using automation tools to browse the website',
+ '"_pxCaptcha"')):
+ raise ExtractorError(
+ 'Udemy asks you to solve a CAPTCHA. Login with browser, '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'youtube-dl with --cookies.', expected=True)
+ return ret
def _download_json(self, url_or_request, *args, **kwargs):
headers = {
@@ -360,7 +378,7 @@ class UdemyIE(InfoExtractor):
}, res))
# react rendition since 2017.04.15 (see
- # https://github.com/rg3/youtube-dl/issues/12744)
+ # https://github.com/ytdl-org/youtube-dl/issues/12744)
data = self._parse_json(
self._search_regex(
r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
@@ -403,8 +421,14 @@ class UdemyIE(InfoExtractor):
class UdemyCourseIE(UdemyIE):
IE_NAME = 'udemy:course'
- _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)'
- _TESTS = []
+ _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }]
@classmethod
def suitable(cls, url):
diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py
index f3eaee6b3..3d74ba071 100644
--- a/youtube_dl/extractor/ufctv.py
+++ b/youtube_dl/extractor/ufctv.py
@@ -1,73 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- parse_duration,
- parse_iso8601,
- urlencode_postdata,
-)
+from .imggaming import ImgGamingBaseIE
-class UFCTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
+class UFCTVIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
_NETRC_MACHINE = 'ufctv'
- _TEST = {
- 'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
- 'info_dict': {
- 'id': '34167',
- 'ext': 'mp4',
- 'title': 'UFC 219 Countdown: Full Episode',
- 'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
- 'timestamp': 1513962360,
- 'upload_date': '20171222',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }
+ _REALM = 'ufc'
- def _real_initialize(self):
- username, password = self._get_login_info()
- if username is None:
- return
- code = self._download_json(
- 'https://www.ufc.tv/secure/authenticate',
- None, 'Logging in', data=urlencode_postdata({
- 'username': username,
- 'password': password,
- 'format': 'json',
- })).get('code')
- if code and code != 'loginsuccess':
- raise ExtractorError(code, expected=True)
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- video_data = self._download_json(url, display_id, query={
- 'format': 'json',
- })
- video_id = str(video_data['id'])
- title = video_data['name']
- m3u8_url = self._download_json(
- 'https://www.ufc.tv/service/publishpoint', video_id, query={
- 'type': 'video',
- 'format': 'json',
- 'id': video_id,
- }, headers={
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
- })['path']
- m3u8_url = m3u8_url.replace('_iphone.', '.')
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'duration': parse_duration(video_data.get('runtime')),
- 'timestamp': parse_iso8601(video_data.get('releaseDate')),
- 'formats': formats,
- }
+class UFCArabiaIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
+ _NETRC_MACHINE = 'ufcarabia'
+ _REALM = 'admufc'
diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py
index e67083004..628adf219 100644
--- a/youtube_dl/extractor/uol.py
+++ b/youtube_dl/extractor/uol.py
@@ -2,12 +2,17 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
from ..utils import (
clean_html,
int_or_none,
parse_duration,
+ parse_iso8601,
+ qualities,
update_url_query,
- str_or_none,
)
@@ -16,21 +21,25 @@ class UOLIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
_TESTS = [{
'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
- 'md5': '25291da27dc45e0afb5718a8603d3816',
+ 'md5': '4f1e26683979715ff64e4e29099cf020',
'info_dict': {
'id': '15951931',
'ext': 'mp4',
'title': 'Miss simpatia é encontrada morta',
'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+ 'timestamp': 1470421860,
+ 'upload_date': '20160805',
}
}, {
'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
- 'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9',
+ 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
'info_dict': {
'id': '15954259',
'ext': 'mp4',
'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+ 'timestamp': 1470674520,
+ 'upload_date': '20160808',
}
}, {
'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
@@ -55,74 +64,54 @@ class UOLIE(InfoExtractor):
'only_matching': True,
}]
- _FORMATS = {
- '2': {
- 'width': 640,
- 'height': 360,
- },
- '5': {
- 'width': 1080,
- 'height': 720,
- },
- '6': {
- 'width': 426,
- 'height': 240,
- },
- '7': {
- 'width': 1920,
- 'height': 1080,
- },
- '8': {
- 'width': 192,
- 'height': 144,
- },
- '9': {
- 'width': 568,
- 'height': 320,
- },
- }
-
def _real_extract(self, url):
video_id = self._match_id(url)
- media_id = None
-
- if video_id.isdigit():
- media_id = video_id
-
- if not media_id:
- embed_page = self._download_webpage(
- 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
- video_id, 'Downloading embed page', fatal=False)
- if embed_page:
- media_id = self._search_regex(
- (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
- embed_page, 'media id', default=None)
-
- if not media_id:
- webpage = self._download_webpage(url, video_id)
- media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
video_data = self._download_json(
- 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
- media_id)['item']
+ # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
+ 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
+ video_id)['item']
+ media_id = compat_str(video_data['mediaId'])
title = video_data['title']
+ ver = video_data.get('revision', 2)
- query = {
- 'ver': video_data.get('numRevision', 2),
- 'r': 'http://mais.uol.com.br',
- }
+ uol_formats = self._download_json(
+ 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
+ media_id)
+ quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
formats = []
- for f in video_data.get('formats', []):
+ for format_id, f in uol_formats.items():
+ if not isinstance(f, dict):
+ continue
f_url = f.get('url') or f.get('secureUrl')
if not f_url:
continue
- format_id = str_or_none(f.get('id'))
- fmt = {
- 'format_id': format_id,
- 'url': update_url_query(f_url, query),
+ query = {
+ 'ver': ver,
+ 'r': 'http://mais.uol.com.br',
}
- fmt.update(self._FORMATS.get(format_id, {}))
- formats.append(fmt)
+ for k in ('token', 'sign'):
+ v = f.get(k)
+ if v:
+ query[k] = v
+ f_url = update_url_query(f_url, query)
+ format_id = format_id
+ if format_id == 'HLS':
+ m3u8_formats = self._extract_m3u8_formats(
+ f_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ encoded_query = compat_urllib_parse_urlencode(query)
+ for m3u8_f in m3u8_formats:
+ m3u8_f['extra_param_to_segment_url'] = encoded_query
+ m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
+ formats.extend(m3u8_formats)
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': f_url,
+ 'quality': quality(format_id),
+ 'preference': -1,
+ })
self._sort_formats(formats)
tags = []
@@ -132,12 +121,24 @@ class UOLIE(InfoExtractor):
continue
tags.append(tag_description)
+ thumbnails = []
+ for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
+ q_url = video_data.get('thumb' + q)
+ if not q_url:
+ continue
+ thumbnails.append({
+ 'id': q,
+ 'url': q_url,
+ })
+
return {
'id': media_id,
'title': title,
- 'description': clean_html(video_data.get('desMedia')),
- 'thumbnail': video_data.get('thumbnail'),
- 'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')),
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(video_data.get('duration')),
'tags': tags,
'formats': formats,
+ 'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
+ 'view_count': int_or_none(video_data.get('viewsQtty')),
}
diff --git a/youtube_dl/extractor/upskill.py b/youtube_dl/extractor/upskill.py
deleted file mode 100644
index 30297b4dd..000000000
--- a/youtube_dl/extractor/upskill.py
+++ /dev/null
@@ -1,176 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .wistia import WistiaIE
-from ..compat import compat_str
-from ..utils import (
- clean_html,
- ExtractorError,
- get_element_by_class,
- urlencode_postdata,
- urljoin,
-)
-
-
-class UpskillBaseIE(InfoExtractor):
- _LOGIN_URL = 'http://upskillcourses.com/sign_in'
- _NETRC_MACHINE = 'upskill'
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page, urlh = self._download_webpage_handle(
- self._LOGIN_URL, None, 'Downloading login page')
-
- login_url = compat_str(urlh.geturl())
-
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'user[email]': username,
- 'user[password]': password,
- })
-
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
- 'post url', default=login_url, group='url')
-
- if not post_url.startswith('http'):
- post_url = urljoin(login_url, post_url)
-
- response = self._download_webpage(
- post_url, None, 'Logging in',
- data=urlencode_postdata(login_form),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': login_url,
- })
-
- # Successful login
- if any(re.search(p, response) for p in (
- r'class=["\']user-signout',
- r'<a[^>]+\bhref=["\']/sign_out',
- r'>\s*Log out\s*<')):
- return
-
- message = get_element_by_class('alert', response)
- if message is not None:
- raise ExtractorError(
- 'Unable to login: %s' % clean_html(message), expected=True)
-
- raise ExtractorError('Unable to log in')
-
-
-class UpskillIE(UpskillBaseIE):
- _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/[^/]+/lectures/(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
- 'info_dict': {
- 'id': 'uzw6zw58or',
- 'ext': 'mp4',
- 'title': 'Welcome to the Course!',
- 'description': 'md5:8d66c13403783370af62ca97a7357bdd',
- 'duration': 138.763,
- 'timestamp': 1479846621,
- 'upload_date': '20161122',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- wistia_url = WistiaIE._extract_url(webpage)
- if not wistia_url:
- if any(re.search(p, webpage) for p in (
- r'class=["\']lecture-contents-locked',
- r'>\s*Lecture contents locked',
- r'id=["\']lecture-locked')):
- self.raise_login_required('Lecture contents locked')
-
- title = self._og_search_title(webpage, default=None)
-
- return {
- '_type': 'url_transparent',
- 'url': wistia_url,
- 'ie_key': WistiaIE.ie_key(),
- 'title': title,
- }
-
-
-class UpskillCourseIE(UpskillBaseIE):
- _VALID_URL = r'https?://(?:www\.)?upskillcourses\.com/courses/(?:enrolled/)?(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
- 'info_dict': {
- 'id': '119763',
- 'title': 'The Essential Web Developer Course (Free)',
- },
- 'playlist_count': 192,
- }, {
- 'url': 'http://upskillcourses.com/courses/119763/',
- 'only_matching': True,
- }, {
- 'url': 'http://upskillcourses.com/courses/enrolled/119763',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if UpskillIE.suitable(url) else super(
- UpskillCourseIE, cls).suitable(url)
-
- def _real_extract(self, url):
- course_id = self._match_id(url)
-
- webpage = self._download_webpage(url, course_id)
-
- course_id = self._search_regex(
- r'data-course-id=["\'](\d+)', webpage, 'course id',
- default=course_id)
-
- entries = []
-
- for mobj in re.finditer(
- r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
- webpage):
- li = mobj.group('li')
- if 'fa-youtube-play' not in li:
- continue
- lecture_url = self._search_regex(
- r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
- 'lecture url', default=None, group='url')
- if not lecture_url:
- continue
- lecture_id = self._search_regex(
- r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
- title = self._html_search_regex(
- r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
- 'title', default=None)
- entries.append(
- self.url_result(
- urljoin('http://upskillcourses.com/', lecture_url),
- ie=UpskillIE.ie_key(), video_id=lecture_id,
- video_title=clean_html(title)))
-
- course_title = self._html_search_regex(
- (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
- r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
- webpage, 'course title', fatal=False)
-
- return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/urplay.py b/youtube_dl/extractor/urplay.py
index 8e6fd4731..6030b7cb5 100644
--- a/youtube_dl/extractor/urplay.py
+++ b/youtube_dl/extractor/urplay.py
@@ -2,18 +2,31 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import unified_timestamp
class URPlayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde',
- 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb',
+ 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand',
+ 'md5': 'ff5b0c89928f8083c74bbd5099c9292d',
+ 'info_dict': {
+ 'id': '203704',
+ 'ext': 'mp4',
+ 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd',
+ 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a',
+ 'timestamp': 1513512768,
+ 'upload_date': '20171217',
+ },
+ }, {
+ 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde',
'info_dict': {
'id': '190031',
'ext': 'mp4',
'title': 'Tripp, Trapp, Träd : Sovkudde',
'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1',
+ 'timestamp': 1440093600,
+ 'upload_date': '20150820',
},
}, {
'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden',
@@ -51,6 +64,7 @@ class URPlayIE(InfoExtractor):
'title': urplayer_data['title'],
'description': self._og_search_description(webpage),
'thumbnail': urplayer_data.get('image'),
+ 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')),
'series': urplayer_data.get('series_title'),
'subtitles': subtitles,
'formats': formats,
diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dl/extractor/usanetwork.py
index 823340776..54c7495cc 100644
--- a/youtube_dl/extractor/usanetwork.py
+++ b/youtube_dl/extractor/usanetwork.py
@@ -1,11 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-
from .adobepass import AdobePassIE
from ..utils import (
- extract_attributes,
+ NO_DEFAULT,
smuggle_url,
update_url_query,
)
@@ -31,22 +29,22 @@ class USANetworkIE(AdobePassIE):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- player_params = extract_attributes(self._search_regex(
- r'(<div[^>]+data-usa-tve-player-container[^>]*>)', webpage, 'player params'))
- video_id = player_params['data-mpx-guid']
- title = player_params['data-episode-title']
+ def _x(name, default=NO_DEFAULT):
+ return self._search_regex(
+ r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+ webpage, name, default=default, group='value')
- account_pid, path = re.search(
- r'data-src="(?:https?)?//player\.theplatform\.com/p/([^/]+)/.*?/(media/guid/\d+/\d+)',
- webpage).groups()
+ video_id = _x('mpx-guid')
+ title = _x('episode-title')
+ mpx_account_id = _x('mpx-account-id', '2304992029')
query = {
'mbr': 'true',
}
- if player_params.get('data-is-full-episode') == '1':
+ if _x('is-full-episode', None) == '1':
query['manifest'] = 'm3u'
- if player_params.get('data-entitlement') == 'auth':
+ if _x('is-entitlement', None) == '1':
adobe_pass = {}
drupal_settings = self._search_regex(
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
@@ -57,7 +55,7 @@ class USANetworkIE(AdobePassIE):
adobe_pass = drupal_settings.get('adobePass', {})
resource = self._get_mvpd_resource(
adobe_pass.get('adobePassResourceId', 'usa'),
- title, video_id, player_params.get('data-episode-rating', 'TV-14'))
+ title, video_id, _x('episode-rating', 'TV-14'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
@@ -65,11 +63,11 @@ class USANetworkIE(AdobePassIE):
info.update({
'_type': 'url_transparent',
'url': smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/%s/%s' % (account_pid, path),
+ 'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
query), {'force_smil_url': True}),
'id': video_id,
'title': title,
- 'series': player_params.get('data-show-title'),
+ 'series': _x('show-title', None),
'episode': title,
'ie_key': 'ThePlatform',
})
diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dl/extractor/usatoday.py
index e5678dc78..b2103448d 100644
--- a/youtube_dl/extractor/usatoday.py
+++ b/youtube_dl/extractor/usatoday.py
@@ -3,21 +3,23 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
get_element_by_attribute,
parse_duration,
+ try_get,
update_url_query,
- ExtractorError,
)
from ..compat import compat_str
class USATodayIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?usatoday\.com/(?:[^/]+/)*(?P<id>[^?/#]+)'
- _TEST = {
+ _TESTS = [{
+ # Brightcove Partner ID = 29906170001
'url': 'http://www.usatoday.com/media/cinematic/video/81729424/us-france-warn-syrian-regime-ahead-of-new-peace-talks/',
- 'md5': '4d40974481fa3475f8bccfd20c5361f8',
+ 'md5': '033587d2529dc3411a1ab3644c3b8827',
'info_dict': {
- 'id': '81729424',
+ 'id': '4799374959001',
'ext': 'mp4',
'title': 'US, France warn Syrian regime ahead of new peace talks',
'timestamp': 1457891045,
@@ -25,8 +27,20 @@ class USATodayIE(InfoExtractor):
'uploader_id': '29906170001',
'upload_date': '20160313',
}
- }
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/29906170001/38a9eecc-bdd8-42a3-ba14-95397e48b3f8_default/index.html?videoId=%s'
+ }, {
+ # ui-video-data[asset_metadata][items][brightcoveaccount] = 28911775001
+ 'url': 'https://www.usatoday.com/story/tech/science/2018/08/21/yellowstone-supervolcano-eruption-stop-worrying-its-blow/973633002/',
+ 'info_dict': {
+ 'id': '5824495846001',
+ 'ext': 'mp4',
+ 'title': 'Yellowstone more likely to crack rather than explode',
+ 'timestamp': 1534790612,
+ 'description': 'md5:3715e7927639a4f16b474e9391687c62',
+ 'uploader_id': '28911775001',
+ 'upload_date': '20180820',
+ }
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -35,10 +49,11 @@ class USATodayIE(InfoExtractor):
if not ui_video_data:
raise ExtractorError('no video on the webpage', expected=True)
video_data = self._parse_json(ui_video_data, display_id)
+ item = try_get(video_data, lambda x: x['asset_metadata']['items'], dict) or {}
return {
'_type': 'url_transparent',
- 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_data['brightcove_id'],
+ 'url': self.BRIGHTCOVE_URL_TEMPLATE % (item.get('brightcoveaccount', '29906170001'), item.get('brightcoveid') or video_data['brightcove_id']),
'id': compat_str(video_data['id']),
'title': video_data['title'],
'thumbnail': video_data.get('thumbnail'),
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 5737d4d16..582090d0d 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -165,7 +165,7 @@ class UstreamIE(InfoExtractor):
m = re.match(self._VALID_URL, url)
video_id = m.group('id')
- # some sites use this embed format (see: https://github.com/rg3/youtube-dl/issues/2990)
+ # some sites use this embed format (see: https://github.com/ytdl-org/youtube-dl/issues/2990)
if m.group('type') == 'embed/recorded':
video_id = m.group('id')
desktop_url = 'http://www.ustream.tv/recorded/' + video_id
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
index 0d8d832cc..a6dc3c8d8 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dl/extractor/veehd.py
@@ -54,7 +54,7 @@ class VeeHDIE(InfoExtractor):
video_id = self._match_id(url)
# VeeHD seems to send garbage on the first request.
- # See https://github.com/rg3/youtube-dl/issues/2102
+ # See https://github.com/ytdl-org/youtube-dl/issues/2102
self._download_webpage(url, video_id, 'Requesting webpage')
webpage = self._download_webpage(url, video_id)
diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py
index 071774a6f..1c44c145c 100644
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dl/extractor/veoh.py
@@ -1,13 +1,10 @@
from __future__ import unicode_literals
-import re
-import json
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
- ExtractorError,
- sanitized_Request,
+ parse_duration,
+ qualities,
)
@@ -16,9 +13,9 @@ class VeohIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
- 'md5': '620e68e6a3cff80086df3348426c9ca3',
+ 'md5': '9e7ecc0fd8bbee7a69fe38953aeebd30',
'info_dict': {
- 'id': '56314296',
+ 'id': 'v56314296nk7Zdmz3',
'ext': 'mp4',
'title': 'Straight Backs Are Stronger',
'uploader': 'LUMOback',
@@ -56,29 +53,6 @@ class VeohIE(InfoExtractor):
'only_matching': True,
}]
- def _extract_formats(self, source):
- formats = []
- link = source.get('aowPermalink')
- if link:
- formats.append({
- 'url': link,
- 'ext': 'mp4',
- 'format_id': 'aow',
- })
- link = source.get('fullPreviewHashLowPath')
- if link:
- formats.append({
- 'url': link,
- 'format_id': 'low',
- })
- link = source.get('fullPreviewHashHighPath')
- if link:
- formats.append({
- 'url': link,
- 'format_id': 'high',
- })
- return formats
-
def _extract_video(self, source):
return {
'id': source.get('videoId'),
@@ -93,38 +67,37 @@ class VeohIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- if video_id.startswith('v'):
- rsp = self._download_xml(
- r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML')
- stat = rsp.get('stat')
- if stat == 'ok':
- return self._extract_video(rsp.find('./videoList/video'))
- elif stat == 'fail':
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True)
-
- webpage = self._download_webpage(url, video_id)
- age_limit = 0
- if 'class="adultwarning-container"' in webpage:
- self.report_age_confirmation()
- age_limit = 18
- request = sanitized_Request(url)
- request.add_header('Cookie', 'confirmedAdult=true')
- webpage = self._download_webpage(request, video_id)
+ video_id = self._match_id(url)
+ video = self._download_json(
+ 'https://www.veoh.com/watch/getVideo/' + video_id,
+ video_id)['video']
+ title = video['title']
- m_youtube = re.search(r'http://www\.youtube\.com/v/(.*?)(\&|"|\?)', webpage)
- if m_youtube is not None:
- youtube_id = m_youtube.group(1)
- self.to_screen('%s: detected Youtube video.' % video_id)
- return self.url_result(youtube_id, 'Youtube')
-
- info = json.loads(
- self._search_regex(r'videoDetailsJSON = \'({.*?})\';', webpage, 'info').replace('\\\'', '\''))
-
- video = self._extract_video(info)
- video['age_limit'] = age_limit
+ thumbnail_url = None
+ q = qualities(['HQ', 'Regular'])
+ formats = []
+ for f_id, f_url in video.get('src', {}).items():
+ if not f_url:
+ continue
+ if f_id == 'poster':
+ thumbnail_url = f_url
+ else:
+ formats.append({
+ 'format_id': f_id,
+ 'quality': q(f_id),
+ 'url': f_url,
+ })
+ self._sort_formats(formats)
- return video
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': thumbnail_url,
+ 'uploader': video.get('author', {}).get('nickname'),
+ 'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
+ 'view_count': int_or_none(video.get('views')),
+ 'formats': formats,
+ 'average_rating': int_or_none(video.get('rating')),
+ 'comment_count': int_or_none(video.get('numOfComments')),
+ }
diff --git a/youtube_dl/extractor/vessel.py b/youtube_dl/extractor/vessel.py
deleted file mode 100644
index 31eee0ba7..000000000
--- a/youtube_dl/extractor/vessel.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- parse_iso8601,
- sanitized_Request,
-)
-
-
-class VesselIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vessel\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z-_]+)'
- _API_URL_TEMPLATE = 'https://www.vessel.com/api/view/items/%s'
- _LOGIN_URL = 'https://www.vessel.com/api/account/login'
- _NETRC_MACHINE = 'vessel'
- _TESTS = [{
- 'url': 'https://www.vessel.com/videos/HDN7G5UMs',
- 'md5': '455cdf8beb71c6dd797fd2f3818d05c4',
- 'info_dict': {
- 'id': 'HDN7G5UMs',
- 'ext': 'mp4',
- 'title': 'Nvidia GeForce GTX Titan X - The Best Video Card on the Market?',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20150317',
- 'description': 'Did Nvidia pull out all the stops on the Titan X, or does its performance leave something to be desired?',
- 'timestamp': int,
- },
- }, {
- 'url': 'https://www.vessel.com/embed/G4U7gUJ6a?w=615&h=346',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vessel.com/videos/F01_dsLj1',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vessel.com/videos/RRX-sir-J',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?vessel\.com/embed/[0-9a-zA-Z-_]+.*?)\1',
- webpage)]
-
- @staticmethod
- def make_json_request(url, data):
- payload = json.dumps(data).encode('utf-8')
- req = sanitized_Request(url, payload)
- req.add_header('Content-Type', 'application/json; charset=utf-8')
- return req
-
- @staticmethod
- def find_assets(data, asset_type, asset_id=None):
- for asset in data.get('assets', []):
- if not asset.get('type') == asset_type:
- continue
- elif asset_id is not None and not asset.get('id') == asset_id:
- continue
- else:
- yield asset
-
- def _check_access_rights(self, data):
- access_info = data.get('__view', {})
- if not access_info.get('allow_access', True):
- err_code = access_info.get('error_code') or ''
- if err_code == 'ITEM_PAID_ONLY':
- raise ExtractorError(
- 'This video requires subscription.', expected=True)
- else:
- raise ExtractorError(
- 'Access to this content is restricted. (%s said: %s)' % (self.IE_NAME, err_code), expected=True)
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
- self.report_login()
- data = {
- 'client_id': 'web',
- 'type': 'password',
- 'user_key': username,
- 'password': password,
- }
- login_request = VesselIE.make_json_request(self._LOGIN_URL, data)
- self._download_webpage(login_request, None, False, 'Wrong login info')
-
- def _real_initialize(self):
- self._login()
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- data = self._parse_json(self._search_regex(
- r'App\.bootstrapData\((.*?)\);', webpage, 'data'), video_id)
- asset_id = data['model']['data']['id']
-
- req = VesselIE.make_json_request(
- self._API_URL_TEMPLATE % asset_id, {'client': 'web'})
- data = self._download_json(req, video_id)
- video_asset_id = data.get('main_video_asset')
-
- self._check_access_rights(data)
-
- try:
- video_asset = next(
- VesselIE.find_assets(data, 'video', asset_id=video_asset_id))
- except StopIteration:
- raise ExtractorError('No video assets found')
-
- formats = []
- for f in video_asset.get('sources', []):
- location = f.get('location')
- if not location:
- continue
- name = f.get('name')
- if name == 'hls-index':
- formats.extend(self._extract_m3u8_formats(
- location, video_id, ext='mp4',
- entry_protocol='m3u8_native', m3u8_id='m3u8', fatal=False))
- elif name == 'dash-index':
- formats.extend(self._extract_mpd_formats(
- location, video_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'format_id': name,
- 'tbr': f.get('bitrate'),
- 'height': f.get('height'),
- 'width': f.get('width'),
- 'url': location,
- })
- self._sort_formats(formats)
-
- thumbnails = []
- for im_asset in VesselIE.find_assets(data, 'image'):
- thumbnails.append({
- 'url': im_asset['location'],
- 'width': im_asset.get('width', 0),
- 'height': im_asset.get('height', 0),
- })
-
- return {
- 'id': video_id,
- 'title': data['title'],
- 'formats': formats,
- 'thumbnails': thumbnails,
- 'description': data.get('short_description'),
- 'duration': data.get('duration'),
- 'comment_count': data.get('comment_count'),
- 'like_count': data.get('like_count'),
- 'view_count': data.get('view_count'),
- 'timestamp': parse_iso8601(data.get('released_at')),
- }
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index 890a149ea..4ea9f1b4b 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -34,6 +34,7 @@ class VevoIE(VevoBaseIE):
(?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?|
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
+ https?://embed\.vevo\.com/.*?[?&]isrc=|
vevo:)
(?P<id>[^&?#]+)'''
@@ -144,6 +145,9 @@ class VevoIE(VevoBaseIE):
# Geo-restricted to Netherlands/Germany
'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
'only_matching': True,
+ }, {
+ 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
+ 'only_matching': True,
}]
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
@@ -200,7 +204,7 @@ class VevoIE(VevoBaseIE):
fatal=False)
# Some videos are only available via webpage (e.g.
- # https://github.com/rg3/youtube-dl/issues/9366)
+ # https://github.com/ytdl-org/youtube-dl/issues/9366)
if not video_versions:
webpage = self._download_webpage(url, video_id)
json_data = self._extract_json(webpage, video_id)
@@ -275,8 +279,8 @@ class VevoIE(VevoBaseIE):
genres = video_info.get('genres')
genre = (
- genres[0] if genres and isinstance(genres, list) and
- isinstance(genres[0], compat_str) else None)
+ genres[0] if genres and isinstance(genres, list)
+ and isinstance(genres[0], compat_str) else None)
is_explicit = video_info.get('isExplicit')
if is_explicit is True:
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
index 538258617..e37499512 100644
--- a/youtube_dl/extractor/vice.py
+++ b/youtube_dl/extractor/vice.py
@@ -1,35 +1,50 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-import time
+import functools
import hashlib
import json
import random
+import re
+import time
from .adobepass import AdobePassIE
-from .youtube import YoutubeIE
from .common import InfoExtractor
+from .youtube import YoutubeIE
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
+ clean_html,
ExtractorError,
int_or_none,
+ OnDemandPagedList,
parse_age_limit,
str_or_none,
try_get,
)
-class ViceIE(AdobePassIE):
+class ViceBaseIE(InfoExtractor):
+ def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
+ return self._download_json(
+ 'https://video.vice.com/api/v1/graphql', resource_id, query={
+ 'query': '''{
+ %s(locale: "%s", %s: "%s"%s) {
+ %s
+ }
+}''' % (resource, locale, resource_key, resource_id, args, fields),
+ })['data'][resource]
+
+
+class ViceIE(ViceBaseIE, AdobePassIE):
IE_NAME = 'vice'
- _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
+ _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
_TESTS = [{
'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
'info_dict': {
- 'id': '5e647f0125e145c9aef2069412c0cbde',
+ 'id': '58c69e38a55424f1227dc3f7',
'ext': 'mp4',
'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
@@ -43,17 +58,16 @@ class ViceIE(AdobePassIE):
# m3u8 download
'skip_download': True,
},
- 'add_ie': ['UplynkPreplay'],
}, {
# geo restricted to US
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'info_dict': {
- 'id': '930c0ad1f47141cc955087eecaddb0e2',
+ 'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4',
- 'uploader': 'waypoint',
+ 'uploader': 'vice',
'title': 'The Signal From Tölva',
'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
- 'uploader_id': '57f7d621e05ca860fa9ccaf9',
+ 'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1477941983,
'upload_date': '20161031',
},
@@ -61,15 +75,14 @@ class ViceIE(AdobePassIE):
# m3u8 download
'skip_download': True,
},
- 'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
'info_dict': {
'id': '581b12b60a0e1f4c0fb6ea2f',
'ext': 'mp4',
'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
- 'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
- 'uploader': 'VICE',
+ 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
+ 'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1485368119,
'upload_date': '20170125',
@@ -78,9 +91,7 @@ class ViceIE(AdobePassIE):
'params': {
# AES-encrypted m3u8
'skip_download': True,
- 'proxy': '127.0.0.1:8118',
},
- 'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
'only_matching': True,
@@ -94,12 +105,11 @@ class ViceIE(AdobePassIE):
'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
'only_matching': True,
}]
- _PREPLAY_HOST = 'vms.vice'
@staticmethod
def _extract_urls(webpage):
return re.findall(
- r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
+ r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
webpage)
@staticmethod
@@ -110,31 +120,16 @@ class ViceIE(AdobePassIE):
def _real_extract(self, url):
locale, video_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(
- 'https://video.vice.com/%s/embed/%s' % (locale, video_id),
- video_id)
-
- video = self._parse_json(
- self._search_regex(
- r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
- 'app state'), video_id)['video']
- video_id = video.get('vms_id') or video.get('id') or video_id
- title = video['title']
- is_locked = video.get('locked')
+ video = self._call_api('videos', 'id', video_id, locale, '''body
+ locked
+ rating
+ thumbnail_url
+ title''')[0]
+ title = video['title'].strip()
rating = video.get('rating')
- thumbnail = video.get('thumbnail_url')
- duration = int_or_none(video.get('duration'))
- series = try_get(
- video, lambda x: x['episode']['season']['show']['title'],
- compat_str)
- episode_number = try_get(
- video, lambda x: x['episode']['episode_number'])
- season_number = try_get(
- video, lambda x: x['episode']['season']['season_number'])
- uploader = None
query = {}
- if is_locked:
+ if video.get('locked'):
resource = self._get_mvpd_resource(
'VICELAND', title, video_id, rating)
query['tvetoken'] = self._extract_mvpd_auth(
@@ -149,18 +144,14 @@ class ViceIE(AdobePassIE):
query.update({
'exp': exp,
'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
- '_ad_blocked': None,
- '_ad_unit': '',
- '_debug': '',
+ 'skipadstitching': 1,
'platform': 'desktop',
'rn': random.randint(10000, 100000),
- 'fbprebidtoken': '',
})
try:
- host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json(
- 'https://%s.com/%s/video/preplay/%s' % (host, locale, video_id),
+ 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
@@ -171,85 +162,94 @@ class ViceIE(AdobePassIE):
raise
video_data = preplay['video']
- base = video_data['base']
- uplynk_preplay_url = preplay['preplayURL']
- episode = video_data.get('episode', {})
- channel = video_data.get('channel', {})
+ formats = self._extract_m3u8_formats(
+ preplay['playURL'], video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+ episode = video_data.get('episode') or {}
+ channel = video_data.get('channel') or {}
+ season = video_data.get('season') or {}
subtitles = {}
- cc_url = preplay.get('ccURL')
- if cc_url:
- subtitles['en'] = [{
+ for subtitle in preplay.get('subtitleURLs', []):
+ cc_url = subtitle.get('url')
+ if not cc_url:
+ continue
+ language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
+ subtitles.setdefault(language_code, []).append({
'url': cc_url,
- }]
+ })
return {
- '_type': 'url_transparent',
- 'url': uplynk_preplay_url,
+ 'formats': formats,
'id': video_id,
'title': title,
- 'description': base.get('body') or base.get('display_body'),
- 'thumbnail': thumbnail,
- 'duration': int_or_none(video_data.get('video_duration')) or duration,
+ 'description': clean_html(video.get('body')),
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': int_or_none(video_data.get('video_duration')),
'timestamp': int_or_none(video_data.get('created_at'), 1000),
- 'age_limit': parse_age_limit(video_data.get('video_rating')),
- 'series': video_data.get('show_title') or series,
- 'episode_number': int_or_none(episode.get('episode_number') or episode_number),
+ 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
+ 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
+ 'episode_number': int_or_none(episode.get('episode_number')),
'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
- 'season_number': int_or_none(season_number),
- 'season_id': str_or_none(episode.get('season_id')),
- 'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,
+ 'season_number': int_or_none(season.get('season_number')),
+ 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
+ 'uploader': channel.get('name'),
'uploader_id': str_or_none(channel.get('id')),
'subtitles': subtitles,
- 'ie_key': 'UplynkPreplay',
}
-class ViceShowIE(InfoExtractor):
+class ViceShowIE(ViceBaseIE):
IE_NAME = 'vice:show'
- _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
-
- _TEST = {
- 'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
+ _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
+ _PAGE_SIZE = 25
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
'info_dict': {
- 'id': 'fuck-thats-delicious-2',
- 'title': "Fuck, That's Delicious",
- 'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
+ 'id': '57a2040c8cb727dec794c901',
+ 'title': 'F*ck, That’s Delicious',
+ 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
},
- 'playlist_count': 17,
- }
+ 'playlist_mincount': 64,
+ }, {
+ 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
+ 'only_matching': True,
+ }]
- def _real_extract(self, url):
- show_id = self._match_id(url)
- webpage = self._download_webpage(url, show_id)
+ def _fetch_page(self, locale, show_id, page):
+ videos = self._call_api('videos', 'show_id', show_id, locale, '''body
+ id
+ url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE))
+ for video in videos:
+ yield self.url_result(
+ video['url'], ViceIE.ie_key(), video.get('id'))
- entries = [
- self.url_result(video_url, ViceIE.ie_key())
- for video_url, _ in re.findall(
- r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
- % ViceIE._VALID_URL, webpage)]
+ def _real_extract(self, url):
+ locale, display_id = re.match(self._VALID_URL, url).groups()
+ show = self._call_api('shows', 'slug', display_id, locale, '''dek
+ id
+ title''')[0]
+ show_id = show['id']
- title = self._search_regex(
- r'<title>(.+?)</title>', webpage, 'title', default=None)
- if title:
- title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
- description = self._html_search_meta(
- 'description', webpage, 'description')
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, locale, show_id),
+ self._PAGE_SIZE)
- return self.playlist_result(entries, show_id, title, description)
+ return self.playlist_result(
+ entries, show_id, show.get('title'), show.get('dek'))
-class ViceArticleIE(InfoExtractor):
+class ViceArticleIE(ViceBaseIE):
IE_NAME = 'vice:article'
- _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
+ _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
'info_dict': {
- 'id': '41eae2a47b174a1398357cec55f1f6fc',
+ 'id': '58dc0a3dee202d2a0ccfcbd8',
'ext': 'mp4',
- 'title': 'Mormon War on Porn ',
- 'description': 'md5:6394a8398506581d0346b9ab89093fef',
+ 'title': 'Mormon War on Porn',
+ 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1491883129,
@@ -260,10 +260,10 @@ class ViceArticleIE(InfoExtractor):
# AES-encrypted m3u8
'skip_download': True,
},
- 'add_ie': ['UplynkPreplay'],
+ 'add_ie': [ViceIE.ie_key()],
}, {
'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
- 'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
+ 'md5': '13010ee0bc694ea87ec40724397c2349',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
@@ -273,15 +273,15 @@ class ViceArticleIE(InfoExtractor):
'uploader_id': 'MotherboardTV',
'upload_date': '20140529',
},
- 'add_ie': ['Youtube'],
+ 'add_ie': [YoutubeIE.ie_key()],
}, {
'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': {
- 'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
+ 'id': '57f41d3556a0a80f54726060',
'ext': 'mp4',
'title': "Making The World's First Male Sex Doll",
- 'description': 'md5:916078ef0e032d76343116208b6cc2c4',
+ 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
'uploader': 'vice',
'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1476919911,
@@ -290,6 +290,7 @@ class ViceArticleIE(InfoExtractor):
},
'params': {
'skip_download': True,
+ 'format': 'bestvideo',
},
'add_ie': [ViceIE.ie_key()],
}, {
@@ -301,14 +302,11 @@ class ViceArticleIE(InfoExtractor):
}]
def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
+ locale, display_id = re.match(self._VALID_URL, url).groups()
- prefetch_data = self._parse_json(self._search_regex(
- r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
- webpage, 'app state'), display_id)['pageData']
- body = prefetch_data['body']
+ article = self._call_api('articles', 'slug', display_id, locale, '''body
+ embed_code''')[0]
+ body = article['body']
def _url_res(video_url, ie_key):
return {
@@ -318,7 +316,7 @@ class ViceArticleIE(InfoExtractor):
'ie_key': ie_key,
}
- vice_url = ViceIE._extract_url(webpage)
+ vice_url = ViceIE._extract_url(body)
if vice_url:
return _url_res(vice_url, ViceIE.ie_key())
@@ -334,6 +332,6 @@ class ViceArticleIE(InfoExtractor):
video_url = self._html_search_regex(
r'data-video-url="([^"]+)"',
- prefetch_data['embed_code'], 'video URL')
+ article['embed_code'], 'video URL')
return _url_res(video_url, ViceIE.ie_key())
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dl/extractor/viddler.py
index 67808e7e6..642358433 100644
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dl/extractor/viddler.py
@@ -1,19 +1,16 @@
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_urlparse,
-)
from ..utils import (
float_or_none,
int_or_none,
- sanitized_Request,
)
class ViddlerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)(?:.+?\bsecret=(\d+))?'
_TESTS = [{
'url': 'http://www.viddler.com/v/43903784',
'md5': '9eee21161d2c7f5b39690c3e325fab2f',
@@ -78,23 +75,18 @@ class ViddlerIE(InfoExtractor):
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
+ video_id, secret = re.match(self._VALID_URL, url).groups()
query = {
'video_id': video_id,
'key': 'v0vhrt7bg2xq1vyxhkct',
}
-
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- secret = qs.get('secret', [None])[0]
if secret:
query['secret'] = secret
- headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'}
- request = sanitized_Request(
- 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s'
- % compat_urllib_parse_urlencode(query), None, headers)
- data = self._download_json(request, video_id)['video']
+ data = self._download_json(
+ 'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json',
+ video_id, headers={'Referer': url}, query=query)['video']
formats = []
for filed in data['files']:
diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py
index a19411a05..fe70db713 100644
--- a/youtube_dl/extractor/videodetective.py
+++ b/youtube_dl/extractor/videodetective.py
@@ -1,7 +1,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..compat import compat_urlparse
from .internetvideoarchive import InternetVideoArchiveIE
@@ -13,7 +12,7 @@ class VideoDetectiveIE(InfoExtractor):
'info_dict': {
'id': '194487',
'ext': 'mp4',
- 'title': 'KICK-ASS 2',
+ 'title': 'Kick-Ass 2',
'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
},
'params': {
@@ -24,7 +23,7 @@ class VideoDetectiveIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- og_video = self._og_search_video_url(webpage)
- query = compat_urlparse.urlparse(og_video).query
- return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key())
+ query = 'customerid=69249&publishedid=' + video_id
+ return self.url_result(
+ InternetVideoArchiveIE._build_json_url(query),
+ ie=InternetVideoArchiveIE.ie_key())
diff --git a/youtube_dl/extractor/videomega.py b/youtube_dl/extractor/videomega.py
deleted file mode 100644
index c02830ddd..000000000
--- a/youtube_dl/extractor/videomega.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- decode_packed_codes,
- sanitized_Request,
-)
-
-
-class VideoMegaIE(InfoExtractor):
- _VALID_URL = r'(?:videomega:|https?://(?:www\.)?videomega\.tv/(?:(?:view|iframe|cdn)\.php)?\?ref=)(?P<id>[A-Za-z0-9]+)'
- _TESTS = [{
- 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA',
- 'md5': 'cc1920a58add3f05c6a93285b84fb3aa',
- 'info_dict': {
- 'id': 'AOSQBJYKIDDIKYJBQSOA',
- 'ext': 'mp4',
- 'title': '1254207',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'http://videomega.tv/cdn.php?ref=AOSQBJYKIDDIKYJBQSOA&width=1070&height=600',
- 'only_matching': True,
- }, {
- 'url': 'http://videomega.tv/view.php?ref=090051111052065112106089103052052103089106112065052111051090',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- iframe_url = 'http://videomega.tv/cdn.php?ref=%s' % video_id
- req = sanitized_Request(iframe_url)
- req.add_header('Referer', url)
- req.add_header('Cookie', 'noadvtday=0')
- webpage = self._download_webpage(req, video_id)
-
- title = self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title')
- title = re.sub(
- r'(?:^[Vv]ideo[Mm]ega\.tv\s-\s*|\s*-\svideomega\.tv$)', '', title)
- thumbnail = self._search_regex(
- r'<video[^>]+?poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
-
- real_codes = decode_packed_codes(webpage)
- video_url = self._search_regex(
- r'"src"\s*,\s*"([^"]+)"', real_codes, 'video URL')
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'http_headers': {
- 'Referer': iframe_url,
- },
- }
diff --git a/youtube_dl/extractor/videomore.py b/youtube_dl/extractor/videomore.py
index 9b56630de..e3eda3327 100644
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dl/extractor/videomore.py
@@ -4,8 +4,14 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
int_or_none,
+ orderedSet,
+ parse_duration,
+ str_or_none,
+ unified_strdate,
+ url_or_none,
xpath_element,
xpath_text,
)
@@ -13,7 +19,19 @@ from ..utils import (
class VideomoreIE(InfoExtractor):
IE_NAME = 'videomore'
- _VALID_URL = r'videomore:(?P<sid>\d+)$|https?://videomore\.ru/(?:(?:embed|[^/]+/[^/]+)/|[^/]+\?.*\btrack_id=)(?P<id>\d+)(?:[/?#&]|\.(?:xml|json)|$)'
+ _VALID_URL = r'''(?x)
+ videomore:(?P<sid>\d+)$|
+ https?://(?:player\.)?videomore\.ru/
+ (?:
+ (?:
+ embed|
+ [^/]+/[^/]+
+ )/|
+ [^/]*\?.*?\btrack_id=
+ )
+ (?P<id>\d+)
+ (?:[/?#&]|\.(?:xml|json)|$)
+ '''
_TESTS = [{
'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617',
'md5': '44455a346edc0d509ac5b5a5b531dc35',
@@ -79,6 +97,9 @@ class VideomoreIE(InfoExtractor):
}, {
'url': 'videomore:367617',
'only_matching': True,
+ }, {
+ 'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=',
+ 'only_matching': True,
}]
@staticmethod
@@ -136,7 +157,7 @@ class VideomoreIE(InfoExtractor):
class VideomoreVideoIE(InfoExtractor):
IE_NAME = 'videomore:video'
- _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)[/?#&]*$'
+ _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$'
_TESTS = [{
# single video with og:video:iframe
'url': 'http://videomore.ru/elki_3',
@@ -176,6 +197,9 @@ class VideomoreVideoIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so',
+ 'only_matching': True,
}]
@classmethod
@@ -196,13 +220,16 @@ class VideomoreVideoIE(InfoExtractor):
r'track-id=["\'](\d+)',
r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id')
video_url = 'videomore:%s' % video_id
+ else:
+ video_id = None
- return self.url_result(video_url, VideomoreIE.ie_key())
+ return self.url_result(
+ video_url, ie=VideomoreIE.ie_key(), video_id=video_id)
class VideomoreSeasonIE(InfoExtractor):
IE_NAME = 'videomore:season'
- _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)[/?#&]*$'
+ _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$'
_TESTS = [{
'url': 'http://videomore.ru/molodezhka/sezon_promo',
'info_dict': {
@@ -210,8 +237,16 @@ class VideomoreSeasonIE(InfoExtractor):
'title': 'Молодежка Промо',
},
'playlist_mincount': 12,
+ }, {
+ 'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so',
+ 'only_matching': True,
}]
+ @classmethod
+ def suitable(cls, url):
+ return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url))
+ else super(VideomoreSeasonIE, cls).suitable(url))
+
def _real_extract(self, url):
display_id = self._match_id(url)
@@ -219,9 +254,54 @@ class VideomoreSeasonIE(InfoExtractor):
title = self._og_search_title(webpage)
- entries = [
- self.url_result(item) for item in re.findall(
- r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
- % display_id, webpage)]
+ data = self._parse_json(
+ self._html_search_regex(
+ r'\bclass=["\']seasons-tracks["\'][^>]+\bdata-custom-data=(["\'])(?P<value>{.+?})\1',
+ webpage, 'data', default='{}', group='value'),
+ display_id, fatal=False)
+
+ entries = []
+
+ if data:
+ episodes = data.get('episodes')
+ if isinstance(episodes, list):
+ for ep in episodes:
+ if not isinstance(ep, dict):
+ continue
+ ep_id = int_or_none(ep.get('id'))
+ ep_url = url_or_none(ep.get('url'))
+ if ep_id:
+ e = {
+ 'url': 'videomore:%s' % ep_id,
+ 'id': compat_str(ep_id),
+ }
+ elif ep_url:
+ e = {'url': ep_url}
+ else:
+ continue
+ e.update({
+ '_type': 'url',
+ 'ie_key': VideomoreIE.ie_key(),
+ 'title': str_or_none(ep.get('title')),
+ 'thumbnail': url_or_none(ep.get('image')),
+ 'duration': parse_duration(ep.get('duration')),
+ 'episode_number': int_or_none(ep.get('number')),
+ 'upload_date': unified_strdate(ep.get('date')),
+ })
+ entries.append(e)
+
+ if not entries:
+ entries = [
+ self.url_result(
+ 'videomore:%s' % video_id, ie=VideomoreIE.ie_key(),
+ video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r':(?:id|key)=["\'](\d+)["\']', webpage))]
+
+ if not entries:
+ entries = [
+ self.url_result(item) for item in re.findall(
+ r'<a[^>]+href="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^>]+class="widget-item-desc"'
+ % display_id, webpage)]
return self.playlist_result(entries, display_id, title)
diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py
deleted file mode 100644
index cf690d7b0..000000000
--- a/youtube_dl/extractor/videopremium.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import random
-
-from .common import InfoExtractor
-
-
-class VideoPremiumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
- _TEST = {
- 'url': 'http://videopremium.tv/4w7oadjsf156',
- 'info_dict': {
- 'id': '4w7oadjsf156',
- 'ext': 'f4v',
- 'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Test file has been deleted.',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage_url = 'http://videopremium.tv/' + video_id
- webpage = self._download_webpage(webpage_url, video_id)
-
- if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage):
- # Download again, we need a cookie
- webpage = self._download_webpage(
- webpage_url, video_id,
- note='Downloading webpage again (with cookie)')
-
- video_title = self._html_search_regex(
- r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')
-
- return {
- 'id': video_id,
- 'url': 'rtmp://e%d.md.iplay.md/play' % random.randint(1, 16),
- 'play_path': 'mp4:%s.f4v' % video_id,
- 'page_url': 'http://videopremium.tv/' + video_id,
- 'player_url': 'http://videopremium.tv/uplayer/uppod.swf',
- 'ext': 'f4v',
- 'title': video_title,
- }
diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py
index c43d1a1e8..d6b92b1c8 100644
--- a/youtube_dl/extractor/viewlift.py
+++ b/youtube_dl/extractor/viewlift.py
@@ -1,27 +1,62 @@
from __future__ import unicode_literals
-import base64
+import json
import re
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
- clean_html,
- determine_ext,
int_or_none,
- js_to_json,
parse_age_limit,
- parse_duration,
)
class ViewLiftBaseIE(InfoExtractor):
- _DOMAINS_REGEX = r'(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
+ _API_BASE = 'https://prod-api.viewlift.com/'
+ _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
+ _SITE_MAP = {
+ 'ftfnext': 'lax',
+ 'funnyforfree': 'snagfilms',
+ 'hoichoi': 'hoichoitv',
+ 'kiddovid': 'snagfilms',
+ 'laxsportsnetwork': 'lax',
+ 'legapallacanestro': 'lnp',
+ 'marquee': 'marquee-tv',
+ 'monumentalsportsnetwork': 'monumental-network',
+ 'moviespree': 'bingeflix',
+ 'pflmma': 'pfl',
+ 'snagxtreme': 'snagfilms',
+ 'theidentitytb': 'tampabay',
+ 'vayafilm': 'snagfilms',
+ }
+ _TOKENS = {}
+
+ def _call_api(self, site, path, video_id, query):
+ token = self._TOKENS.get(site)
+ if not token:
+ token_query = {'site': site}
+ email, password = self._get_login_info(netrc_machine=site)
+ if email:
+ resp = self._download_json(
+ self._API_BASE + 'identity/signin', video_id,
+ 'Logging in', query=token_query, data=json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode())
+ else:
+ resp = self._download_json(
+ self._API_BASE + 'identity/anonymous-token', video_id,
+ 'Downloading authorization token', query=token_query)
+ self._TOKENS[site] = token = resp['authorizationToken']
+ return self._download_json(
+ self._API_BASE + path, video_id,
+ headers={'Authorization': token}, query=query)
class ViewLiftEmbedIE(ViewLiftBaseIE):
- _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+ IE_NAME = 'viewlift:embed'
+ _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93',
@@ -29,6 +64,9 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'id': '74849a00-85a9-11e1-9660-123139220831',
'ext': 'mp4',
'title': '#whilewewatch',
+ 'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
+ 'timestamp': 1334350096,
+ 'upload_date': '20120413',
}
}, {
# invalid labels, 360p is better that 480p
@@ -38,7 +76,8 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
'ext': 'mp4',
'title': 'Life in Limbo',
- }
+ },
+ 'skip': 'The video does not exist',
}, {
'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
'only_matching': True,
@@ -53,67 +92,68 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
return mobj.group('url')
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- if '>This film is not playable in your area.<' in webpage:
- raise ExtractorError(
- 'Film %s is not playable in your area.' % video_id, expected=True)
+ domain, film_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ try:
+ content_data = self._call_api(
+ site, 'entitlement/video/status', film_id, {
+ 'id': film_id
+ })['video']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
+ if error_message == 'User does not have a valid subscription or has not purchased this content.':
+ self.raise_login_required()
+ raise ExtractorError(error_message, expected=True)
+ raise
+ gist = content_data['gist']
+ title = gist['title']
+ video_assets = content_data['streamingInfo']['videoAssets']
formats = []
- has_bitrate = False
- sources = self._parse_json(self._search_regex(
- r'(?s)sources:\s*(\[.+?\]),', webpage,
- 'sources', default='[]'), video_id, js_to_json)
- for source in sources:
- file_ = source.get('file')
- if not file_:
+ mpeg_video_assets = video_assets.get('mpeg') or []
+ for video_asset in mpeg_video_assets:
+ video_asset_url = video_asset.get('url')
+ if not video_asset:
continue
- type_ = source.get('type')
- ext = determine_ext(file_)
- format_id = source.get('label') or ext
- if all(v in ('m3u8', 'hls') for v in (type_, ext)):
- formats.extend(self._extract_m3u8_formats(
- file_, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- bitrate = int_or_none(self._search_regex(
- [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
- file_, 'bitrate', default=None))
- if not has_bitrate and bitrate:
- has_bitrate = True
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]$', format_id, 'height', default=None))
- formats.append({
- 'url': file_,
- 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
- 'tbr': bitrate,
- 'height': height,
- })
- if not formats:
- hls_url = self._parse_json(self._search_regex(
- r'filmInfo\.src\s*=\s*({.+?});',
- webpage, 'src'), video_id, js_to_json)['src']
- formats = self._extract_m3u8_formats(
- hls_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False)
- field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
- self._sort_formats(formats, field_preference)
-
- title = self._search_regex(
- [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
- webpage, 'title')
-
- return {
- 'id': video_id,
+ bitrate = int_or_none(video_asset.get('bitrate'))
+ height = int_or_none(self._search_regex(
+ r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+ 'height', default=None))
+ formats.append({
+ 'url': video_asset_url,
+ 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+ 'tbr': bitrate,
+ 'height': height,
+ 'vcodec': video_asset.get('codec'),
+ })
+
+ hls_url = video_assets.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+ info = {
+ 'id': film_id,
'title': title,
+ 'description': gist.get('description'),
+ 'thumbnail': gist.get('videoImageUrl'),
+ 'duration': int_or_none(gist.get('runtime')),
+ 'age_limit': parse_age_limit(content_data.get('parentalRating')),
+ 'timestamp': int_or_none(gist.get('publishDate'), 1000),
'formats': formats,
}
+ for k in ('categories', 'tags'):
+ info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
+ return info
class ViewLiftIE(ViewLiftBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
+ IE_NAME = 'viewlift'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942',
@@ -128,7 +168,7 @@ class ViewLiftIE(ViewLiftBaseIE):
'categories': 'mincount:3',
'age_limit': 14,
'upload_date': '20150421',
- 'timestamp': 1429656819,
+ 'timestamp': 1429656820,
}
}, {
'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
@@ -141,11 +181,30 @@ class ViewLiftIE(ViewLiftBaseIE):
'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 979,
- 'categories': 'mincount:2',
'timestamp': 1399478279,
'upload_date': '20140507',
}
}, {
+ 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
+ 'info_dict': {
+ 'id': '00000148-7b53-de26-a9fb-fbf306f70020',
+ 'display_id': 'augie_alone/s_2_ep_12_love',
+ 'ext': 'mp4',
+ 'title': 'S. 2 Ep. 12 - Love',
+ 'description': 'Augie finds love.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 107,
+ 'upload_date': '20141012',
+ 'timestamp': 1413129540,
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://main.snagfilms.com/films/title/the_freebie',
+ 'only_matching': True,
+ }, {
# Film is not playable in your area.
'url': 'http://www.snagfilms.com/films/title/inside_mecca',
'only_matching': True,
@@ -160,107 +219,32 @@ class ViewLiftIE(ViewLiftBaseIE):
# Was once Kaltura embed
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
'only_matching': True,
+ }, {
+ 'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
-
- webpage = self._download_webpage(url, display_id)
-
- if ">Sorry, the Film you're looking for is not available.<" in webpage:
- raise ExtractorError(
- 'Film %s is not available.' % display_id, expected=True)
-
- initial_store_state = self._search_regex(
- r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
- webpage, 'Initial Store State', default=None)
- if initial_store_state:
- modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
- initial_store_state).decode()), display_id)['page']['data']['modules']
- content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
- gist = content_data['gist']
- film_id = gist['id']
- title = gist['title']
- video_assets = content_data['streamingInfo']['videoAssets']
-
- formats = []
- mpeg_video_assets = video_assets.get('mpeg') or []
- for video_asset in mpeg_video_assets:
- video_asset_url = video_asset.get('url')
- if not video_asset:
- continue
- bitrate = int_or_none(video_asset.get('bitrate'))
- height = int_or_none(self._search_regex(
- r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
- 'height', default=None))
- formats.append({
- 'url': video_asset_url,
- 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
- 'tbr': bitrate,
- 'height': height,
- 'vcodec': video_asset.get('codec'),
- })
-
- hls_url = video_assets.get('hls')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+ @classmethod
+ def suitable(cls, url):
+ return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
- info = {
- 'id': film_id,
- 'display_id': display_id,
- 'title': title,
- 'description': gist.get('description'),
- 'thumbnail': gist.get('videoImageUrl'),
- 'duration': int_or_none(gist.get('runtime')),
- 'age_limit': parse_age_limit(content_data.get('parentalRating')),
- 'timestamp': int_or_none(gist.get('publishDate'), 1000),
- 'formats': formats,
- }
- for k in ('categories', 'tags'):
- info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
- return info
- else:
- film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
-
- snag = self._parse_json(
- self._search_regex(
- r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
- display_id)
-
- for item in snag:
- if item.get('data', {}).get('film', {}).get('id') == film_id:
- data = item['data']['film']
- title = data['title']
- description = clean_html(data.get('synopsis'))
- thumbnail = data.get('image')
- duration = int_or_none(data.get('duration') or data.get('runtime'))
- categories = [
- category['title'] for category in data.get('categories', [])
- if category.get('title')]
- break
- else:
- title = self._search_regex(
- r'itemprop="title">([^<]+)<', webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
- webpage, 'description', default=None) or self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- duration = parse_duration(self._search_regex(
- r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
- webpage, 'duration', fatal=False))
- categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
-
- return {
- '_type': 'url_transparent',
- 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
- 'id': film_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'categories': categories,
- 'ie_key': 'ViewLiftEmbed',
- }
+ def _real_extract(self, url):
+ domain, path, display_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ modules = self._call_api(
+ site, 'content/pages', display_id, {
+ 'includeContent': 'true',
+ 'moduleOffset': 1,
+ 'path': path,
+ 'site': site,
+ })['modules']
+ film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+ 'id': film_id,
+ 'display_id': display_id,
+ 'ie_key': 'ViewLiftEmbed',
+ }
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
deleted file mode 100644
index d5d5b4c69..000000000
--- a/youtube_dl/extractor/viewster.py
+++ /dev/null
@@ -1,217 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse_unquote,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_iso8601,
- sanitized_Request,
- HEADRequest,
- url_basename,
-)
-
-
-class ViewsterIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
- _TESTS = [{
- # movie, Type=Movie
- 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
- 'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
- 'info_dict': {
- 'id': '1140-11855-000',
- 'ext': 'mp4',
- 'title': 'The listening Project',
- 'description': 'md5:bac720244afd1a8ea279864e67baa071',
- 'timestamp': 1214870400,
- 'upload_date': '20080701',
- 'duration': 4680,
- },
- }, {
- # series episode, Type=Episode
- 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
- 'md5': '9243079a8531809efe1b089db102c069',
- 'info_dict': {
- 'id': '1284-19427-001',
- 'ext': 'mp4',
- 'title': 'The World and a Wall',
- 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
- 'timestamp': 1428192000,
- 'upload_date': '20150405',
- 'duration': 1500,
- },
- }, {
- # serie, Type=Serie
- 'url': 'http://www.viewster.com/serie/1303-19426-000/',
- 'info_dict': {
- 'id': '1303-19426-000',
- 'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
- 'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
- },
- 'playlist_count': 13,
- }, {
- # unfinished serie, no Type
- 'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
- 'info_dict': {
- 'id': '1284-19427-000',
- 'title': 'Baby Steps—Season 2',
- 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
- },
- 'playlist_mincount': 16,
- }, {
- # geo restricted series
- 'url': 'https://www.viewster.com/serie/1280-18794-002/',
- 'only_matching': True,
- }, {
- # geo restricted video
- 'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
- 'only_matching': True,
- }]
-
- _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
-
- def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
- request = sanitized_Request(url)
- request.add_header('Accept', self._ACCEPT_HEADER)
- request.add_header('Auth-token', self._AUTH_TOKEN)
- return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- # Get 'api_token' cookie
- self._request_webpage(
- HEADRequest('http://www.viewster.com/'),
- video_id, headers=self.geo_verification_headers())
- cookies = self._get_cookies('http://www.viewster.com/')
- self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
-
- info = self._download_json(
- 'https://public-api.viewster.com/search/%s' % video_id,
- video_id, 'Downloading entry JSON')
-
- entry_id = info.get('Id') or info['id']
-
- # unfinished serie has no Type
- if info.get('Type') in ('Serie', None):
- try:
- episodes = self._download_json(
- 'https://public-api.viewster.com/series/%s/episodes' % entry_id,
- video_id, 'Downloading series JSON')
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- self.raise_geo_restricted()
- else:
- raise
- entries = [
- self.url_result(
- 'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
- for episode in episodes]
- title = (info.get('Title') or info['Synopsis']['Title']).strip()
- description = info.get('Synopsis', {}).get('Detailed')
- return self.playlist_result(entries, video_id, title, description)
-
- formats = []
- for language_set in info.get('LanguageSets', []):
- manifest_url = None
- m3u8_formats = []
- audio = language_set.get('Audio') or ''
- subtitle = language_set.get('Subtitle') or ''
- base_format_id = audio
- if subtitle:
- base_format_id += '-%s' % subtitle
-
- def concat(suffix, sep='-'):
- return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
-
- for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
- media = self._download_json(
- 'https://public-api.viewster.com/movies/%s/video' % entry_id,
- video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
- 'mediaType': media_type,
- 'language': audio,
- 'subtitle': subtitle,
- })
- if not media:
- continue
- video_url = media.get('Uri')
- if not video_url:
- continue
- ext = determine_ext(video_url)
- if ext == 'f4m':
- manifest_url = video_url
- video_url += '&' if '?' in video_url else '?'
- video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id=concat('hds')))
- elif ext == 'm3u8':
- manifest_url = video_url
- m3u8_formats = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id=concat('hls'),
- fatal=False) # m3u8 sometimes fail
- if m3u8_formats:
- formats.extend(m3u8_formats)
- else:
- qualities_basename = self._search_regex(
- r'/([^/]+)\.csmil/',
- manifest_url, 'qualities basename', default=None)
- if not qualities_basename:
- continue
- QUALITIES_RE = r'((,\d+k)+,?)'
- qualities = self._search_regex(
- QUALITIES_RE, qualities_basename,
- 'qualities', default=None)
- if not qualities:
- continue
- qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
- qualities.sort()
- http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
- http_url_basename = url_basename(video_url)
- if m3u8_formats:
- self._sort_formats(m3u8_formats)
- m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none', m3u8_formats))
- if len(qualities) == len(m3u8_formats):
- for q, m3u8_format in zip(qualities, m3u8_formats):
- f = m3u8_format.copy()
- f.update({
- 'url': video_url.replace(http_url_basename, http_template % q),
- 'format_id': f['format_id'].replace('hls', 'http'),
- 'protocol': 'http',
- })
- formats.append(f)
- else:
- for q in qualities:
- formats.append({
- 'url': video_url.replace(http_url_basename, http_template % q),
- 'ext': 'mp4',
- 'format_id': 'http-%d' % q,
- 'tbr': q,
- })
-
- if not formats and not info.get('VODSettings'):
- self.raise_geo_restricted()
-
- self._sort_formats(formats)
-
- synopsis = info.get('Synopsis') or {}
- # Prefer title outside synopsis since it's less messy
- title = (info.get('Title') or synopsis['Title']).strip()
- description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
- duration = int_or_none(info.get('Duration'))
- timestamp = parse_iso8601(info.get('ReleaseDate'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 546de95d8..b0dcdc0e6 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -21,7 +21,7 @@ from ..utils import (
class VikiBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
_API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
- _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
+ _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
_APP = '100005a'
_APP_VERSION = '2.2.5.1428709186'
@@ -377,7 +377,7 @@ class VikiChannelIE(VikiBaseIE):
for video in page['response']:
video_id = video['id']
entries.append(self.url_result(
- 'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+ 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
if not page['pagination']['next']:
break
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 0a9239b62..421795b94 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -1,33 +1,40 @@
# coding: utf-8
from __future__ import unicode_literals
+import base64
+import functools
import json
import re
import itertools
from .common import InfoExtractor
from ..compat import (
+ compat_kwargs,
compat_HTTPError,
compat_str,
compat_urlparse,
)
from ..utils import (
+ clean_html,
determine_ext,
+ dict_get,
ExtractorError,
- InAdvancePagedList,
+ js_to_json,
int_or_none,
merge_dicts,
- NO_DEFAULT,
+ OnDemandPagedList,
+ parse_filesize,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
std_headers,
+ str_or_none,
try_get,
unified_timestamp,
unsmuggle_url,
urlencode_postdata,
+ urljoin,
unescapeHTML,
- parse_filesize,
)
@@ -96,6 +103,13 @@ class VimeoBaseInfoExtractor(InfoExtractor):
webpage, 'vuid', group='vuid')
return xsrft, vuid
+ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
+ webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
+ if vimeo_config:
+ return self._parse_json(vimeo_config, video_id)
+
def _set_vimeo_cookie(self, name, value):
self._set_cookie('vimeo.com', name, value)
@@ -106,23 +120,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
def _parse_config(self, config, video_id):
video_data = config['video']
- # Extract title
video_title = video_data['title']
-
- # Extract uploader, uploader_url and uploader_id
- video_uploader = video_data.get('owner', {}).get('name')
- video_uploader_url = video_data.get('owner', {}).get('url')
- video_uploader_id = video_uploader_url.split('/')[-1] if video_uploader_url else None
-
- # Extract video thumbnail
- video_thumbnail = video_data.get('thumbnail')
- if video_thumbnail is None:
- video_thumbs = video_data.get('thumbs')
- if video_thumbs and isinstance(video_thumbs, dict):
- _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
-
- # Extract video duration
- video_duration = int_or_none(video_data.get('duration'))
+ live_event = video_data.get('live_event') or {}
+ is_live = live_event.get('status') == 'started'
formats = []
config_files = video_data.get('files') or config['request'].get('files', {})
@@ -139,59 +139,119 @@ class VimeoBaseInfoExtractor(InfoExtractor):
'tbr': int_or_none(f.get('bitrate')),
})
+ # TODO: fix handling of 308 status code returned for live archive manifest requests
+ sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
manifest_url = cdn_data.get('url')
if not manifest_url:
continue
format_id = '%s-%s' % (files_type, cdn_name)
- if files_type == 'hls':
- formats.extend(self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id=format_id,
- note='Downloading %s m3u8 information' % cdn_name,
- fatal=False))
- elif files_type == 'dash':
- mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
- mpd_manifest_urls = []
- if re.search(mpd_pattern, manifest_url):
- for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
- mpd_manifest_urls.append((format_id + suffix, re.sub(
- mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
- else:
- mpd_manifest_urls = [(format_id, manifest_url)]
- for f_id, m_url in mpd_manifest_urls:
+ sep_manifest_urls = []
+ if re.search(sep_pattern, manifest_url):
+ for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
+ sep_manifest_urls.append((format_id + suffix, re.sub(
+ sep_pattern, '/%s/' % repl, manifest_url)))
+ else:
+ sep_manifest_urls = [(format_id, manifest_url)]
+ for f_id, m_url in sep_manifest_urls:
+ if files_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ m_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
+ note='Downloading %s m3u8 information' % cdn_name,
+ fatal=False))
+ elif files_type == 'dash':
+ if 'json=1' in m_url:
+ real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
+ if real_m_url:
+ m_url = real_m_url
mpd_formats = self._extract_mpd_formats(
m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
'Downloading %s MPD information' % cdn_name,
fatal=False)
- for f in mpd_formats:
- if f.get('vcodec') == 'none':
- f['preference'] = -50
- elif f.get('acodec') == 'none':
- f['preference'] = -40
formats.extend(mpd_formats)
+ live_archive = live_event.get('archive') or {}
+ live_archive_source_url = live_archive.get('source_url')
+ if live_archive_source_url and live_archive.get('status') == 'done':
+ formats.append({
+ 'format_id': 'live-archive-source',
+ 'url': live_archive_source_url,
+ 'preference': 1,
+ })
+
+ for f in formats:
+ if f.get('vcodec') == 'none':
+ f['preference'] = -50
+ elif f.get('acodec') == 'none':
+ f['preference'] = -40
+
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
subtitles[tt['lang']] = [{
'ext': 'vtt',
- 'url': 'https://vimeo.com' + tt['url'],
+ 'url': urljoin('https://vimeo.com', tt['url']),
}]
+ thumbnails = []
+ if not is_live:
+ for key, thumb in video_data.get('thumbs', {}).items():
+ thumbnails.append({
+ 'id': key,
+ 'width': int_or_none(key),
+ 'url': thumb,
+ })
+ thumbnail = video_data.get('thumbnail')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ owner = video_data.get('owner') or {}
+ video_uploader_url = owner.get('url')
+
return {
- 'title': video_title,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
+ 'id': str_or_none(video_data.get('id')) or video_id,
+ 'title': self._live_title(video_title) if is_live else video_title,
+ 'uploader': owner.get('name'),
+ 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
'uploader_url': video_uploader_url,
- 'thumbnail': video_thumbnail,
- 'duration': video_duration,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(video_data.get('duration')),
'formats': formats,
'subtitles': subtitles,
+ 'is_live': is_live,
}
+ def _extract_original_format(self, url, video_id):
+ download_data = self._download_json(
+ url, video_id, fatal=False,
+ query={'action': 'load_download_config'},
+ headers={'X-Requested-With': 'XMLHttpRequest'})
+ if download_data:
+ source_file = download_data.get('source_file')
+ if isinstance(source_file, dict):
+ download_url = source_file.get('download_url')
+ if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
+ source_name = source_file.get('public_name', 'Original')
+ if self._is_valid_url(download_url, video_id, '%s video' % source_name):
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
+ return {
+ 'url': download_url,
+ 'ext': ext,
+ 'width': int_or_none(source_file.get('width')),
+ 'height': int_or_none(source_file.get('height')),
+ 'filesize': parse_filesize(source_file.get('size')),
+ 'format_id': source_name,
+ 'preference': 1,
+ }
+
class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
@@ -202,12 +262,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
(?:
(?:
www|
- (?P<player>player)
+ player
)
\.
)?
- vimeo(?P<pro>pro)?\.com/
- (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+ vimeo(?:pro)?\.com/
+ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)?
(?:
(?:
@@ -228,7 +288,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '56015672',
'ext': 'mp4',
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
- 'description': 'md5:509a9ad5c9bf97c60faee9203aca4479',
+ 'description': 'md5:2d3305bad981a06ff79f027f19865021',
'timestamp': 1355990239,
'upload_date': '20121220',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
@@ -237,6 +297,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 10,
'license': 'by-sa',
},
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
},
{
'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
@@ -249,8 +312,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'openstreetmapus',
'uploader': 'OpenStreetMap US',
'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
- 'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30',
+ 'description': 'md5:2c362968038d4499f4d79f88458590c1',
'duration': 1595,
+ 'upload_date': '20130610',
+ 'timestamp': 1370893156,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
},
},
{
@@ -267,6 +335,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 3610,
'description': None,
},
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
},
{
'url': 'http://vimeo.com/68375962',
@@ -285,6 +357,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
},
'params': {
+ 'format': 'best[protocol=https]',
'videopassword': 'youtube-dl',
},
},
@@ -385,10 +458,31 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': '10Ft Films',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
'uploader_id': 'tenfootfilms',
+ 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
+ 'upload_date': '20130830',
+ 'timestamp': 1377853339,
},
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl password protected test video',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
},
{
'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
@@ -416,6 +510,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'url': 'https://vimeo.com/160743502/abd0e13fb4',
'only_matching': True,
}
+ # https://gettingthingsdone.com/workflowmap/
+ # vimeo embed with check-password page protected by Referer header
]
@staticmethod
@@ -446,18 +542,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
urls = VimeoIE._extract_urls(url, webpage)
return urls[0] if urls else None
- def _verify_player_video_password(self, url, video_id):
+ def _verify_player_video_password(self, url, video_id, headers):
password = self._downloader.params.get('videopassword')
if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option')
- data = urlencode_postdata({'password': password})
- pass_url = url + '/check-password'
- password_request = sanitized_Request(pass_url, data)
- password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Referer', url)
- return self._download_json(
- password_request, video_id,
- 'Verifying the password', 'Wrong password')
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+ data = urlencode_postdata({
+ 'password': base64.b64encode(password.encode()),
+ })
+ headers = merge_dicts(headers, {
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ checked = self._download_json(
+ url + '/check-password', video_id,
+ 'Verifying the password', data=data, headers=headers)
+ if checked is False:
+ raise ExtractorError('Wrong video password', expected=True)
+ return checked
def _real_initialize(self):
self._login()
@@ -474,24 +574,26 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
# Extract ID from URL
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
orig_url = url
- if mobj.group('pro') or mobj.group('player'):
+ is_pro = 'vimeopro.com/' in url
+ is_player = '://player.vimeo.com/video/' in url
+ if is_pro:
+ # some videos require portfolio_id to be present in player url
+ # https://github.com/ytdl-org/youtube-dl/issues/20070
+ url = self._extract_url(url, self._download_webpage(url, video_id))
+ if not url:
+ url = 'https://vimeo.com/' + video_id
+ elif is_player:
url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
- # Retrieve video webpage to extract further information
- request = sanitized_Request(url, headers=headers)
try:
- webpage, urlh = self._download_webpage_handle(request, video_id)
- redirect_url = compat_str(urlh.geturl())
- # Some URLs redirect to ondemand can't be extracted with
- # this extractor right away thus should be passed through
- # ondemand extractor (e.g. https://vimeo.com/73445910)
- if VimeoOndemandIE.suitable(redirect_url):
- return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
+ # Retrieve video webpage to extract further information
+ webpage, urlh = self._download_webpage_handle(
+ url, video_id, headers=headers)
+ redirect_url = urlh.geturl()
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
@@ -508,11 +610,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
# and latter we extract those that are Vimeo specific.
self.report_extraction(video_id)
- vimeo_config = self._search_regex(
- r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
- 'vimeo config', default=None)
+ vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
- seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+ seed_status = vimeo_config.get('seed_status', {})
if seed_status.get('state') == 'failed':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, seed_status['title']),
@@ -520,6 +620,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
cc_license = None
timestamp = None
+ video_description = None
# Extract the config JSON
try:
@@ -530,18 +631,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
if not config_url:
# Sometimes new react-based page is served instead of old one that require
# different config URL extraction approach (see
- # https://github.com/rg3/youtube-dl/pull/7209)
- vimeo_clip_page_config = self._search_regex(
- r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
- 'vimeo clip page config')
- page_config = self._parse_json(vimeo_clip_page_config, video_id)
+ # https://github.com/ytdl-org/youtube-dl/pull/7209)
+ page_config = self._parse_json(self._search_regex(
+ r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+ webpage, 'page config'), video_id)
config_url = page_config['player']['config_url']
cc_license = page_config.get('cc_license')
timestamp = try_get(
page_config, lambda x: x['clip']['uploaded_on'],
compat_str)
- config_json = self._download_webpage(config_url, video_id)
- config = json.loads(config_json)
+ video_description = clean_html(dict_get(
+ page_config, ('description', 'description_html_escaped')))
+ config = self._download_json(config_url, video_id)
except RegexNotFoundError:
# For pro videos or player.vimeo.com urls
# We try to find out to which variable is assigned the config dic
@@ -551,6 +652,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
+ config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
config = self._search_regex(config_re, webpage, 'info section',
flags=re.DOTALL)
config = json.loads(config)
@@ -569,7 +671,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
cause=e)
else:
if config.get('view') == 4:
- config = self._verify_player_video_password(redirect_url, video_id)
+ config = self._verify_player_video_password(redirect_url, video_id, headers)
vod = config.get('video', {}).get('vod', {})
@@ -594,14 +696,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
{'force_feature_id': True}), 'Vimeo')
# Extract video description
-
- video_description = self._html_search_regex(
- r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
- webpage, 'description', default=None)
+ if not video_description:
+ video_description = self._html_search_regex(
+ r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
+ webpage, 'description', default=None)
if not video_description:
video_description = self._html_search_meta(
'description', webpage, default=None)
- if not video_description and mobj.group('pro'):
+ if not video_description and is_pro:
orig_webpage = self._download_webpage(
orig_url, video_id,
note='Downloading webpage for description',
@@ -609,7 +711,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if orig_webpage:
video_description = self._html_search_meta(
'description', orig_webpage, default=None)
- if not video_description and not mobj.group('player'):
+ if not video_description and not is_player:
self._downloader.report_warning('Cannot find video description')
# Extract upload date
@@ -629,29 +731,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
comment_count = None
formats = []
- download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
- 'X-Requested-With': 'XMLHttpRequest'})
- download_data = self._download_json(download_request, video_id, fatal=False)
- if download_data:
- source_file = download_data.get('source_file')
- if isinstance(source_file, dict):
- download_url = source_file.get('download_url')
- if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
- source_name = source_file.get('public_name', 'Original')
- if self._is_valid_url(download_url, video_id, '%s video' % source_name):
- ext = (try_get(
- source_file, lambda x: x['extension'],
- compat_str) or determine_ext(
- download_url, None) or 'mp4').lower()
- formats.append({
- 'url': download_url,
- 'ext': ext,
- 'width': int_or_none(source_file.get('width')),
- 'height': int_or_none(source_file.get('height')),
- 'filesize': parse_filesize(source_file.get('size')),
- 'format_id': source_name,
- 'preference': 1,
- })
+
+ source_format = self._extract_original_format(
+ 'https://vimeo.com/' + video_id, video_id)
+ if source_format:
+ formats.append(source_format)
info_dict_config = self._parse_config(config, video_id)
formats.extend(info_dict_config['formats'])
@@ -667,7 +751,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = {
- 'id': video_id,
'formats': formats,
'timestamp': unified_timestamp(timestamp),
'description': video_description,
@@ -685,9 +768,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
return info_dict
-class VimeoOndemandIE(VimeoBaseInfoExtractor):
+class VimeoOndemandIE(VimeoIE):
IE_NAME = 'vimeo:ondemand'
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
_TESTS = [{
# ondemand video not available via https://vimeo.com/id
'url': 'https://vimeo.com/ondemand/20704',
@@ -699,24 +782,32 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
'uploader': 'גם סרטים',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
'uploader_id': 'gumfilms',
+ 'description': 'md5:4c027c965e439de4baab621e48b60791',
+ 'upload_date': '20140906',
+ 'timestamp': 1410032453,
},
'params': {
'format': 'best[protocol=https]',
},
+ 'expected_warnings': ['Unable to download JSON metadata'],
}, {
# requires Referer to be passed along with og:video:url
'url': 'https://vimeo.com/ondemand/36938/126682985',
'info_dict': {
- 'id': '126682985',
+ 'id': '126584684',
'ext': 'mp4',
'title': 'Rävlock, rätt läte på rätt plats',
'uploader': 'Lindroth & Norin',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847',
- 'uploader_id': 'user14430847',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
+ 'uploader_id': 'lindrothnorin',
+ 'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
+ 'upload_date': '20150502',
+ 'timestamp': 1430586422,
},
'params': {
'skip_download': True,
},
+ 'expected_warnings': ['Unable to download JSON metadata'],
}, {
'url': 'https://vimeo.com/ondemand/nazmaalik',
'only_matching': True,
@@ -728,16 +819,6 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor):
'only_matching': True,
}]
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- return self.url_result(
- # Some videos require Referer to be passed along with og:video:url
- # similarly to generic vimeo embeds (e.g.
- # https://vimeo.com/ondemand/36938/126682985).
- VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url),
- VimeoIE.ie_key())
-
class VimeoChannelIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:channel'
@@ -753,39 +834,14 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
},
'playlist_mincount': 25,
}]
+ _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
def _page_url(self, base_url, pagenum):
return '%s/videos/page:%d/' % (base_url, pagenum)
def _extract_list_title(self, webpage):
- return self._TITLE or self._html_search_regex(self._TITLE_RE, webpage, 'list title')
-
- def _login_list_password(self, page_url, list_id, webpage):
- login_form = self._search_regex(
- r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
- webpage, 'login form', default=None)
- if not login_form:
- return webpage
-
- password = self._downloader.params.get('videopassword')
- if password is None:
- raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
- fields = self._hidden_inputs(login_form)
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- fields['token'] = token
- fields['password'] = password
- post = urlencode_postdata(fields)
- password_path = self._search_regex(
- r'action="([^"]+)"', login_form, 'password URL')
- password_url = compat_urlparse.urljoin(page_url, password_path)
- password_request = sanitized_Request(password_url, post)
- password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
- self._set_vimeo_cookie('vuid', vuid)
- self._set_vimeo_cookie('xsrft', token)
-
- return self._download_webpage(
- password_request, list_id,
- 'Verifying the password', 'Wrong password')
+ return self._TITLE or self._html_search_regex(
+ self._TITLE_RE, webpage, 'list title', fatal=False)
def _title_and_entries(self, list_id, base_url):
for pagenum in itertools.count(1):
@@ -795,7 +851,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
'Downloading page %s' % pagenum)
if pagenum == 1:
- webpage = self._login_list_password(page_url, list_id, webpage)
yield self._extract_list_title(webpage)
# Try extracting href first since not all videos are available via
@@ -823,14 +878,13 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
return self.playlist_result(title_and_entries, list_id, list_title)
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
+ channel_id = self._match_id(url)
+ return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
class VimeoUserIE(VimeoChannelIE):
IE_NAME = 'vimeo:user'
- _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
+ _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
_TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
_TESTS = [{
'url': 'https://vimeo.com/nkistudio/videos',
@@ -840,16 +894,12 @@ class VimeoUserIE(VimeoChannelIE):
},
'playlist_mincount': 66,
}]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- return self._extract_videos(name, 'https://vimeo.com/%s' % name)
+ _BASE_URL_TEMPL = 'https://vimeo.com/%s'
-class VimeoAlbumIE(VimeoChannelIE):
+class VimeoAlbumIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:album'
- _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
+ _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'https://vimeo.com/album/2632481',
@@ -869,48 +919,87 @@ class VimeoAlbumIE(VimeoChannelIE):
'params': {
'videopassword': 'youtube-dl',
}
- }, {
- 'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
- 'only_matching': True,
- }, {
- # TODO: respect page number
- 'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
- 'only_matching': True,
}]
-
- def _page_url(self, base_url, pagenum):
- return '%s/page:%d/' % (base_url, pagenum)
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
+ api_page = page + 1
+ query = {
+ 'fields': 'link,uri',
+ 'page': api_page,
+ 'per_page': self._PAGE_SIZE,
+ }
+ if hashed_pass:
+ query['_hashed_pass'] = hashed_pass
+ videos = self._download_json(
+ 'https://api.vimeo.com/albums/%s/videos' % album_id,
+ album_id, 'Downloading page %d' % api_page, query=query, headers={
+ 'Authorization': 'jwt ' + authorizaion,
+ })['data']
+ for video in videos:
+ link = video.get('link')
+ if not link:
+ continue
+ uri = video.get('uri')
+ video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
+ yield self.url_result(link, VimeoIE.ie_key(), video_id)
def _real_extract(self, url):
album_id = self._match_id(url)
- return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
-
-
-class VimeoGroupsIE(VimeoAlbumIE):
+ webpage = self._download_webpage(url, album_id)
+ viewer = self._parse_json(self._search_regex(
+ r'bootstrap_data\s*=\s*({.+?})</script>',
+ webpage, 'bootstrap data'), album_id)['viewer']
+ jwt = viewer['jwt']
+ album = self._download_json(
+ 'https://api.vimeo.com/albums/' + album_id,
+ album_id, headers={'Authorization': 'jwt ' + jwt},
+ query={'fields': 'description,name,privacy'})
+ hashed_pass = None
+ if try_get(album, lambda x: x['privacy']['view']) == 'password':
+ password = self._downloader.params.get('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This album is protected by a password, use the --video-password option',
+ expected=True)
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
+ try:
+ hashed_pass = self._download_json(
+ 'https://vimeo.com/showcase/%s/auth' % album_id,
+ album_id, 'Verifying the password', data=urlencode_postdata({
+ 'password': password,
+ 'token': viewer['xsrft'],
+ }), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['hashed_pass']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError('Wrong password', expected=True)
+ raise
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, album_id, album.get('name'), album.get('description'))
+
+
+class VimeoGroupsIE(VimeoChannelIE):
IE_NAME = 'vimeo:group'
- _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{
- 'url': 'https://vimeo.com/groups/rolexawards',
+ 'url': 'https://vimeo.com/groups/kattykay',
'info_dict': {
- 'id': 'rolexawards',
- 'title': 'Rolex Awards for Enterprise',
+ 'id': 'kattykay',
+ 'title': 'Katty Kay',
},
- 'playlist_mincount': 73,
+ 'playlist_mincount': 27,
}]
-
- def _extract_list_title(self, webpage):
- return self._og_search_title(webpage)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
+ _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
class VimeoReviewIE(VimeoBaseInfoExtractor):
IE_NAME = 'vimeo:review'
IE_DESC = 'Review pages on vimeo'
- _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
+ _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
_TESTS = [{
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -920,7 +1009,9 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'title': "DICK HARDWICK 'Comedian'",
'uploader': 'Richard Hardwick',
'uploader_id': 'user21297594',
- }
+ 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
}, {
'note': 'video player needs Referer',
'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
@@ -933,7 +1024,8 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'duration': 2773,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader_id': 'user22258446',
- }
+ },
+ 'skip': 'video gone',
}, {
'note': 'Password protected',
'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
@@ -953,29 +1045,20 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
def _real_initialize(self):
self._login()
- def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
- webpage = self._download_webpage(webpage_url, video_id)
- config_url = self._html_search_regex(
- r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'config URL', default=None, group='url')
- if not config_url:
- data = self._parse_json(self._search_regex(
- r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
- default=NO_DEFAULT if video_password_verified else '{}'), video_id)
- config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
- if config_url is None:
- self._verify_video_password(webpage_url, video_id, webpage)
- config_url = self._get_config_url(
- webpage_url, video_id, video_password_verified=True)
- return config_url
-
def _real_extract(self, url):
- video_id = self._match_id(url)
- config_url = self._get_config_url(url, video_id)
+ page_url, video_id = re.match(self._VALID_URL, url).groups()
+ clip_data = self._download_json(
+ page_url.replace('/review/', '/review/data/'),
+ video_id)['clipData']
+ config_url = clip_data['configUrl']
config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
+ source_format = self._extract_original_format(
+ page_url + '/action', video_id)
+ if source_format:
+ info_dict['formats'].append(source_format)
self._vimeo_sort_formats(info_dict['formats'])
- info_dict['id'] = video_id
+ info_dict['description'] = clean_html(clip_data.get('description'))
return info_dict
@@ -1005,7 +1088,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
-class VimeoLikesIE(InfoExtractor):
+class VimeoLikesIE(VimeoChannelIE):
_VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
IE_NAME = 'vimeo:likes'
IE_DESC = 'Vimeo user likes'
@@ -1013,52 +1096,33 @@ class VimeoLikesIE(InfoExtractor):
'url': 'https://vimeo.com/user755559/likes/',
'playlist_mincount': 293,
'info_dict': {
- 'id': 'user755559_likes',
- 'description': 'See all the videos urza likes',
- 'title': 'Videos urza likes',
+ 'id': 'user755559',
+ 'title': 'urza’s Likes',
},
}, {
'url': 'https://vimeo.com/stormlapse/likes',
'only_matching': True,
}]
+ def _page_url(self, base_url, pagenum):
+ return '%s/page:%d/' % (base_url, pagenum)
+
def _real_extract(self, url):
user_id = self._match_id(url)
- webpage = self._download_webpage(url, user_id)
- page_count = self._int(
- self._search_regex(
- r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
- .*?</a></li>\s*<li\s+class="pagination_next">
- ''', webpage, 'page count', default=1),
- 'page count', fatal=True)
- PAGE_SIZE = 12
- title = self._html_search_regex(
- r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
- description = self._html_search_meta('description', webpage)
-
- def _get_page(idx):
- page_url = 'https://vimeo.com/%s/likes/page:%d/sort:date' % (
- user_id, idx + 1)
- webpage = self._download_webpage(
- page_url, user_id,
- note='Downloading page %d/%d' % (idx + 1, page_count))
- video_list = self._search_regex(
- r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
- webpage, 'video content')
- paths = re.findall(
- r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list)
- for path in paths:
- yield {
- '_type': 'url',
- 'url': compat_urlparse.urljoin(page_url, path),
- }
-
- pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+ return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
- return {
- '_type': 'playlist',
- 'id': '%s_likes' % user_id,
- 'title': title,
- 'description': description,
- 'entries': pl,
- }
+
+class VHXEmbedIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vhx:embed'
+ _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ config_url = self._parse_json(self._search_regex(
+ r'window\.OTTData\s*=\s*({.+})', webpage,
+ 'ott data'), video_id, js_to_json)['config_url']
+ config = self._download_json(config_url, video_id)
+ info = self._parse_config(config, video_id)
+ self._vimeo_sort_formats(info['formats'])
+ return info
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
index ef8b9bcb7..00ec006c4 100644
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -2,21 +2,18 @@
from __future__ import unicode_literals
import collections
+import functools
import re
-import sys
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
+from ..compat import compat_urlparse
from ..utils import (
clean_html,
ExtractorError,
get_element_by_class,
int_or_none,
+ OnDemandPagedList,
orderedSet,
- remove_start,
str_or_none,
str_to_int,
unescapeHTML,
@@ -25,6 +22,7 @@ from ..utils import (
urlencode_postdata,
)
from .dailymotion import DailymotionIE
+from .odnoklassniki import OdnoklassnikiIE
from .pladform import PladformIE
from .vimeo import VimeoIE
from .youtube import YoutubeIE
@@ -48,24 +46,9 @@ class VKBaseIE(InfoExtractor):
'pass': password.encode('cp1251'),
})
- # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
- # and expects the first one to be set rather than second (see
- # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201).
- # As of RFC6265 the newer one cookie should be set into cookie store
- # what actually happens.
- # We will workaround this VK issue by resetting the remixlhk cookie to
- # the first one manually.
- for header, cookies in url_handle.headers.items():
- if header.lower() != 'set-cookie':
- continue
- if sys.version_info[0] >= 3:
- cookies = cookies.encode('iso-8859-1')
- cookies = cookies.decode('utf-8')
- remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
- if remixlhk:
- value, domain = remixlhk.groups()
- self._set_cookie(domain, 'remixlhk', value)
- break
+ # vk serves two same remixlhk cookies in Set-Cookie header and expects
+ # first one to be actually set
+ self._apply_first_set_cookie_header(url_handle, 'remixlhk')
login_page = self._download_webpage(
'https://login.vk.com/?act=login', None,
@@ -79,6 +62,18 @@ class VKBaseIE(InfoExtractor):
def _real_initialize(self):
self._login()
+ def _download_payload(self, path, video_id, data, fatal=True):
+ data['al'] = 1
+ code, payload = self._download_json(
+ 'https://vk.com/%s.php' % path, video_id,
+ data=urlencode_postdata(data), fatal=fatal,
+ headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
+ if code == '3':
+ self.raise_login_required()
+ elif code == '8':
+ raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
+ return payload
+
class VKIE(VKBaseIE):
IE_NAME = 'vk'
@@ -103,7 +98,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
- 'id': '162222515',
+ 'id': '-77521_162222515',
'ext': 'mp4',
'title': 'ProtivoGunz - Хуёвая песня',
'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
@@ -115,9 +110,8 @@ class VKIE(VKBaseIE):
},
{
'url': 'http://vk.com/video205387401_165548505',
- 'md5': '6c0aeb2e90396ba97035b9cbde548700',
'info_dict': {
- 'id': '165548505',
+ 'id': '205387401_165548505',
'ext': 'mp4',
'title': 'No name',
'uploader': 'Tom Cruise',
@@ -129,18 +123,18 @@ class VKIE(VKBaseIE):
},
{
'note': 'Embedded video',
- 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
- 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
+ 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
+ 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
'info_dict': {
- 'id': '162925554',
+ 'id': '-77521_162222515',
'ext': 'mp4',
- 'uploader': 'Vladimir Gavrin',
- 'title': 'Lin Dan',
- 'duration': 101,
- 'upload_date': '20120730',
- 'view_count': int,
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'duration': 195,
+ 'upload_date': '20120212',
+ 'timestamp': 1329049880,
+ 'uploader_id': '-77521',
},
- 'skip': 'This video has been removed from public access.',
},
{
# VIDEO NOW REMOVED
@@ -149,7 +143,7 @@ class VKIE(VKBaseIE):
'md5': 'a590bcaf3d543576c9bd162812387666',
'note': 'Only available for registered users',
'info_dict': {
- 'id': '164049491',
+ 'id': '-8871596_164049491',
'ext': 'mp4',
'uploader': 'Триллеры',
'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
@@ -157,18 +151,19 @@ class VKIE(VKBaseIE):
'upload_date': '20121218',
'view_count': int,
},
- 'skip': 'Requires vk account credentials',
+ 'skip': 'Removed',
},
{
'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
- 'md5': '4d7a5ef8cf114dfa09577e57b2993202',
'info_dict': {
- 'id': '168067957',
+ 'id': '-43215063_168067957',
'ext': 'mp4',
- 'uploader': 'Киномания - лучшее из мира кино',
+ 'uploader': 'Bro Mazter',
'title': ' ',
'duration': 7291,
'upload_date': '20140328',
+ 'uploader_id': '223413403',
+ 'timestamp': 1396018030,
},
'skip': 'Requires vk account credentials',
},
@@ -177,21 +172,21 @@ class VKIE(VKBaseIE):
'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
'note': 'ivi.ru embed',
'info_dict': {
- 'id': '60690',
+ 'id': '-43215063_169084319',
'ext': 'mp4',
'title': 'Книга Илая',
'duration': 6771,
'upload_date': '20140626',
'view_count': int,
},
- 'skip': 'Only works from Russia',
+ 'skip': 'Removed',
},
{
# video (removed?) only available with list id
'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
'md5': '091287af5402239a1051c37ec7b92913',
'info_dict': {
- 'id': '171201961',
+ 'id': '30481095_171201961',
'ext': 'mp4',
'title': 'ТюменцевВВ_09.07.2015',
'uploader': 'Anton Ivanov',
@@ -206,10 +201,10 @@ class VKIE(VKBaseIE):
'url': 'https://vk.com/video276849682_170681728',
'info_dict': {
'id': 'V3K4mi0SYkc',
- 'ext': 'webm',
+ 'ext': 'mp4',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
- 'duration': 179,
+ 'duration': 178,
'upload_date': '20130116',
'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf',
@@ -223,8 +218,7 @@ class VKIE(VKBaseIE):
'id': 'k3lz2cmXyRuJQSjGHUv',
'ext': 'mp4',
'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
- # TODO: fix test by fixing dailymotion description extraction
- 'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
+ 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
'uploader': 'AniLibria.Tv',
'upload_date': '20160914',
'uploader_id': 'x1p5vl5',
@@ -239,7 +233,7 @@ class VKIE(VKBaseIE):
'url': 'http://vk.com/video-110305615_171782105',
'md5': 'e13fcda136f99764872e739d13fac1d1',
'info_dict': {
- 'id': '171782105',
+ 'id': '-110305615_171782105',
'ext': 'mp4',
'title': 'S-Dance, репетиции к The way show',
'uploader': 'THE WAY SHOW | 17 апреля',
@@ -254,14 +248,20 @@ class VKIE(VKBaseIE):
{
# finished live stream, postlive_mp4
'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
- 'md5': '90d22d051fccbbe9becfccc615be6791',
'info_dict': {
- 'id': '456242764',
+ 'id': '-387766_456242764',
'ext': 'mp4',
- 'title': 'ИгроМир 2016 — день 1',
+ 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
'uploader': 'Игромания',
'duration': 5239,
- 'view_count': int,
+ # TODO: use act=show to extract view_count
+ # 'view_count': int,
+ 'upload_date': '20160929',
+ 'uploader_id': '-387766',
+ 'timestamp': 1475137527,
+ },
+ 'params': {
+ 'skip_download': True,
},
},
{
@@ -293,84 +293,105 @@ class VKIE(VKBaseIE):
# This video is no longer available, because its author has been blocked.
'url': 'https://vk.com/video-10639516_456240611',
'only_matching': True,
- }
- ]
+ },
+ {
+ # The video is not available in your region.
+ 'url': 'https://vk.com/video-51812607_171445436',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
+ mv_data = {}
if video_id:
- info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
+ data = {
+ 'act': 'show_inline',
+ 'video': video_id,
+ }
# Some videos (removed?) can only be downloaded with list id specified
list_id = mobj.group('list_id')
if list_id:
- info_url += '&list=%s' % list_id
+ data['list'] = list_id
+
+ payload = self._download_payload('al_video', video_id, data)
+ info_page = payload[1]
+ opts = payload[-1]
+ mv_data = opts.get('mvData') or {}
+ player = opts.get('player') or {}
else:
- info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query')
video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
- info_page = self._download_webpage(info_url, video_id)
+ info_page = self._download_webpage(
+ 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
- error_message = self._html_search_regex(
- [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
- r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
- info_page, 'error message', default=None)
- if error_message:
- raise ExtractorError(error_message, expected=True)
+ error_message = self._html_search_regex(
+ [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+ r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
+ info_page, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
- if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
- raise ExtractorError(
- 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
- expected=True)
+ if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
+ raise ExtractorError(
+ 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
+ expected=True)
- ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
+ ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
- ERRORS = {
- r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
- ERROR_COPYRIGHT,
+ ERRORS = {
+ r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
+ ERROR_COPYRIGHT,
- r'>The video .*? was removed from public access by request of the copyright holder.<':
- ERROR_COPYRIGHT,
+ r'>The video .*? was removed from public access by request of the copyright holder.<':
+ ERROR_COPYRIGHT,
- r'<!>Please log in or <':
- 'Video %s is only available for registered users, '
- 'use --username and --password options to provide account credentials.',
+ r'<!>Please log in or <':
+ 'Video %s is only available for registered users, '
+ 'use --username and --password options to provide account credentials.',
- r'<!>Unknown error':
- 'Video %s does not exist.',
+ r'<!>Unknown error':
+ 'Video %s does not exist.',
- r'<!>Видео временно недоступно':
- 'Video %s is temporarily unavailable.',
+ r'<!>Видео временно недоступно':
+ 'Video %s is temporarily unavailable.',
- r'<!>Access denied':
- 'Access denied to video %s.',
+ r'<!>Access denied':
+ 'Access denied to video %s.',
- r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
- 'Video %s is no longer available, because its author has been blocked.',
+ r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
+ 'Video %s is no longer available, because its author has been blocked.',
- r'<!>This video is no longer available, because its author has been blocked.':
- 'Video %s is no longer available, because its author has been blocked.',
+ r'<!>This video is no longer available, because its author has been blocked.':
+ 'Video %s is no longer available, because its author has been blocked.',
- r'<!>This video is no longer available, because it has been deleted.':
- 'Video %s is no longer available, because it has been deleted.',
- }
+ r'<!>This video is no longer available, because it has been deleted.':
+ 'Video %s is no longer available, because it has been deleted.',
+
+ r'<!>The video .+? is not available in your region.':
+ 'Video %s is not available in your region.',
+ }
+
+ for error_re, error_msg in ERRORS.items():
+ if re.search(error_re, info_page):
+ raise ExtractorError(error_msg % video_id, expected=True)
- for error_re, error_msg in ERRORS.items():
- if re.search(error_re, info_page):
- raise ExtractorError(error_msg % video_id, expected=True)
+ player = self._parse_json(self._search_regex(
+ r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
+ info_page, 'player params'), video_id)
youtube_url = YoutubeIE._extract_url(info_page)
if youtube_url:
- return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
vimeo_url = VimeoIE._extract_url(url, info_page)
if vimeo_url is not None:
- return self.url_result(vimeo_url)
+ return self.url_result(vimeo_url, VimeoIE.ie_key())
pladform_url = PladformIE._extract_url(info_page)
if pladform_url:
- return self.url_result(pladform_url)
+ return self.url_result(pladform_url, PladformIE.ie_key())
m_rutube = re.search(
r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
@@ -383,6 +404,10 @@ class VKIE(VKBaseIE):
if dailymotion_urls:
return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
+
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
@@ -392,29 +417,7 @@ class VKIE(VKBaseIE):
opts_url = 'http:' + opts_url
return self.url_result(opts_url)
- # vars does not look to be served anymore since 24.10.2016
- data = self._parse_json(
- self._search_regex(
- r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
- video_id, fatal=False)
-
- # <!json> is served instead
- if not data:
- data = self._parse_json(
- self._search_regex(
- [r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
- info_page, 'json', default='{}'),
- video_id)
- if data:
- data = data['player']['params'][0]
-
- if not data:
- data = self._parse_json(
- self._search_regex(
- r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
- 'player params'),
- video_id)['params'][0]
-
+ data = player['params'][0]
title = unescapeHTML(data['md_title'])
# 2 = live
@@ -436,8 +439,8 @@ class VKIE(VKBaseIE):
format_url = url_or_none(format_url)
if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
continue
- if (format_id.startswith(('url', 'cache')) or
- format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
+ if (format_id.startswith(('url', 'cache'))
+ or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
height = int_or_none(self._search_regex(
r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
formats.append({
@@ -458,17 +461,17 @@ class VKIE(VKBaseIE):
self._sort_formats(formats)
return {
- 'id': compat_str(data.get('vid') or video_id),
+ 'id': video_id,
'formats': formats,
'title': title,
'thumbnail': data.get('jpg'),
'uploader': data.get('md_author'),
- 'uploader_id': str_or_none(data.get('author_id')),
- 'duration': data.get('duration'),
+ 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
+ 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
'timestamp': timestamp,
'view_count': view_count,
- 'like_count': int_or_none(data.get('liked')),
- 'dislike_count': int_or_none(data.get('nolikes')),
+ 'like_count': int_or_none(mv_data.get('likes')),
+ 'comment_count': int_or_none(mv_data.get('commcount')),
'is_live': is_live,
}
@@ -476,15 +479,23 @@ class VKIE(VKBaseIE):
class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos"
- _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
+ _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
_TEMPLATE_URL = 'https://vk.com/videos'
_TESTS = [{
- 'url': 'http://vk.com/videos205387401',
+ 'url': 'https://vk.com/videos-767561',
'info_dict': {
- 'id': '205387401',
- 'title': "Tom Cruise's Videos",
+ 'id': '-767561_all',
},
- 'playlist_mincount': 4,
+ 'playlist_mincount': 1150,
+ }, {
+ 'url': 'https://vk.com/videos-767561?section=uploaded',
+ 'info_dict': {
+ 'id': '-767561_uploaded',
+ },
+ 'playlist_mincount': 425,
+ }, {
+ 'url': 'http://vk.com/videos205387401',
+ 'only_matching': True,
}, {
'url': 'http://vk.com/videos-77521',
'only_matching': True,
@@ -498,22 +509,33 @@ class VKUserVideosIE(VKBaseIE):
'url': 'http://new.vk.com/videos205387401',
'only_matching': True,
}]
+ _PAGE_SIZE = 1000
+ _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
+
+ def _fetch_page(self, page_id, section, page):
+ l = self._download_payload('al_video', page_id, {
+ 'act': 'load_videos_silent',
+ 'offset': page * self._PAGE_SIZE,
+ 'oid': page_id,
+ 'section': section,
+ })[0][section]['list']
+
+ for video in l:
+ v = self._VIDEO._make(video[:2])
+ video_id = '%d_%d' % (v.owner_id, v.id)
+ yield self.url_result(
+ 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
def _real_extract(self, url):
- page_id = self._match_id(url)
+ page_id, section = re.match(self._VALID_URL, url).groups()
+ if not section:
+ section = 'all'
- webpage = self._download_webpage(url, page_id)
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, page_id, section),
+ self._PAGE_SIZE)
- entries = [
- self.url_result(
- 'http://vk.com/video' + video_id, 'VK', video_id=video_id)
- for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
-
- title = unescapeHTML(self._search_regex(
- r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
- webpage, 'title', default=page_id))
-
- return self.playlist_result(entries, page_id, title)
+ return self.playlist_result(entries, '%s_%s' % (page_id, section))
class VKWallPostIE(VKBaseIE):
@@ -523,15 +545,15 @@ class VKWallPostIE(VKBaseIE):
# public page URL, audio playlist
'url': 'https://vk.com/bs.official?w=wall-23538238_35',
'info_dict': {
- 'id': '23538238_35',
- 'title': 'Black Shadow - Wall post 23538238_35',
+ 'id': '-23538238_35',
+ 'title': 'Black Shadow - Wall post -23538238_35',
'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
},
'playlist': [{
'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
'info_dict': {
'id': '135220665_111806521',
- 'ext': 'mp3',
+ 'ext': 'mp4',
'title': 'Black Shadow - Слепое Верование',
'duration': 370,
'uploader': 'Black Shadow',
@@ -542,18 +564,16 @@ class VKWallPostIE(VKBaseIE):
'md5': '4cc7e804579122b17ea95af7834c9233',
'info_dict': {
'id': '135220665_111802303',
- 'ext': 'mp3',
+ 'ext': 'mp4',
'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
'duration': 423,
'uploader': 'Black Shadow',
'artist': 'Black Shadow',
'track': 'Война - Негасимое Бездны Пламя!',
},
- 'params': {
- 'skip_download': True,
- },
}],
'params': {
+ 'skip_download': True,
'usenetrc': True,
},
'skip': 'Requires vk account credentials',
@@ -562,7 +582,7 @@ class VKWallPostIE(VKBaseIE):
'url': 'https://vk.com/wall85155021_6319',
'info_dict': {
'id': '85155021_6319',
- 'title': 'Sergey Gorbunov - Wall post 85155021_6319',
+ 'title': 'Сергей Горбунов - Wall post 85155021_6319',
},
'playlist_count': 1,
'params': {
@@ -578,58 +598,72 @@ class VKWallPostIE(VKBaseIE):
'url': 'https://m.vk.com/wall-23538238_35',
'only_matching': True,
}]
+ _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
+ _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
+
+ def _decode(self, enc):
+ dec = ''
+ e = n = 0
+ for c in enc:
+ r = self._BASE64_CHARS.index(c)
+ cond = n % 4
+ e = 64 * e + r if cond else r
+ n += 1
+ if cond:
+ dec += chr(255 & e >> (-2 * n & 6))
+ return dec
+
+ def _unmask_url(self, mask_url, vk_id):
+ if 'audio_api_unavailable' in mask_url:
+ extra = mask_url.split('?extra=')[1].split('#')
+ func, base = self._decode(extra[1]).split(chr(11))
+ mask_url = list(self._decode(extra[0]))
+ url_len = len(mask_url)
+ indexes = [None] * url_len
+ index = int(base) ^ vk_id
+ for n in range(url_len - 1, -1, -1):
+ index = (url_len * (n + 1) ^ index + n) % url_len
+ indexes[n] = index
+ for n in range(1, url_len):
+ c = mask_url[n]
+ index = indexes[url_len - 1 - n]
+ mask_url[n] = mask_url[index]
+ mask_url[index] = c
+ mask_url = ''.join(mask_url)
+ return mask_url
def _real_extract(self, url):
post_id = self._match_id(url)
- wall_url = 'https://vk.com/wall%s' % post_id
-
- post_id = remove_start(post_id, '-')
-
- webpage = self._download_webpage(wall_url, post_id)
-
- error = self._html_search_regex(
- r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError('VK said: %s' % error, expected=True)
+ webpage = self._download_payload('wkview', post_id, {
+ 'act': 'show',
+ 'w': 'wall' + post_id,
+ })[1]
description = clean_html(get_element_by_class('wall_post_text', webpage))
uploader = clean_html(get_element_by_class('author', webpage))
- thumbnail = self._og_search_thumbnail(webpage)
entries = []
- audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage)
- if audio_ids:
- al_audio = self._download_webpage(
- 'https://vk.com/al_audio.php', post_id,
- note='Downloading audio info', fatal=False,
- data=urlencode_postdata({
- 'act': 'reload_audio',
- 'al': '1',
- 'ids': ','.join(audio_ids)
- }))
- if al_audio:
- Audio = collections.namedtuple(
- 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration'])
- audios = self._parse_json(
- self._search_regex(
- r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
- post_id, fatal=False, transform_source=unescapeHTML)
- if isinstance(audios, list):
- for audio in audios:
- a = Audio._make(audio[:6])
- entries.append({
- 'id': '%s_%s' % (a.user_id, a.id),
- 'url': a.url,
- 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
- 'thumbnail': thumbnail,
- 'duration': a.duration,
- 'uploader': uploader,
- 'artist': a.artist,
- 'track': a.track,
- })
+ for audio in re.findall(r'data-audio="([^"]+)', webpage):
+ audio = self._parse_json(unescapeHTML(audio), post_id)
+ a = self._AUDIO._make(audio[:16])
+ if not a.url:
+ continue
+ title = unescapeHTML(a.title)
+ performer = unescapeHTML(a.performer)
+ entries.append({
+ 'id': '%s_%s' % (a.owner_id, a.id),
+ 'url': self._unmask_url(a.url, a.ads['vk_id']),
+ 'title': '%s - %s' % (performer, title) if performer else title,
+ 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
+ 'duration': int_or_none(a.duration),
+ 'uploader': uploader,
+ 'artist': performer,
+ 'track': title,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ })
for video in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
index 0b5165fd0..f79531e6f 100644
--- a/youtube_dl/extractor/vlive.py
+++ b/youtube_dl/extractor/vlive.py
@@ -6,24 +6,21 @@ import time
import itertools
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_str,
-)
+from .naver import NaverBaseIE
+from ..compat import compat_str
from ..utils import (
- dict_get,
ExtractorError,
- float_or_none,
- int_or_none,
+ merge_dicts,
remove_start,
try_get,
urlencode_postdata,
)
-class VLiveIE(InfoExtractor):
+class VLiveIE(NaverBaseIE):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
+ _NETRC_MACHINE = 'vlive'
_TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
@@ -33,6 +30,7 @@ class VLiveIE(InfoExtractor):
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
+ 'uploader_id': 'muploader_a',
},
}, {
'url': 'http://www.vlive.tv/video/16937',
@@ -43,16 +41,60 @@ class VLiveIE(InfoExtractor):
'creator': 'EXO',
'view_count': int,
'subtitles': 'mincount:12',
+ 'uploader_id': 'muploader_j',
},
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.vlive.tv/video/129100',
+ 'md5': 'ca2569453b79d66e5b919e5d308bff6b',
+ 'info_dict': {
+ 'id': '129100',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
+ 'creator': 'BTS+',
+ 'view_count': int,
+ 'subtitles': 'mincount:10',
+ },
+ 'skip': 'This video is only available for CH+ subscribers',
}]
@classmethod
def suitable(cls, url):
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ email, password = self._get_login_info()
+ if None in (email, password):
+ return
+
+ def is_logged_in():
+ login_info = self._download_json(
+ 'https://www.vlive.tv/auth/loginInfo', None,
+ note='Downloading login info',
+ headers={'Referer': 'https://www.vlive.tv/home'})
+ return try_get(
+ login_info, lambda x: x['message']['login'], bool) or False
+
+ LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+ self._request_webpage(
+ LOGIN_URL, None, note='Downloading login cookies')
+
+ self._download_webpage(
+ LOGIN_URL, None, note='Logging in',
+ data=urlencode_postdata({'email': email, 'pwd': password}),
+ headers={
+ 'Referer': LOGIN_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ if not is_logged_in():
+ raise ExtractorError('Unable to log in', expected=True)
+
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -77,10 +119,7 @@ class VLiveIE(InfoExtractor):
if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
return self._live(video_id, webpage)
elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
- if long_video_id and key:
- return self._replay(video_id, webpage, long_video_id, key)
- else:
- status = 'COMING_SOON'
+ return self._replay(video_id, webpage, long_video_id, key)
if status == 'LIVE_END':
raise ExtractorError('Uploading for replay. Please wait...',
@@ -91,13 +130,15 @@ class VLiveIE(InfoExtractor):
raise ExtractorError('We are sorry, '
'but the live broadcast has been canceled.',
expected=True)
+ elif status == 'ONLY_APP':
+ raise ExtractorError('Unsupported video type', expected=True)
else:
raise ExtractorError('Unknown status %s' % status)
def _get_common_fields(self, webpage):
title = self._og_search_title(webpage)
creator = self._html_search_regex(
- r'<div[^>]+class="info_area"[^>]*>\s*<a\s+[^>]*>([^<]+)',
+ r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
webpage, 'creator', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
return {
@@ -107,14 +148,7 @@ class VLiveIE(InfoExtractor):
}
def _live(self, video_id, webpage):
- init_page = self._download_webpage(
- 'https://www.vlive.tv/video/init/view',
- video_id, note='Downloading live webpage',
- data=urlencode_postdata({'videoSeq': video_id}),
- headers={
- 'Referer': 'https://www.vlive.tv/video/%s' % video_id,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
+ init_page = self._download_init_page(video_id)
live_params = self._search_regex(
r'"liveStreamInfo"\s*:\s*(".*"),',
@@ -140,45 +174,30 @@ class VLiveIE(InfoExtractor):
return info
def _replay(self, video_id, webpage, long_video_id, key):
- playinfo = self._download_json(
- 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
- % compat_urllib_parse_urlencode({
- 'videoId': long_video_id,
- 'key': key,
- 'ptc': 'http',
- 'doct': 'json', # document type (xml or json)
- 'cpt': 'vtt', # captions type (vtt or ttml)
- }), video_id)
-
- formats = [{
- 'url': vid['source'],
- 'format_id': vid.get('encodingOption', {}).get('name'),
- 'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
- 'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
- 'width': int_or_none(vid.get('encodingOption', {}).get('width')),
- 'height': int_or_none(vid.get('encodingOption', {}).get('height')),
- 'filesize': int_or_none(vid.get('size')),
- } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
- self._sort_formats(formats)
-
- view_count = int_or_none(playinfo.get('meta', {}).get('count'))
-
- subtitles = {}
- for caption in playinfo.get('captions', {}).get('list', []):
- lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
- if lang and caption.get('source'):
- subtitles[lang] = [{
- 'ext': 'vtt',
- 'url': caption['source']}]
-
- info = self._get_common_fields(webpage)
- info.update({
- 'id': video_id,
- 'formats': formats,
- 'view_count': view_count,
- 'subtitles': subtitles,
- })
- return info
+ if '' in (long_video_id, key):
+ init_page = self._download_init_page(video_id)
+ video_info = self._parse_json(self._search_regex(
+ (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
+ r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
+ video_id)
+ if video_info.get('status') == 'NEED_CHANNEL_PLUS':
+ self.raise_login_required(
+ 'This video is only available for CH+ subscribers')
+ long_video_id, key = video_info['vid'], video_info['inkey']
+
+ return merge_dicts(
+ self._get_common_fields(webpage),
+ self._extract_video_info(video_id, long_video_id, key))
+
+ def _download_init_page(self, video_id):
+ return self._download_webpage(
+ 'https://www.vlive.tv/video/init/view',
+ video_id, note='Downloading live webpage',
+ data=urlencode_postdata({'videoSeq': video_id}),
+ headers={
+ 'Referer': 'https://www.vlive.tv/video/%s' % video_id,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
class VLiveChannelIE(InfoExtractor):
@@ -239,7 +258,7 @@ class VLiveChannelIE(InfoExtractor):
# Large values of maxNumOfRows (~300 or above) may cause
# empty responses (see [1]), e.g. this happens for [2] that
# has more than 300 videos.
- # 1. https://github.com/rg3/youtube-dl/issues/13830
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
# 2. http://channels.vlive.tv/EDBF.
'maxNumOfRows': 100,
'_': int(time.time()),
@@ -275,26 +294,45 @@ class VLiveChannelIE(InfoExtractor):
class VLivePlaylistIE(InfoExtractor):
IE_NAME = 'vlive:playlist'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
- _TEST = {
+ _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
+ _TESTS = [{
+ # regular working playlist
+ 'url': 'https://www.vlive.tv/video/117956/playlist/117963',
+ 'info_dict': {
+ 'id': '117963',
+ 'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
+ },
+ 'playlist_mincount': 10
+ }, {
+ # playlist with no playlistVideoSeqs
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
'info_dict': {
- 'id': '22912',
- 'title': 'Valentine Day Message from TWICE'
+ 'id': '22867',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] Valentine Day Message from MINA',
+ 'creator': 'TWICE',
+ 'view_count': int
},
- 'playlist_mincount': 9
- }
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _build_video_result(self, video_id, message):
+ self.to_screen(message)
+ return self.url_result(
+ self._VIDEO_URL_TEMPLATE % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, playlist_id = mobj.group('video_id', 'id')
- VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
if self._downloader.params.get('noplaylist'):
- self.to_screen(
- 'Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result(
- VIDEO_URL_TEMPLATE % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id)
+ return self._build_video_result(
+ video_id,
+ 'Downloading just video %s because of --no-playlist'
+ % video_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
@@ -304,15 +342,21 @@ class VLivePlaylistIE(InfoExtractor):
'http://www.vlive.tv/video/%s/playlist/%s'
% (video_id, playlist_id), playlist_id)
- item_ids = self._parse_json(
- self._search_regex(
- r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
- 'playlist video seqs'),
- playlist_id)
+ raw_item_ids = self._search_regex(
+ r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
+ 'playlist video seqs', default=None, fatal=False)
+
+ if not raw_item_ids:
+ return self._build_video_result(
+ video_id,
+ 'Downloading just video %s because no playlist was found'
+ % video_id)
+
+ item_ids = self._parse_json(raw_item_ids, playlist_id)
entries = [
self.url_result(
- VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
+ self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
video_id=compat_str(item_id))
for item_id in item_ids]
diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py
index 239644340..74d2257e7 100644
--- a/youtube_dl/extractor/vodplatform.py
+++ b/youtube_dl/extractor/vodplatform.py
@@ -6,8 +6,8 @@ from ..utils import unescapeHTML
class VODPlatformIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _TESTS = [{
# from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
'md5': '1db2b7249ce383d6be96499006e951fc',
@@ -16,7 +16,10 @@ class VODPlatformIE(InfoExtractor):
'ext': 'mp4',
'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
}
- }
+ }, {
+ 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py
index 59e1359c4..a52e40afa 100644
--- a/youtube_dl/extractor/voicerepublic.py
+++ b/youtube_dl/extractor/voicerepublic.py
@@ -1,17 +1,12 @@
from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
determine_ext,
int_or_none,
- sanitized_Request,
+ urljoin,
)
@@ -26,8 +21,7 @@ class VoiceRepublicIE(InfoExtractor):
'ext': 'm4a',
'title': 'Watching the Watchers: Building a Sousveillance State',
'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
- 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
- 'duration': 1800,
+ 'duration': 1556,
'view_count': int,
}
}, {
@@ -38,63 +32,31 @@ class VoiceRepublicIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- req = sanitized_Request(
- compat_urlparse.urljoin(url, '/talks/%s' % display_id))
- # Older versions of Firefox get redirected to an "upgrade browser" page
- req.add_header('User-Agent', 'youtube-dl')
- webpage = self._download_webpage(req, display_id)
+ webpage = self._download_webpage(url, display_id)
if '>Queued for processing, please stand by...<' in webpage:
raise ExtractorError(
'Audio is still queued for processing', expected=True)
- config = self._search_regex(
- r'(?s)return ({.+?});\s*\n', webpage,
- 'data', default=None)
- data = self._parse_json(config, display_id, fatal=False) if config else None
- if data:
- title = data['title']
- description = data.get('teaser')
- talk_id = compat_str(data.get('talk_id') or display_id)
- talk = data['talk']
- duration = int_or_none(talk.get('duration'))
- formats = [{
- 'url': compat_urlparse.urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in talk['links'].items()]
- else:
- title = self._og_search_title(webpage)
- description = self._html_search_regex(
- r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
- webpage, 'description', fatal=False)
- talk_id = self._search_regex(
- [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
- webpage, 'talk id', default=None) or display_id
- duration = None
- player = self._search_regex(
- r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
- formats = [{
- 'url': compat_urlparse.urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)]
+ talk = self._parse_json(self._search_regex(
+ r'initialSnapshot\s*=\s*({.+?});',
+ webpage, 'talk'), display_id)['talk']
+ title = talk['title']
+ formats = [{
+ 'url': urljoin(url, talk_url),
+ 'format_id': format_id,
+ 'ext': determine_ext(talk_url) or format_id,
+ 'vcodec': 'none',
+ } for format_id, talk_url in talk['media_links'].items()]
self._sort_formats(formats)
- thumbnail = self._og_search_thumbnail(webpage)
- view_count = int_or_none(self._search_regex(
- r"class='play-count[^']*'>\s*(\d+) plays",
- webpage, 'play count', fatal=False))
-
return {
- 'id': talk_id,
+ 'id': compat_str(talk.get('id') or display_id),
'display_id': display_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
+ 'description': talk.get('teaser'),
+ 'thumbnail': talk.get('image_url'),
+ 'duration': int_or_none(talk.get('archived_duration')),
+ 'view_count': int_or_none(talk.get('play_count')),
'formats': formats,
}
diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dl/extractor/voxmedia.py
index c7a0a88fe..b318e15d4 100644
--- a/youtube_dl/extractor/voxmedia.py
+++ b/youtube_dl/extractor/voxmedia.py
@@ -4,7 +4,10 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from .once import OnceIE
from ..compat import compat_urllib_parse_unquote
-from ..utils import ExtractorError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+)
class VoxMediaVolumeIE(OnceIE):
@@ -13,18 +16,43 @@ class VoxMediaVolumeIE(OnceIE):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_data = self._parse_json(self._search_regex(
- r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', webpage, 'video data'), video_id)
+
+ setup = self._parse_json(self._search_regex(
+ r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
+ video_data = setup.get('video') or {}
+ info = {
+ 'id': video_id,
+ 'title': video_data.get('title_short'),
+ 'description': video_data.get('description_long') or video_data.get('description_short'),
+ 'thumbnail': video_data.get('brightcove_thumbnail')
+ }
+ asset = setup.get('asset') or setup.get('params') or {}
+
+ formats = []
+ hls_url = asset.get('hls_url')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ mp4_url = asset.get('mp4_url')
+ if mp4_url:
+ tbr = self._search_regex(r'-(\d+)k\.', mp4_url, 'bitrate', default=None)
+ format_id = 'http'
+ if tbr:
+ format_id += '-' + tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': mp4_url,
+ 'tbr': int_or_none(tbr),
+ })
+ if formats:
+ self._sort_formats(formats)
+ info['formats'] = formats
+ return info
+
for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
provider_video_id = video_data.get('%s_id' % provider_video_type)
if not provider_video_id:
continue
- info = {
- 'id': video_id,
- 'title': video_data.get('title_short'),
- 'description': video_data.get('description_long') or video_data.get('description_short'),
- 'thumbnail': video_data.get('brightcove_thumbnail')
- }
if provider_video_type == 'brightcove':
info['formats'] = self._extract_once_formats(provider_video_id)
self._sort_formats(info['formats'])
@@ -39,46 +67,49 @@ class VoxMediaVolumeIE(OnceIE):
class VoxMediaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:(?:theverge|vox|sbnation|eater|polygon|curbed|racked|funnyordie)\.com|recode\.net)/(?:[^/]+/)*(?P<id>[^/?]+)'
_TESTS = [{
+ # Volume embed, Youtube
'url': 'http://www.theverge.com/2014/6/27/5849272/material-world-how-google-discovered-what-software-is-made-of',
'info_dict': {
- 'id': '11eXZobjrG8DCSTgrNjVinU-YmmdYjhe',
+ 'id': 'j4mLW6x17VM',
'ext': 'mp4',
- 'title': 'Google\'s new material design direction',
- 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
+ 'title': 'Material world: how Google discovered what software is made of',
+ 'description': 'md5:dfc17e7715e3b542d66e33a109861382',
+ 'upload_date': '20190710',
+ 'uploader_id': 'TheVerge',
+ 'uploader': 'The Verge',
},
- 'add_ie': ['Ooyala'],
+ 'add_ie': ['Youtube'],
}, {
- # data-ooyala-id
+ # Volume embed, Youtube
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
- 'md5': 'd744484ff127884cd2ba09e3fa604e4b',
+ 'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
'info_dict': {
- 'id': 'RkZXU4cTphOCPDMZg5oEounJyoFI0g-B',
+ 'id': 'Gy8Md3Eky38',
'ext': 'mp4',
'title': 'The Nexus 6: hands-on with Google\'s phablet',
- 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
+ 'description': 'md5:d9f0216e5fb932dd2033d6db37ac3f1d',
+ 'uploader_id': 'TheVerge',
+ 'upload_date': '20141021',
+ 'uploader': 'The Verge',
},
- 'add_ie': ['Ooyala'],
- 'skip': 'Video Not Found',
+ 'add_ie': ['Youtube'],
+ 'skip': 'similar to the previous test',
}, {
- # volume embed
+ # Volume embed, Youtube
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
'info_dict': {
- 'id': 'wydzk3dDpmRz7PQoXRsTIX6XTkPjYL0b',
+ 'id': 'YCjDnX-Xzhg',
'ext': 'mp4',
- 'title': 'The new frontier of LGBTQ civil rights, explained',
- 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
+ 'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
+ 'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
+ 'uploader_id': 'voxdotcom',
+ 'upload_date': '20150915',
+ 'uploader': 'Vox',
},
- 'add_ie': ['Ooyala'],
+ 'add_ie': ['Youtube'],
+ 'skip': 'similar to the previous test',
}, {
# youtube embed
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
@@ -93,6 +124,7 @@ class VoxMediaIE(InfoExtractor):
'uploader': 'Vox',
},
'add_ie': ['Youtube'],
+ 'skip': 'Page no longer contain videos',
}, {
# SBN.VideoLinkset.entryGroup multiple ooyala embeds
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@@ -118,10 +150,11 @@ class VoxMediaIE(InfoExtractor):
'description': 'md5:e02d56b026d51aa32c010676765a690d',
},
}],
+ 'skip': 'Page no longer contain videos',
}, {
# volume embed, Brightcove Once
'url': 'https://www.recode.net/2014/6/17/11628066/post-post-pc-ceo-the-full-code-conference-video-of-microsofts-satya',
- 'md5': '01571a896281f77dc06e084138987ea2',
+ 'md5': '2dbc77b8b0bff1894c2fce16eded637d',
'info_dict': {
'id': '1231c973d',
'ext': 'mp4',
diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py
deleted file mode 100644
index 858ac9e71..000000000
--- a/youtube_dl/extractor/vporn.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- parse_duration,
- str_to_int,
- urljoin,
-)
-
-
-class VpornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
- _TESTS = [
- {
- 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
- 'md5': 'facf37c1b86546fa0208058546842c55',
- 'info_dict': {
- 'id': '497944',
- 'display_id': 'violet-on-her-th-birthday',
- 'ext': 'mp4',
- 'title': 'Violet on her 19th birthday',
- 'description': 'Violet dances in front of the camera which is sure to get you horny.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'kileyGrope',
- 'categories': ['Masturbation', 'Teen'],
- 'duration': 393,
- 'age_limit': 18,
- 'view_count': int,
- },
- 'skip': 'video removed',
- },
- {
- 'url': 'http://www.vporn.com/female/hana-shower/523564/',
- 'md5': 'ced35a4656198a1664cf2cda1575a25f',
- 'info_dict': {
- 'id': '523564',
- 'display_id': 'hana-shower',
- 'ext': 'mp4',
- 'title': 'Hana Shower',
- 'description': 'Hana showers at the bathroom.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Hmmmmm',
- 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'],
- 'duration': 588,
- 'age_limit': 18,
- 'view_count': int,
- }
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!'
- if errmsg in webpage:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
-
- title = self._html_search_regex(
- r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
- description = self._html_search_regex(
- r'class="(?:descr|description_txt)">(.*?)</div>',
- webpage, 'description', fatal=False)
- thumbnail = urljoin('http://www.vporn.com', self._html_search_regex(
- r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description',
- default=None))
-
- uploader = self._html_search_regex(
- r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
- webpage, 'uploader', fatal=False)
-
- categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
-
- duration = parse_duration(self._search_regex(
- r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
- webpage, 'duration', fatal=False))
-
- view_count = str_to_int(self._search_regex(
- r'class="views">([\d,\.]+) [Vv]iews<',
- webpage, 'view count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r"'Comments \(([\d,\.]+)\)'",
- webpage, 'comment count', default=None))
-
- formats = []
-
- for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
- video_url = video[1]
- fmt = {
- 'url': video_url,
- 'format_id': video[0],
- }
- m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
- if m:
- fmt.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- 'vbr': int(m.group('vbr')),
- })
- formats.append(fmt)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'categories': categories,
- 'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'age_limit': 18,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py
index 444295d68..422025267 100644
--- a/youtube_dl/extractor/vrt.py
+++ b/youtube_dl/extractor/vrt.py
@@ -5,150 +5,83 @@ import re
from .common import InfoExtractor
from ..utils import (
+ extract_attributes,
float_or_none,
+ get_element_by_class,
+ strip_or_none,
+ unified_timestamp,
)
class VRTIE(InfoExtractor):
- IE_DESC = 'deredactie.be, sporza.be, cobra.be and cobra.canvas.be'
- _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P<id>[^/]+)/*'
- _TESTS = [
- # deredactie.be
- {
- 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
- 'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
- 'info_dict': {
- 'id': '2129880',
- 'ext': 'flv',
- 'title': 'Het journaal L - 25/10/14',
- 'description': None,
- 'timestamp': 1414271750.949,
- 'upload_date': '20141025',
- 'duration': 929,
- },
- 'skip': 'HTTP Error 404: Not Found',
+ IE_DESC = 'VRT NWS, Flanders News, Flandern Info and Sporza'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
+ 'md5': 'e1663accf5cf13f375f3cd0d10476669',
+ 'info_dict': {
+ 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
+ 'ext': 'mp4',
+ 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
+ 'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
+ 'timestamp': 1557924660,
+ 'upload_date': '20190515',
+ 'duration': 31.2,
},
- # sporza.be
- {
- 'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
- 'md5': '11f53088da9bf8e7cfc42456697953ff',
- 'info_dict': {
- 'id': '2124639',
- 'ext': 'flv',
- 'title': 'Bekijk Extra Time van 20 oktober',
- 'description': 'md5:83ac5415a4f1816c6a93f8138aef2426',
- 'timestamp': 1413835980.560,
- 'upload_date': '20141020',
- 'duration': 3238,
- },
- 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
+ 'md5': '910bba927566e9ab992278f647eb4b75',
+ 'info_dict': {
+ 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
+ 'ext': 'mp4',
+ 'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
+ 'timestamp': 1557923760,
+ 'upload_date': '20190515',
+ 'duration': 115.17,
},
- # cobra.be
- {
- 'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
- 'md5': '78a2b060a5083c4f055449a72477409d',
- 'info_dict': {
- 'id': '2126050',
- 'ext': 'flv',
- 'title': 'Bret Easton Ellis in Café Corsari',
- 'description': 'md5:f699986e823f32fd6036c1855a724ee9',
- 'timestamp': 1413967500.494,
- 'upload_date': '20141022',
- 'duration': 661,
- },
- 'skip': 'HTTP Error 404: Not Found',
- },
- {
- # YouTube video
- 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957',
- 'md5': 'b8b93da1df1cea6c8556255a796b7d61',
- 'info_dict': {
- 'id': 'Wji-BZ0oCwg',
- 'ext': 'mp4',
- 'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer',
- 'description': 'md5:8e468944dce15567a786a67f74262583',
- 'uploader': 'Star Wars',
- 'uploader_id': 'starwars',
- 'upload_date': '20160407',
- },
- 'add_ie': ['Youtube'],
- },
- {
- 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055',
- 'info_dict': {
- 'id': '2377055',
- 'ext': 'mp4',
- 'title': 'Cafe Derby',
- 'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.',
- 'upload_date': '20150626',
- 'timestamp': 1435305240.769,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }
- ]
+ }, {
+ 'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
+ 'only_matching': True,
+ }]
+ _CLIENT_MAP = {
+ 'vrt.be/vrtnws': 'vrtnieuws',
+ 'sporza.be': 'sporza',
+ }
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._search_regex(
- r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False)
-
- src = self._search_regex(
- r'data-video-src="([^"]+)"', webpage, 'video src', default=None)
-
- video_type = self._search_regex(
- r'data-video-type="([^"]+)"', webpage, 'video type', default=None)
-
- if video_type == 'YouTubeVideo':
- return self.url_result(src, 'Youtube')
-
- formats = []
-
- mobj = re.search(
- r'data-video-iphone-server="(?P<server>[^"]+)"\s+data-video-iphone-path="(?P<path>[^"]+)"',
- webpage)
- if mobj:
- formats.extend(self._extract_m3u8_formats(
- '%s/%s' % (mobj.group('server'), mobj.group('path')),
- video_id, 'mp4', m3u8_id='hls', fatal=False))
-
- if src:
- formats = self._extract_wowza_formats(src, video_id)
- if 'data-video-geoblocking="true"' not in webpage:
- for f in formats:
- if f['url'].startswith('rtsp://'):
- http_format = f.copy()
- http_format.update({
- 'url': f['url'].replace('rtsp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''),
- 'format_id': f['format_id'].replace('rtsp', 'http'),
- 'protocol': 'http',
- })
- formats.append(http_format)
-
- if not formats and 'data-video-geoblocking="true"' in webpage:
- self.raise_geo_restricted('This video is only available in Belgium')
-
- self._sort_formats(formats)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage, default=None)
- thumbnail = self._og_search_thumbnail(webpage)
- timestamp = float_or_none(self._search_regex(
- r'data-video-sitestat-pubdate="(\d+)"', webpage, 'timestamp', fatal=False), 1000)
- duration = float_or_none(self._search_regex(
- r'data-video-duration="(\d+)"', webpage, 'duration', fatal=False), 1000)
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ attrs = extract_attributes(self._search_regex(
+ r'(<[^>]+class="vrtvideo"[^>]*>)', webpage, 'vrt video'))
+
+ asset_id = attrs['data-videoid']
+ publication_id = attrs.get('data-publicationid')
+ if publication_id:
+ asset_id = publication_id + '$' + asset_id
+ client = attrs.get('data-client') or self._CLIENT_MAP[site]
+
+ title = strip_or_none(get_element_by_class(
+ 'vrt-title', webpage) or self._html_search_meta(
+ ['og:title', 'twitter:title', 'name'], webpage))
+ description = self._html_search_meta(
+ ['og:description', 'twitter:description', 'description'], webpage)
+ if description == '…':
+ description = None
+ timestamp = unified_timestamp(self._html_search_meta(
+ 'article:published_time', webpage))
return {
- 'id': video_id,
+ '_type': 'url_transparent',
+ 'id': asset_id,
+ 'display_id': display_id,
'title': title,
'description': description,
- 'thumbnail': thumbnail,
+ 'thumbnail': attrs.get('data-posterimage'),
'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
+ 'duration': float_or_none(attrs.get('data-duration'), 1000),
+ 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client, asset_id),
+ 'ie_key': 'Canvas',
}
diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py
index ac0819c7c..6e51469b0 100644
--- a/youtube_dl/extractor/vrv.py
+++ b/youtube_dl/extractor/vrv.py
@@ -11,10 +11,12 @@ import time
from .common import InfoExtractor
from ..compat import (
+ compat_HTTPError,
compat_urllib_parse_urlencode,
compat_urllib_parse,
)
from ..utils import (
+ ExtractorError,
float_or_none,
int_or_none,
)
@@ -24,50 +26,73 @@ class VRVBaseIE(InfoExtractor):
_API_DOMAIN = None
_API_PARAMS = {}
_CMS_SIGNING = {}
+ _TOKEN = None
+ _TOKEN_SECRET = ''
def _call_api(self, path, video_id, note, data=None):
+ # https://tools.ietf.org/html/rfc5849#section-3
base_url = self._API_DOMAIN + '/core/' + path
- encoded_query = compat_urllib_parse_urlencode({
- 'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
- 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
- 'oauth_signature_method': 'HMAC-SHA1',
- 'oauth_timestamp': int(time.time()),
- 'oauth_version': '1.0',
- })
+ query = [
+ ('oauth_consumer_key', self._API_PARAMS['oAuthKey']),
+ ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])),
+ ('oauth_signature_method', 'HMAC-SHA1'),
+ ('oauth_timestamp', int(time.time())),
+ ]
+ if self._TOKEN:
+ query.append(('oauth_token', self._TOKEN))
+ encoded_query = compat_urllib_parse_urlencode(query)
headers = self.geo_verification_headers()
if data:
data = json.dumps(data).encode()
headers['Content-Type'] = 'application/json'
- method = 'POST' if data else 'GET'
- base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
+ base_string = '&'.join([
+ 'POST' if data else 'GET',
+ compat_urllib_parse.quote(base_url, ''),
+ compat_urllib_parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
- (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
+ (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
- return self._download_json(
- '?'.join([base_url, encoded_query]), video_id,
- note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+ try:
+ return self._download_json(
+ '?'.join([base_url, encoded_query]), video_id,
+ note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
+ raise
def _call_cms(self, path, video_id, note):
if not self._CMS_SIGNING:
- self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
+ index = self._call_api('index', video_id, 'CMS Signing')
+ self._CMS_SIGNING = index.get('cms_signing') or {}
+ if not self._CMS_SIGNING:
+ for signing_policy in index.get('signing_policies', []):
+ signing_path = signing_policy.get('path')
+ if signing_path and signing_path.startswith('/cms/'):
+ name, value = signing_policy.get('name'), signing_policy.get('value')
+ if name and value:
+ self._CMS_SIGNING[name] = value
return self._download_json(
self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
- def _set_api_params(self, webpage, video_id):
- if not self._API_PARAMS:
- self._API_PARAMS = self._parse_json(self._search_regex(
- r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
- webpage, 'api config'), video_id)['cxApiParams']
- self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
-
def _get_cms_resource(self, resource_key, video_id):
return self._call_api(
'cms_resource', video_id, 'resource path', data={
'resource_key': resource_key,
})['__links__']['cms_resource']['href']
+ def _real_initialize(self):
+ webpage = self._download_webpage(
+ 'https://vrv.co/', None, headers=self.geo_verification_headers())
+ self._API_PARAMS = self._parse_json(self._search_regex(
+ [
+ r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
+ r'window\.__APP_CONFIG__\s*=\s*({.+})'
+ ], webpage, 'app config'), None)['cxApiParams']
+ self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
+
class VRVIE(VRVBaseIE):
IE_NAME = 'vrv'
@@ -85,28 +110,53 @@ class VRVIE(VRVBaseIE):
# m3u8 download
'skip_download': True,
},
+ }, {
+ # movie listing
+ 'url': 'https://vrv.co/watch/G6NQXZ1J6/Lily-CAT',
+ 'info_dict': {
+ 'id': 'G6NQXZ1J6',
+ 'title': 'Lily C.A.T',
+ 'description': 'md5:988b031e7809a6aeb60968be4af7db07',
+ },
+ 'playlist_count': 2,
}]
+ _NETRC_MACHINE = 'vrv'
+
+ def _real_initialize(self):
+ super(VRVIE, self)._real_initialize()
+
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ token_credentials = self._call_api(
+ 'authenticate/by:credentials', None, 'Token Credentials', data={
+ 'email': email,
+ 'password': password,
+ })
+ self._TOKEN = token_credentials['oauth_token']
+ self._TOKEN_SECRET = token_credentials['oauth_token_secret']
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
- if not url or stream_format not in ('hls', 'dash'):
+ if not url or stream_format not in ('hls', 'dash', 'adaptive_hls'):
return []
- assert audio_lang or hardsub_lang
stream_id_list = []
if audio_lang:
stream_id_list.append('audio-%s' % audio_lang)
if hardsub_lang:
stream_id_list.append('hardsub-%s' % hardsub_lang)
- stream_id = '-'.join(stream_id_list)
- format_id = '%s-%s' % (stream_format, stream_id)
- if stream_format == 'hls':
+ format_id = stream_format
+ if stream_id_list:
+ format_id += '-' + '-'.join(stream_id_list)
+ if 'hls' in stream_format:
adaptive_formats = self._extract_m3u8_formats(
url, video_id, 'mp4', m3u8_id=format_id,
- note='Downloading %s m3u8 information' % stream_id,
+ note='Downloading %s information' % format_id,
fatal=False)
elif stream_format == 'dash':
adaptive_formats = self._extract_mpd_formats(
url, video_id, mpd_id=format_id,
- note='Downloading %s MPD information' % stream_id,
+ note='Downloading %s information' % format_id,
fatal=False)
if audio_lang:
for f in adaptive_formats:
@@ -116,26 +166,34 @@ class VRVIE(VRVBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(
- url, video_id,
- headers=self.geo_verification_headers())
- media_resource = self._parse_json(self._search_regex(
- r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
- webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
-
- video_data = media_resource.get('json')
- if not video_data:
- self._set_api_params(webpage, video_id)
- episode_path = self._get_cms_resource(
- 'cms:/episodes/' + video_id, video_id)
- video_data = self._call_cms(episode_path, video_id, 'video')
+
+ object_data = self._call_cms(self._get_cms_resource(
+ 'cms:/objects/' + video_id, video_id), video_id, 'object')['items'][0]
+ resource_path = object_data['__links__']['resource']['href']
+ video_data = self._call_cms(resource_path, video_id, 'video')
title = video_data['title']
+ description = video_data.get('description')
- streams_json = media_resource.get('streams', {}).get('json', {})
- if not streams_json:
- self._set_api_params(webpage, video_id)
- streams_path = video_data['__links__']['streams']['href']
- streams_json = self._call_cms(streams_path, video_id, 'streams')
+ if video_data.get('__class__') == 'movie_listing':
+ items = self._call_cms(
+ video_data['__links__']['movie_listing/movies']['href'],
+ video_id, 'movie listing').get('items') or []
+ if len(items) != 1:
+ entries = []
+ for item in items:
+ item_id = item.get('id')
+ if not item_id:
+ continue
+ entries.append(self.url_result(
+ 'https://vrv.co/watch/' + item_id,
+ self.ie_key(), item_id, item.get('title')))
+ return self.playlist_result(entries, video_id, title, description)
+ video_data = items[0]
+
+ streams_path = video_data['__links__'].get('streams', {}).get('href')
+ if not streams_path:
+ self.raise_login_required()
+ streams_json = self._call_cms(streams_path, video_id, 'streams')
audio_locale = streams_json.get('audio_locale')
formats = []
@@ -148,14 +206,15 @@ class VRVIE(VRVBaseIE):
self._sort_formats(formats)
subtitles = {}
- for subtitle in streams_json.get('subtitles', {}).values():
- subtitle_url = subtitle.get('url')
- if not subtitle_url:
- continue
- subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
- 'url': subtitle_url,
- 'ext': subtitle.get('format', 'ass'),
- })
+ for k in ('captions', 'subtitles'):
+ for subtitle in streams_json.get(k, {}).values():
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
+ 'url': subtitle_url,
+ 'ext': subtitle.get('format', 'ass'),
+ })
thumbnails = []
for thumbnail in video_data.get('images', {}).get('thumbnails', []):
@@ -174,7 +233,7 @@ class VRVIE(VRVBaseIE):
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
- 'description': video_data.get('description'),
+ 'description': description,
'duration': float_or_none(video_data.get('duration_ms'), 1000),
'uploader_id': video_data.get('channel_id'),
'series': video_data.get('series_title'),
@@ -200,11 +259,7 @@ class VRVSeriesIE(VRVBaseIE):
def _real_extract(self, url):
series_id = self._match_id(url)
- webpage = self._download_webpage(
- url, series_id,
- headers=self.geo_verification_headers())
- self._set_api_params(webpage, series_id)
seasons_path = self._get_cms_resource(
'cms:/seasons?series_id=' + series_id, series_id)
seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
diff --git a/youtube_dl/extractor/vshare.py b/youtube_dl/extractor/vshare.py
index e4ec77889..c631ac1fa 100644
--- a/youtube_dl/extractor/vshare.py
+++ b/youtube_dl/extractor/vshare.py
@@ -48,7 +48,7 @@ class VShareIE(InfoExtractor):
webpage = self._download_webpage(
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
- video_id)
+ video_id, headers={'Referer': url})
title = self._html_search_regex(
r'<title>([^<]+)</title>', webpage, 'title')
diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dl/extractor/vvvvid.py
index 3d0dc403b..6906cd2ab 100644
--- a/youtube_dl/extractor/vvvvid.py
+++ b/youtube_dl/extractor/vvvvid.py
@@ -12,7 +12,7 @@ from ..utils import (
class VVVVIDIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/#!(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
_TESTS = [{
# video_type == 'video/vvvvid'
'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py
index 6000671c3..b7d02fca3 100644
--- a/youtube_dl/extractor/vzaar.py
+++ b/youtube_dl/extractor/vzaar.py
@@ -32,6 +32,22 @@ class VzaarIE(InfoExtractor):
'ext': 'mp3',
'title': 'MP3',
},
+ }, {
+ # hlsAes = true
+ 'url': 'https://view.vzaar.com/11379930/player',
+ 'info_dict': {
+ 'id': '11379930',
+ 'ext': 'mp4',
+ 'title': 'Videoaula',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # with null videoTitle
+ 'url': 'https://view.vzaar.com/20313539/download',
+ 'only_matching': True,
}]
@staticmethod
@@ -45,7 +61,7 @@ class VzaarIE(InfoExtractor):
video_data = self._download_json(
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
- title = video_data['videoTitle']
+ title = video_data.get('videoTitle') or video_id
formats = []
@@ -54,6 +70,7 @@ class VzaarIE(InfoExtractor):
f = {
'url': source_url,
'format_id': 'http',
+ 'preference': 1,
}
if 'audio' in source_url:
f.update({
@@ -71,13 +88,17 @@ class VzaarIE(InfoExtractor):
video_guid = video_data.get('guid')
usp = video_data.get('usp')
- if isinstance(video_guid, compat_str) and isinstance(usp, dict):
- m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
- % (video_guid, video_id)) + '&'.join(
- '%s=%s' % (k, v) for k, v in usp.items())
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
+ if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
+ hls_aes = video_data.get('hlsAes')
+ qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
+ url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
+ m3u8_formats = self._extract_m3u8_formats(
+ url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ if hls_aes:
+ for f in m3u8_formats:
+ f['_decryption_key_url'] = url_templ % ('goose', '') + qs
+ formats.extend(m3u8_formats)
self._sort_formats(formats)
diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dl/extractor/wakanim.py
new file mode 100644
index 000000000..f9a2395d9
--- /dev/null
+++ b/youtube_dl/extractor/wakanim.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ merge_dicts,
+ urljoin,
+)
+
+
+class WakanimIE(InfoExtractor):
+ _VALID_URL = r'https://(?:www\.)?wakanim\.tv/[^/]+/v2/catalogue/episode/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/2997/the-asterisk-war-omu-staffel-1-episode-02-omu',
+ 'info_dict': {
+ 'id': '2997',
+ 'ext': 'mp4',
+ 'title': 'Episode 02',
+ 'description': 'md5:2927701ea2f7e901de8bfa8d39b2852d',
+ 'series': 'The Asterisk War (OmU.)',
+ 'season_number': 1,
+ 'episode': 'Episode 02',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ # DRM Protected
+ 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ m3u8_url = urljoin(url, self._search_regex(
+ r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url',
+ group='url'))
+ # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
+ encryption = self._search_regex(
+ r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
+ m3u8_url, 'encryption', default=None)
+ if encryption and encryption in ('cenc', 'cbcs-aapl'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ title = self._search_regex(
+ (r'<h1[^>]+\bclass=["\']episode_h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<span[^>]+\bclass=["\']episode_title["\'][^>]*>(?P<title>[^<]+)'),
+ webpage, 'title', default=None, group='title')
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ })
diff --git a/youtube_dl/extractor/weibo.py b/youtube_dl/extractor/weibo.py
index 3cb4d71a6..621df5b54 100644
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dl/extractor/weibo.py
@@ -19,7 +19,7 @@ from ..utils import (
class WeiboIE(InfoExtractor):
- _VALID_URL = r'https?://weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?weibo\.com/[0-9]+/(?P<id>[a-zA-Z0-9]+)'
_TEST = {
'url': 'https://weibo.com/6275294458/Fp6RGfbff?type=comment',
'info_dict': {
diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py
deleted file mode 100644
index 3dab9145b..000000000
--- a/youtube_dl/extractor/wimp.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-
-
-class WimpIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)'
- _TESTS = [{
- 'url': 'http://www.wimp.com/maru-is-exhausted/',
- 'md5': 'ee21217ffd66d058e8b16be340b74883',
- 'info_dict': {
- 'id': 'maru-is-exhausted',
- 'ext': 'mp4',
- 'title': 'Maru is exhausted.',
- 'description': 'md5:57e099e857c0a4ea312542b684a869b8',
- }
- }, {
- 'url': 'http://www.wimp.com/clowncar/',
- 'md5': '5c31ad862a90dc5b1f023956faec13fe',
- 'info_dict': {
- 'id': 'cG4CEr2aiSg',
- 'ext': 'webm',
- 'title': 'Basset hound clown car...incredible!',
- 'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom',
- 'upload_date': '20140303',
- 'uploader': 'Gretchen Hoey',
- 'uploader_id': 'gretchenandjeff1',
- },
- 'add_ie': ['Youtube'],
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- youtube_id = self._search_regex(
- (r"videoId\s*:\s*[\"']([0-9A-Za-z_-]{11})[\"']",
- r'data-id=["\']([0-9A-Za-z_-]{11})'),
- webpage, 'video URL', default=None)
- if youtube_id:
- return {
- '_type': 'url',
- 'url': youtube_id,
- 'ie_key': YoutubeIE.ie_key(),
- }
-
- info_dict = self._extract_jwplayer_data(
- webpage, video_id, require_title=False)
-
- info_dict.update({
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'description': self._og_search_description(webpage),
- })
-
- return info_dict
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
index 2182d6fd4..77febd2eb 100644
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -12,9 +12,8 @@ from ..utils import (
class WistiaIE(InfoExtractor):
- _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
- _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
- _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
+ _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
+ _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
_TESTS = [{
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@@ -35,35 +34,43 @@ class WistiaIE(InfoExtractor):
# with hls video
'url': 'wistia:807fafadvk',
'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+ 'only_matching': True,
}]
+ # https://wistia.com/support/embed-and-share/video-on-your-website
@staticmethod
def _extract_url(webpage):
- match = re.search(
- r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
- if match:
- return unescapeHTML(match.group('url'))
+ urls = WistiaIE._extract_urls(webpage)
+ return urls[0] if urls else None
- match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
-
- match = re.search(
- r'''(?sx)
- <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
- <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
- ''', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
+ for match in re.finditer(
+ r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+ urls.append(unescapeHTML(match.group('url')))
+ for match in re.finditer(
+ r'''(?sx)
+ <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+ ''', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ return urls
def _real_extract(self, url):
video_id = self._match_id(url)
data_json = self._download_json(
- self._API_URL % video_id, video_id,
+ self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
# Some videos require this.
headers={
- 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
+ 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
})
if data_json.get('error'):
@@ -88,27 +95,61 @@ class WistiaIE(InfoExtractor):
'url': aurl,
'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')),
+ 'filesize': int_or_none(a.get('size')),
})
else:
aext = a.get('ext')
- is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
- formats.append({
- 'format_id': atype,
+ display_name = a.get('display_name')
+ format_id = atype
+ if atype and atype.endswith('_video') and display_name:
+ format_id = '%s-%s' % (atype[:-6], display_name)
+ f = {
+ 'format_id': format_id,
'url': aurl,
- 'tbr': int_or_none(a.get('bitrate')),
- 'vbr': int_or_none(a.get('opt_vbitrate')),
- 'width': int_or_none(a.get('width')),
- 'height': int_or_none(a.get('height')),
- 'filesize': int_or_none(a.get('size')),
- 'vcodec': a.get('codec'),
- 'container': a.get('container'),
- 'ext': 'mp4' if is_m3u8 else aext,
- 'protocol': 'm3u8' if is_m3u8 else None,
+ 'tbr': int_or_none(a.get('bitrate')) or None,
'preference': 1 if atype == 'original' else None,
- })
+ }
+ if display_name == 'Audio':
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'vcodec': a.get('codec'),
+ })
+ if a.get('container') == 'm3u8' or aext == 'm3u8':
+ ts_f = f.copy()
+ ts_f.update({
+ 'ext': 'ts',
+ 'format_id': f['format_id'].replace('hls-', 'ts-'),
+ 'url': f['url'].replace('.bin', '.ts'),
+ })
+ formats.append(ts_f)
+ f.update({
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ })
+ else:
+ f.update({
+ 'container': a.get('container'),
+ 'ext': aext,
+ 'filesize': int_or_none(a.get('size')),
+ })
+ formats.append(f)
self._sort_formats(formats)
+ subtitles = {}
+ for caption in data.get('captions', []):
+ language = caption.get('language')
+ if not language:
+ continue
+ subtitles[language] = [{
+ 'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
+ }]
+
return {
'id': video_id,
'title': title,
@@ -117,4 +158,5 @@ class WistiaIE(InfoExtractor):
'thumbnails': thumbnails,
'duration': float_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('createdAt')),
+ 'subtitles': subtitles,
}
diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py
deleted file mode 100644
index 0f53f1bcb..000000000
--- a/youtube_dl/extractor/wrzuta.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- qualities,
- remove_start,
-)
-
-
-class WrzutaIE(InfoExtractor):
- IE_NAME = 'wrzuta.pl'
-
- _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)'
-
- _TESTS = [{
- 'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game',
- 'md5': '9e67e05bed7c03b82488d87233a9efe7',
- 'info_dict': {
- 'id': 'aq4hIZWrkBu',
- 'ext': 'mp4',
- 'title': 'Nike Football: The Last Game',
- 'duration': 307,
- 'uploader_id': 'laboratoriumdextera',
- 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd',
- },
- 'skip': 'Redirected to wrzuta.pl',
- }, {
- 'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus',
- 'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d',
- 'info_dict': {
- 'id': '01xBFabGXu6',
- 'ext': 'mp3',
- 'title': 'James Horner - Into The Na\'vi World [Bonus]',
- 'description': 'md5:30a70718b2cd9df3120fce4445b0263b',
- 'duration': 95,
- 'uploader_id': 'vexling',
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- typ = mobj.group('typ')
- uploader = mobj.group('uploader')
-
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- if urlh.geturl() == 'http://www.wrzuta.pl/':
- raise ExtractorError('Video removed', expected=True)
-
- quality = qualities(['SD', 'MQ', 'HQ', 'HD'])
-
- audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'}
-
- embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id)
-
- formats = []
- for media in embedpage['url']:
- fmt = media['type'].split('@')[0]
- if typ == 'audio':
- ext = audio_table.get(fmt, fmt)
- else:
- ext = fmt
-
- formats.append({
- 'format_id': '%s_%s' % (ext, media['quality'].lower()),
- 'url': media['url'],
- 'ext': ext,
- 'quality': quality(media['quality']),
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'formats': formats,
- 'duration': int_or_none(embedpage['duration']),
- 'uploader_id': uploader,
- 'description': self._og_search_description(webpage),
- 'age_limit': embedpage.get('minimalAge', 0),
- }
-
-
-class WrzutaPlaylistIE(InfoExtractor):
- """
- this class covers extraction of wrzuta playlist entries
- the extraction process bases on following steps:
- * collect information of playlist size
- * download all entries provided on
- the playlist webpage (the playlist is split
- on two pages: first directly reached from webpage
- second: downloaded on demand by ajax call and rendered
- using the ajax call response)
- * in case size of extracted entries not reached total number of entries
- use the ajax call to collect the remaining entries
- """
-
- IE_NAME = 'wrzuta.pl:playlist'
- _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/playlista/(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR/moja_muza',
- 'playlist_mincount': 14,
- 'info_dict': {
- 'id': '7XfO4vE84iR',
- 'title': 'Moja muza',
- },
- }, {
- 'url': 'http://heroesf70.wrzuta.pl/playlista/6Nj3wQHx756/lipiec_-_lato_2015_muzyka_swiata',
- 'playlist_mincount': 144,
- 'info_dict': {
- 'id': '6Nj3wQHx756',
- 'title': 'Lipiec - Lato 2015 Muzyka Świata',
- },
- }, {
- 'url': 'http://miromak71.wrzuta.pl/playlista/7XfO4vE84iR',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
- uploader = mobj.group('uploader')
-
- webpage = self._download_webpage(url, playlist_id)
-
- playlist_size = int_or_none(self._html_search_regex(
- (r'<div[^>]+class=["\']playlist-counter["\'][^>]*>\d+/(\d+)',
- r'<div[^>]+class=["\']all-counter["\'][^>]*>(.+?)</div>'),
- webpage, 'playlist size', default=None))
-
- playlist_title = remove_start(
- self._og_search_title(webpage), 'Playlista: ')
-
- entries = []
- if playlist_size:
- entries = [
- self.url_result(entry_url)
- for _, entry_url in re.findall(
- r'<a[^>]+href=(["\'])(http.+?)\1[^>]+class=["\']playlist-file-page',
- webpage)]
- if playlist_size > len(entries):
- playlist_content = self._download_json(
- 'http://%s.wrzuta.pl/xhr/get_playlist_offset/%s' % (uploader, playlist_id),
- playlist_id,
- 'Downloading playlist JSON',
- 'Unable to download playlist JSON')
- entries.extend([
- self.url_result(entry['filelink'])
- for entry in playlist_content.get('files', []) if entry.get('filelink')])
-
- return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py
new file mode 100644
index 000000000..bebc77bb5
--- /dev/null
+++ b/youtube_dl/extractor/wwe.py
@@ -0,0 +1,140 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ urljoin,
+)
+
+
+class WWEBaseIE(InfoExtractor):
+ _SUBTITLE_LANGS = {
+ 'English': 'en',
+ 'Deutsch': 'de',
+ }
+
+ def _extract_entry(self, data, url, video_id=None):
+ video_id = compat_str(video_id or data['nid'])
+ title = data['title']
+
+ formats = self._extract_m3u8_formats(
+ data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+
+ description = data.get('description')
+ thumbnail = urljoin(url, data.get('image'))
+ series = data.get('show_name')
+ episode = data.get('episode_name')
+
+ subtitles = {}
+ tracks = data.get('tracks')
+ if isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ track_file = url_or_none(track.get('file'))
+ if not track_file:
+ continue
+ label = track.get('label')
+ lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': track_file,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'series': series,
+ 'episode': episode,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class WWEIE(WWEBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
+ 'md5': '92811c6a14bfc206f7a6a9c5d9140184',
+ 'info_dict': {
+ 'id': '40048199',
+ 'ext': 'mp4',
+ 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
+ 'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ landing = self._parse_json(
+ self._html_search_regex(
+ r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
+ webpage, 'drupal settings'),
+ display_id)['WWEVideoLanding']
+
+ data = landing['initialVideo']['playlist'][0]
+ video_id = landing.get('initialVideoId')
+
+ info = self._extract_entry(data, url, video_id)
+ info['display_id'] = display_id
+ return info
+
+
+class WWEPlaylistIE(WWEBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.wwe.com/shows/raw/2018-11-12',
+ 'info_dict': {
+ 'id': '2018-11-12',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
+ video = self._parse_json(
+ mobj.group('data'), display_id, transform_source=unescapeHTML,
+ fatal=False)
+ if not video:
+ continue
+ data = try_get(video, lambda x: x['playlist'][0], dict)
+ if not data:
+ continue
+ try:
+ entry = self._extract_entry(data, url)
+ except Exception:
+ continue
+ entry['extractor_key'] = WWEIE.ie_key()
+ entries.append(entry)
+
+ return self.playlist_result(entries, display_id)
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py
index b38c7a7b3..48ef07ed1 100644
--- a/youtube_dl/extractor/xfileshare.py
+++ b/youtube_dl/extractor/xfileshare.py
@@ -4,37 +4,64 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_chr
from ..utils import (
decode_packed_codes,
determine_ext,
ExtractorError,
int_or_none,
- NO_DEFAULT,
+ js_to_json,
urlencode_postdata,
)
+# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
+def aa_decode(aa_code):
+ symbol_table = [
+ ('7', '((゚ー゚) + (o^_^o))'),
+ ('6', '((o^_^o) +(o^_^o))'),
+ ('5', '((゚ー゚) + (゚Θ゚))'),
+ ('2', '((o^_^o) - (゚Θ゚))'),
+ ('4', '(゚ー゚)'),
+ ('3', '(o^_^o)'),
+ ('1', '(゚Θ゚)'),
+ ('0', '(c^_^o)'),
+ ]
+ delim = '(゚Д゚)[゚ε゚]+'
+ ret = ''
+ for aa_char in aa_code.split(delim):
+ for val, pat in symbol_table:
+ aa_char = aa_char.replace(pat, val)
+ aa_char = aa_char.replace('+ ', '')
+ m = re.match(r'^\d+', aa_char)
+ if m:
+ ret += compat_chr(int(m.group(0), 8))
+ else:
+ m = re.match(r'^u([\da-f]+)', aa_char)
+ if m:
+ ret += compat_chr(int(m.group(1), 16))
+ return ret
+
+
class XFileShareIE(InfoExtractor):
_SITES = (
- (r'daclips\.(?:in|com)', 'DaClips'),
- (r'filehoot\.com', 'FileHoot'),
- (r'gorillavid\.(?:in|com)', 'GorillaVid'),
- (r'movpod\.in', 'MovPod'),
- (r'powerwatch\.pw', 'PowerWatch'),
- (r'rapidvideo\.ws', 'Rapidvideo.ws'),
+ (r'clipwatching\.com', 'ClipWatching'),
+ (r'gounlimited\.to', 'GoUnlimited'),
+ (r'govid\.me', 'GoVid'),
+ (r'holavid\.com', 'HolaVid'),
+ (r'streamty\.com', 'Streamty'),
(r'thevideobee\.to', 'TheVideoBee'),
- (r'vidto\.(?:me|se)', 'Vidto'),
- (r'streamin\.to', 'Streamin.To'),
- (r'xvidstage\.com', 'XVIDSTAGE'),
- (r'vidabc\.com', 'Vid ABC'),
+ (r'uqload\.com', 'Uqload'),
(r'vidbom\.com', 'VidBom'),
(r'vidlo\.us', 'vidlo'),
- (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
- (r'fastvideo\.me', 'FastVideo.me'),
+ (r'vidlocker\.xyz', 'VidLocker'),
+ (r'vidshare\.tv', 'VidShare'),
+ (r'vup\.to', 'VUp'),
+ (r'xvideosharing\.com', 'XVideoSharing'),
)
IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
- _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
% '|'.join(site for site in list(zip(*_SITES))[0]))
_FILE_NOT_FOUND_REGEXES = (
@@ -43,82 +70,14 @@ class XFileShareIE(InfoExtractor):
)
_TESTS = [{
- 'url': 'http://gorillavid.in/06y9juieqpmi',
- 'md5': '5ae4a3580620380619678ee4875893ba',
- 'info_dict': {
- 'id': '06y9juieqpmi',
- 'ext': 'mp4',
- 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
- 'thumbnail': r're:http://.*\.jpg',
- },
- }, {
- 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
- 'only_matching': True,
- }, {
- 'url': 'http://daclips.in/3rso4kdn6f9m',
- 'md5': '1ad8fd39bb976eeb66004d3a4895f106',
+ 'url': 'http://xvideosharing.com/fq65f94nd2ve',
+ 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
'info_dict': {
- 'id': '3rso4kdn6f9m',
+ 'id': 'fq65f94nd2ve',
'ext': 'mp4',
- 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
+ 'title': 'sample',
'thumbnail': r're:http://.*\.jpg',
- }
- }, {
- 'url': 'http://movpod.in/0wguyyxi1yca',
- 'only_matching': True,
- }, {
- 'url': 'http://filehoot.com/3ivfabn7573c.html',
- 'info_dict': {
- 'id': '3ivfabn7573c',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
- 'thumbnail': r're:http://.*\.jpg',
- },
- 'skip': 'Video removed',
- }, {
- 'url': 'http://vidto.me/ku5glz52nqe1.html',
- 'info_dict': {
- 'id': 'ku5glz52nqe1',
- 'ext': 'mp4',
- 'title': 'test'
- }
- }, {
- 'url': 'http://powerwatch.pw/duecjibvicbu',
- 'info_dict': {
- 'id': 'duecjibvicbu',
- 'ext': 'mp4',
- 'title': 'Big Buck Bunny trailer',
- },
- }, {
- 'url': 'http://xvidstage.com/e0qcnl03co6z',
- 'info_dict': {
- 'id': 'e0qcnl03co6z',
- 'ext': 'mp4',
- 'title': 'Chucky Prank 2015.mp4',
- },
- }, {
- # removed by administrator
- 'url': 'http://xvidstage.com/amfy7atlkx25',
- 'only_matching': True,
- }, {
- 'url': 'http://vidabc.com/i8ybqscrphfv',
- 'info_dict': {
- 'id': 'i8ybqscrphfv',
- 'ext': 'mp4',
- 'title': 're:Beauty and the Beast 2017',
- },
- 'params': {
- 'skip_download': True,
},
- }, {
- 'url': 'http://www.rapidvideo.cool/b667kprndr8w',
- 'only_matching': True,
- }, {
- 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
- 'only_matching': True,
- }, {
- 'url': 'http://vidto.se/1tx1pf6t12cg.html',
- 'only_matching': True,
}]
@staticmethod
@@ -131,10 +90,9 @@ class XFileShareIE(InfoExtractor):
webpage)]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ host, video_id = re.match(self._VALID_URL, url).groups()
- url = 'http://%s/%s' % (mobj.group('host'), video_id)
+ url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
webpage = self._download_webpage(url, video_id)
if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
@@ -142,7 +100,7 @@ class XFileShareIE(InfoExtractor):
fields = self._hidden_inputs(webpage)
- if fields['op'] == 'download1':
+ if fields.get('op') == 'download1':
countdown = int_or_none(self._search_regex(
r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
webpage, 'countdown', default=None))
@@ -160,13 +118,37 @@ class XFileShareIE(InfoExtractor):
(r'style="z-index: [0-9]+;">([^<]+)</span>',
r'<td nowrap>([^<]+)</td>',
r'h4-fine[^>]*>([^<]+)<',
- r'>Watch (.+) ',
+ r'>Watch (.+)[ <]',
r'<h2 class="video-page-head">([^<]+)</h2>',
- r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to
+ r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
+ r'title\s*:\s*"([^"]+)"'), # govid.me
webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or video_id).strip()
- def extract_formats(default=NO_DEFAULT):
+ for regex, func in (
+ (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
+ (r'(゚.+)', aa_decode)):
+ obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
+ if obf_code:
+ webpage = webpage.replace(obf_code, func(obf_code))
+
+ formats = []
+
+ jwplayer_data = self._search_regex(
+ [
+ r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
+ r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
+ ], webpage,
+ 'jwplayer data', default=None)
+ if jwplayer_data:
+ jwplayer_data = self._parse_json(
+ jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
+ if jwplayer_data:
+ formats = self._parse_jwplayer_data(
+ jwplayer_data, video_id, False,
+ m3u8_id='hls', mpd_id='dash')['formats']
+
+ if not formats:
urls = []
for regex in (
r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
@@ -177,6 +159,12 @@ class XFileShareIE(InfoExtractor):
video_url = mobj.group('url')
if video_url not in urls:
urls.append(video_url)
+
+ sources = self._search_regex(
+ r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
+ if sources:
+ urls.extend(self._parse_json(sources, video_id))
+
formats = []
for video_url in urls:
if determine_ext(video_url) == 'm3u8':
@@ -189,21 +177,13 @@ class XFileShareIE(InfoExtractor):
'url': video_url,
'format_id': 'sd',
})
- if not formats and default is not NO_DEFAULT:
- return default
- self._sort_formats(formats)
- return formats
-
- formats = extract_formats(default=None)
-
- if not formats:
- webpage = decode_packed_codes(self._search_regex(
- r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
- webpage, 'packed code'))
- formats = extract_formats()
+ self._sort_formats(formats)
thumbnail = self._search_regex(
- r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
+ [
+ r'<video[^>]+poster="([^"]+)"',
+ r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
+ ], webpage, 'thumbnail', default=None)
return {
'id': video_id,
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 68a48034e..0f7be6a7d 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
+import itertools
import re
from .common import InfoExtractor
@@ -8,6 +9,7 @@ from ..utils import (
clean_html,
determine_ext,
dict_get,
+ extract_attributes,
ExtractorError,
int_or_none,
parse_duration,
@@ -18,21 +20,21 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
_VALID_URL = r'''(?x)
https?://
- (?:.+?\.)?xhamster\.com/
+ (?:.+?\.)?%s/
(?:
movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
)
- '''
-
+ ''' % _DOMAINS
_TESTS = [{
- 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
- 'md5': '8281348b8d3c53d39fffb377d24eac4e',
+ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'md5': '98b4687efb1ffd331c4197854dc09e8f',
'info_dict': {
'id': '1509445',
- 'display_id': 'femaleagent_shy_beauty_takes_the_bait',
+ 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait',
'timestamp': 1350194821,
@@ -40,13 +42,12 @@ class XHamsterIE(InfoExtractor):
'uploader': 'Ruseful2011',
'duration': 893,
'age_limit': 18,
- 'categories': ['Fake Hub', 'Amateur', 'MILFs', 'POV', 'Beauti', 'Beauties', 'Beautiful', 'Boss', 'Office', 'Oral', 'Reality', 'Sexy', 'Taking'],
},
}, {
- 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
'info_dict': {
'id': '2221348',
- 'display_id': 'britney_spears_sexy_booty',
+ 'display_id': 'britney-spears-sexy-booty',
'ext': 'mp4',
'title': 'Britney Spears Sexy Booty',
'timestamp': 1379123460,
@@ -54,13 +55,12 @@ class XHamsterIE(InfoExtractor):
'uploader': 'jojo747400',
'duration': 200,
'age_limit': 18,
- 'categories': ['Britney Spears', 'Celebrities', 'HD Videos', 'Sexy', 'Sexy Booty'],
},
'params': {
'skip_download': True,
},
}, {
- # empty seo
+ # empty seo, unavailable via new URL schema
'url': 'http://xhamster.com/movies/5667973/.html',
'info_dict': {
'id': '5667973',
@@ -71,7 +71,6 @@ class XHamsterIE(InfoExtractor):
'uploader': 'parejafree',
'duration': 72,
'age_limit': 18,
- 'categories': ['Amateur', 'Blowjobs'],
},
'params': {
'skip_download': True,
@@ -91,6 +90,21 @@ class XHamsterIE(InfoExtractor):
# new URL schema
'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -99,7 +113,7 @@ class XHamsterIE(InfoExtractor):
display_id = mobj.group('display_id') or mobj.group('display_id_2')
desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
- webpage = self._download_webpage(desktop_url, video_id)
+ webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
error = self._html_search_regex(
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
@@ -147,6 +161,9 @@ class XHamsterIE(InfoExtractor):
'ext': determine_ext(format_url, 'mp4'),
'height': get_height(quality),
'filesize': filesize,
+ 'http_headers': {
+ 'Referer': urlh.geturl(),
+ },
})
self._sort_formats(formats)
@@ -282,7 +299,7 @@ class XHamsterIE(InfoExtractor):
class XHamsterEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?xhamster\.com/xembed\.php\?video=(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
_TEST = {
'url': 'http://xhamster.com/xembed.php?video=3328539',
'info_dict': {
@@ -319,3 +336,49 @@ class XHamsterEmbedIE(InfoExtractor):
video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
return self.url_result(video_url, 'XHamster')
+
+
+class XHamsterUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
+ _TESTS = [{
+ # Paginated user profile
+ 'url': 'https://xhamster.com/users/netvideogirls/videos',
+ 'info_dict': {
+ 'id': 'netvideogirls',
+ },
+ 'playlist_mincount': 267,
+ }, {
+ # Non-paginated user profile
+ 'url': 'https://xhamster.com/users/firatkaan/videos',
+ 'info_dict': {
+ 'id': 'firatkaan',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ def _entries(self, user_id):
+ next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
+ for pagenum in itertools.count(1):
+ page = self._download_webpage(
+ next_page_url, user_id, 'Downloading page %s' % pagenum)
+ for video_tag in re.findall(
+ r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
+ page):
+ video = extract_attributes(video_tag)
+ video_url = url_or_none(video.get('href'))
+ if not video_url or not XHamsterIE.suitable(video_url):
+ continue
+ video_id = XHamsterIE._match_id(video_url)
+ yield self.url_result(
+ video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
+ mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
+ if not mobj:
+ break
+ next_page = extract_attributes(mobj.group(0))
+ next_page_url = url_or_none(next_page.get('href'))
+ if not next_page_url:
+ break
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self.playlist_result(self._entries(user_id), user_id)
diff --git a/youtube_dl/extractor/xiami.py b/youtube_dl/extractor/xiami.py
index 8333fb534..618da8382 100644
--- a/youtube_dl/extractor/xiami.py
+++ b/youtube_dl/extractor/xiami.py
@@ -7,7 +7,7 @@ from ..utils import int_or_none
class XiamiBaseIE(InfoExtractor):
- _API_BASE_URL = 'http://www.xiami.com/song/playlist/cat/json/id'
+ _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id'
def _download_webpage_handle(self, *args, **kwargs):
webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs)
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
index c6c0b3291..01b253dcb 100644
--- a/youtube_dl/extractor/xtube.py
+++ b/youtube_dl/extractor/xtube.py
@@ -47,7 +47,7 @@ class XTubeIE(InfoExtractor):
'display_id': 'A-Super-Run-Part-1-YT',
'ext': 'flv',
'title': 'A Super Run - Part 1 (YT)',
- 'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
+ 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
'uploader': 'tshirtguy59',
'duration': 579,
'view_count': int,
@@ -87,10 +87,24 @@ class XTubeIE(InfoExtractor):
'Cookie': 'age_verified=1; cookiesAccepted=1',
})
- sources = self._parse_json(self._search_regex(
- r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
- webpage, 'sources', group='sources'), video_id,
- transform_source=js_to_json)
+ title, thumbnail, duration = [None] * 3
+
+ config = self._parse_json(self._search_regex(
+ r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
+ default='{}'), video_id, transform_source=js_to_json, fatal=False)
+ if config:
+ config = config.get('mainRoll')
+ if isinstance(config, dict):
+ title = config.get('title')
+ thumbnail = config.get('poster')
+ duration = int_or_none(config.get('duration'))
+ sources = config.get('sources') or config.get('format')
+
+ if not isinstance(sources, dict):
+ sources = self._parse_json(self._search_regex(
+ r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+ webpage, 'sources', group='sources'), video_id,
+ transform_source=js_to_json)
formats = []
for format_id, format_url in sources.items():
@@ -102,20 +116,25 @@ class XTubeIE(InfoExtractor):
self._remove_duplicate_formats(formats)
self._sort_formats(formats)
- title = self._search_regex(
- (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
- webpage, 'title', group='title')
- description = self._search_regex(
+ if not title:
+ title = self._search_regex(
+ (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+ webpage, 'title', group='title')
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:description', webpage, default=None) or self._search_regex(
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
uploader = self._search_regex(
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
webpage, 'uploader', fatal=False)
- duration = parse_duration(self._search_regex(
- r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
- webpage, 'duration', fatal=False))
+ if not duration:
+ duration = parse_duration(self._search_regex(
+ r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
+ webpage, 'duration', fatal=False))
view_count = str_to_int(self._search_regex(
- r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
+ (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
+ r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
webpage, 'view count', fatal=False))
comment_count = str_to_int(self._html_search_regex(
r'>Comments? \(([\d,\.]+)\)<',
@@ -126,6 +145,7 @@ class XTubeIE(InfoExtractor):
'display_id': display_id,
'title': title,
'description': description,
+ 'thumbnail': thumbnail,
'uploader': uploader,
'duration': duration,
'view_count': view_count,
@@ -144,7 +164,7 @@ class XTubeUserIE(InfoExtractor):
'id': 'greenshowers-4056496',
'age_limit': 18,
},
- 'playlist_mincount': 155,
+ 'playlist_mincount': 154,
}
def _real_extract(self, url):
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py
index efee95651..8fc64914c 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dl/extractor/xvideos.py
@@ -17,7 +17,8 @@ class XVideosIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:www\.)?xvideos\.com/video|
+ (?:[^/]+\.)?xvideos2?\.com/video|
+ (?:www\.)?xvideos\.es/video|
flashservice\.xvideos\.com/embedframe/|
static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
)
@@ -39,13 +40,49 @@ class XVideosIE(InfoExtractor):
}, {
'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
'only_matching': True,
+ }, {
+ 'url': 'http://xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
+ }, {
+ 'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
+ 'only_matching': True
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
- 'http://www.xvideos.com/video%s/' % video_id, video_id)
+ 'https://www.xvideos.com/video%s/' % video_id, video_id)
mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
if mobj:
@@ -57,10 +94,17 @@ class XVideosIE(InfoExtractor):
webpage, 'title', default=None,
group='title') or self._og_search_title(webpage)
- thumbnail = self._search_regex(
- (r'setThumbUrl\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1',
- r'url_bigthumb=(?P<thumbnail>.+?)&amp'),
- webpage, 'thumbnail', fatal=False, group='thumbnail')
+ thumbnails = []
+ for preference, thumbnail in enumerate(('', '169')):
+ thumbnail_url = self._search_regex(
+ r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail,
+ webpage, 'thumbnail', default=None, group='thumbnail')
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
+ })
+
duration = int_or_none(self._og_search_property(
'duration', webpage, default=None)) or parse_duration(
self._search_regex(
@@ -98,6 +142,6 @@ class XVideosIE(InfoExtractor):
'formats': formats,
'title': title,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'age_limit': 18,
}
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 552013a74..e4615376c 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -1,451 +1,316 @@
# coding: utf-8
from __future__ import unicode_literals
+import hashlib
import itertools
-import json
import re
from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
+ compat_str,
compat_urllib_parse,
- compat_urlparse,
)
from ..utils import (
clean_html,
- determine_ext,
ExtractorError,
- extract_attributes,
int_or_none,
mimetype2ext,
+ parse_iso8601,
smuggle_url,
- unescapeHTML,
+ try_get,
+ url_or_none,
)
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .nbc import NBCSportsVPlayerIE
+from .brightcove import BrightcoveNewIE
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
- _VALID_URL = r'(?P<host>https?://(?:(?P<country>[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?'
- _TESTS = [
- {
- 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
- 'info_dict': {
- 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
- 'ext': 'mp4',
- 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
- 'description': 'Julian and Travis watch Julian Smith',
- 'duration': 6863,
- },
+ _VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
+ _TESTS = [{
+ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+ 'info_dict': {
+ 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
+ 'ext': 'mp4',
+ 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
+ 'description': 'Julian and Travis watch Julian Smith',
+ 'duration': 6863,
+ 'timestamp': 1369812016,
+ 'upload_date': '20130529',
},
- {
- 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
- 'md5': '251af144a19ebc4a033e8ba91ac726bb',
- 'info_dict': {
- 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
- 'ext': 'mp4',
- 'title': 'Codefellas - The Cougar Lies with Spanish Moss',
- 'description': 'md5:66b627ab0a282b26352136ca96ce73c1',
- 'duration': 151,
- },
- 'skip': 'HTTP Error 404',
+ }, {
+ 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
+ 'md5': '7993e572fac98e044588d0b5260f4352',
+ 'info_dict': {
+ 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
+ 'ext': 'mp4',
+ 'title': "Yahoo Saves 'Community'",
+ 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
+ 'duration': 170,
+ 'timestamp': 1406838636,
+ 'upload_date': '20140731',
},
- {
- 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
- 'md5': '7993e572fac98e044588d0b5260f4352',
- 'info_dict': {
- 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
- 'ext': 'mp4',
- 'title': "Yahoo Saves 'Community'",
- 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
- 'duration': 170,
- }
+ }, {
+ 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
+ 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
+ 'info_dict': {
+ 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
+ 'ext': 'webm',
+ 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
+ 'description': 'md5:f66c890e1490f4910a9953c941dee944',
+ 'duration': 97,
+ 'timestamp': 1414489862,
+ 'upload_date': '20141028',
+ }
+ }, {
+ 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
+ 'md5': '88e209b417f173d86186bef6e4d1f160',
+ 'info_dict': {
+ 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
+ 'ext': 'mp4',
+ 'title': 'China Moses Is Crazy About the Blues',
+ 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
+ 'duration': 128,
+ 'timestamp': 1385722202,
+ 'upload_date': '20131129',
+ }
+ }, {
+ 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
+ 'md5': '2a9752f74cb898af5d1083ea9f661b58',
+ 'info_dict': {
+ 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+ 'ext': 'mp4',
+ 'title': '\'True Story\' Trailer',
+ 'description': 'True Story',
+ 'duration': 150,
+ 'timestamp': 1418919206,
+ 'upload_date': '20141218',
},
- {
- 'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
- 'md5': '45c024bad51e63e9b6f6fad7a43a8c23',
- 'info_dict': {
- 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
- 'ext': 'mp4',
- 'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
- 'description': '直言台南沒捷運 交通居五都之末',
- 'duration': 396,
- },
+ }, {
+ 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
+ 'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ 'timestamp': 1426270238,
},
- {
- 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
- 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
- 'info_dict': {
- 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
- 'ext': 'webm',
- 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
- 'description': 'md5:f66c890e1490f4910a9953c941dee944',
- 'duration': 97,
- }
+ }, {
+ 'url': 'https://tw.news.yahoo.com/-100120367.html',
+ 'only_matching': True,
+ }, {
+ # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
+ 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
+ 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+ 'info_dict': {
+ 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
+ 'ext': 'mp4',
+ 'title': 'Communitary - Community Episode 1: Ladders',
+ 'description': 'md5:8fc39608213295748e1e289807838c97',
+ 'duration': 1646,
+ 'timestamp': 1440436550,
+ 'upload_date': '20150824',
+ 'series': 'Communitary',
+ 'season_number': 6,
+ 'episode_number': 1,
},
- {
- 'url': 'https://ca.sports.yahoo.com/video/program-makes-hockey-more-affordable-013127711.html',
- 'md5': '57e06440778b1828a6079d2f744212c4',
- 'info_dict': {
- 'id': 'c9fa2a36-0d4d-3937-b8f6-cc0fb1881e73',
- 'ext': 'mp4',
- 'title': 'Program that makes hockey more affordable not offered in Manitoba',
- 'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4',
- 'duration': 121,
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
- 'info_dict': {
- 'id': '154609075',
- },
- 'playlist': [{
- 'md5': '000887d0dc609bc3a47c974151a40fb8',
- 'info_dict': {
- 'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
- 'ext': 'mp4',
- 'title': '\'The Interview\' TV Spot: War',
- 'description': 'The Interview',
- 'duration': 30,
- },
- }, {
- 'md5': '81bc74faf10750fe36e4542f9a184c66',
- 'info_dict': {
- 'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
- 'ext': 'mp4',
- 'title': '\'The Interview\' TV Spot: Guys',
- 'description': 'The Interview',
- 'duration': 30,
- },
- }],
- }, {
- 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
- 'md5': '88e209b417f173d86186bef6e4d1f160',
- 'info_dict': {
- 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
- 'ext': 'mp4',
- 'title': 'China Moses Is Crazy About the Blues',
- 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
- 'duration': 128,
- }
- }, {
- 'url': 'https://in.lifestyle.yahoo.com/video/connect-dots-dark-side-virgo-090247395.html',
- 'md5': 'd9a083ccf1379127bf25699d67e4791b',
- 'info_dict': {
- 'id': '52aeeaa3-b3d1-30d8-9ef8-5d0cf05efb7c',
- 'ext': 'mp4',
- 'title': 'Connect the Dots: Dark Side of Virgo',
- 'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
- 'duration': 201,
- },
- 'skip': 'Domain name in.lifestyle.yahoo.com gone',
- }, {
- 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
- 'md5': '989396ae73d20c6f057746fb226aa215',
- 'info_dict': {
- 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
- 'ext': 'mp4',
- 'title': '\'True Story\' Trailer',
- 'description': 'True Story',
- 'duration': 150,
- },
- }, {
- 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
- 'only_matching': True,
- }, {
- 'note': 'NBC Sports embeds',
- 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
- 'info_dict': {
- 'id': '9CsDKds0kvHI',
- 'ext': 'flv',
- 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
- 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
- 'upload_date': '20150313',
- 'uploader': 'NBCU-SPORTS',
- 'timestamp': 1426270238,
- }
- }, {
- 'url': 'https://tw.news.yahoo.com/-100120367.html',
- 'only_matching': True,
- }, {
- # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
- 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
- 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
- 'info_dict': {
- 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
- 'ext': 'mp4',
- 'title': 'Communitary - Community Episode 1: Ladders',
- 'description': 'md5:8fc39608213295748e1e289807838c97',
- 'duration': 1646,
- },
- }, {
- # it uses an alias to get the video_id
- 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html',
- 'info_dict': {
- 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737',
- 'ext': 'mp4',
- 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking',
- 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
- },
+ }, {
+ # ytwnews://cavideo/
+ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+ 'info_dict': {
+ 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+ 'ext': 'mp4',
+ 'title': '單車天使 - 中文版預',
+ 'description': '中文版預',
+ 'timestamp': 1476696196,
+ 'upload_date': '20161017',
},
- {
- # config['models']['applet_model']['data']['sapi'] has no query
- 'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016',
- 'md5': 'dac0c72d502bc5facda80c9e6d5c98db',
- 'info_dict': {
- 'id': 'a6015640-e9e5-3efb-bb60-05589a183919',
- 'ext': 'mp4',
- 'description': 'Galactic',
- 'title': 'Dolla Diva (feat. Maggie Koerner)',
- },
- 'skip': 'redirect to https://www.yahoo.com/music',
+ 'params': {
+ 'skip_download': True,
},
- {
- # yahoo://article/
- 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
- 'info_dict': {
- 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
- 'ext': 'mp4',
- 'title': "'True Story' Trailer",
- 'description': 'True Story',
- },
- 'params': {
- 'skip_download': True,
- },
+ }, {
+ # Contains both a Yahoo hosted video and multiple Youtube embeds
+ 'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
+ 'info_dict': {
+ 'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
+ 'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead',
+ 'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.',
},
- {
- # ytwnews://cavideo/
- 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+ 'playlist': [{
'info_dict': {
- 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+ 'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6',
'ext': 'mp4',
- 'title': '單車天使 - 中文版預',
- 'description': '中文版預',
+ 'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
+ 'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
+ 'timestamp': 1572406500,
+ 'upload_date': '20191030',
},
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # custom brightcove
- 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/',
+ }, {
'info_dict': {
- 'id': '5575377707001',
+ 'id': '352CFDOQrKg',
'ext': 'mp4',
- 'title': "Clown entertainers say 'It' is hurting their business",
- 'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.',
- 'timestamp': 1505341164,
- 'upload_date': '20170913',
- 'uploader_id': '2376984109001',
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
+ 'description': 'md5:35b61e94c2ae214bc965ff4245f80d11',
+ 'uploader': 'The Voice',
+ 'uploader_id': 'NBCTheVoice',
+ 'upload_date': '20191029',
},
+ }],
+ 'params': {
+ 'playlistend': 2,
},
- {
- # custom brightcove, geo-restricted to Australia, bypassable
- 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/',
- 'only_matching': True,
- }
- ]
+ 'expected_warnings': ['HTTP Error 404'],
+ }, {
+ 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_id = mobj.group('id')
- display_id = mobj.group('display_id') or page_id
- host = mobj.group('host')
- webpage, urlh = self._download_webpage_handle(url, display_id)
- if 'err=404' in urlh.geturl():
- raise ExtractorError('Video gone', expected=True)
-
- # Look for iframed media first
- entries = []
- iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
- for idx, iframe_url in enumerate(iframe_urls):
- entries.append(self.url_result(host + iframe_url, 'Yahoo'))
- if entries:
- return self.playlist_result(entries, page_id)
-
- # Look for NBCSports iframes
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
-
- # Look for Brightcove Legacy Studio embeds
- bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
- if bc_url:
- return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
-
- def brightcove_url_result(bc_url):
- return self.url_result(
- smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
- BrightcoveNewIE.ie_key())
-
- # Look for Brightcove New Studio embeds
- bc_url = BrightcoveNewIE._extract_url(self, webpage)
- if bc_url:
- return brightcove_url_result(bc_url)
-
- brightcove_iframe = self._search_regex(
- r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
- 'brightcove iframe', default=None)
- if brightcove_iframe:
- attr = extract_attributes(brightcove_iframe)
- src = attr.get('src')
- if src:
- parsed_src = compat_urlparse.urlparse(src)
- qs = compat_urlparse.parse_qs(parsed_src.query)
- account_id = qs.get('accountId', ['2376984109001'])[0]
- brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
- if account_id and brightcove_id:
- return brightcove_url_result(
- 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
- % (account_id, brightcove_id))
-
- # Query result is often embedded in webpage as JSON. Sometimes explicit requests
- # to video API results in a failure with geo restriction reason therefore using
- # embedded query result when present sounds reasonable.
- config_json = self._search_regex(
- r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
- webpage, 'videoplayer applet', default=None)
- if config_json:
- config = self._parse_json(config_json, display_id, fatal=False)
- if config:
- sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
- if sapi and 'query' in sapi:
- info = self._extract_info(display_id, sapi, webpage)
- self._sort_formats(info['formats'])
- return info
-
- items_json = self._search_regex(
- r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
- default=None)
- if items_json is None:
- alias = self._search_regex(
- r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
- if alias is not None:
- alias_info = self._download_json(
- 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
- display_id, 'Downloading alias info')
- video_id = alias_info[0]['id']
- else:
- CONTENT_ID_REGEXES = [
- r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
- r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
- r'"first_videoid"\s*:\s*"([^"]+)"',
- r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
- r'<article[^>]data-uuid=["\']([^"\']+)',
- r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
- r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
- ]
- video_id = self._search_regex(
- CONTENT_ID_REGEXES, webpage, 'content ID')
+ url, country, display_id = re.match(self._VALID_URL, url).groups()
+ if not country:
+ country = 'us'
else:
- items = json.loads(items_json)
- info = items['mediaItems']['query']['results']['mediaObj'][0]
- # The 'meta' field is not always in the video webpage, we request it
- # from another page
- video_id = info['id']
- return self._get_info(video_id, display_id, webpage)
-
- def _extract_info(self, display_id, query, webpage):
- info = query['query']['results']['mediaObj'][0]
- meta = info.get('meta')
- video_id = info.get('id')
-
- if not meta:
- msg = info['status'].get('msg')
- if msg:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, msg), expected=True)
- raise ExtractorError('Unable to extract media object meta')
+ country = country.split('-')[0]
+ api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
+
+ for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
+ content = self._download_json(
+ api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
+ display_id, 'Downloading content JSON metadata', fatal=i == 1)
+ if content:
+ item = content['items'][0]
+ break
+
+ if item.get('type') != 'video':
+ entries = []
+
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+ for e in item.get('body', []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if not iframe_url:
+ continue
+ entries.append(self.url_result(iframe_url))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ video_id = item['uuid']
+ video = self._download_json(
+ api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
+ video_id, 'Downloading video JSON metadata')[0]
+ title = video['title']
+
+ if country == 'malaysia':
+ country = 'my'
+
+ is_live = video.get('live_state') == 'live'
+ fmts = ('m3u8',) if is_live else ('webm', 'mp4')
+
+ urls = []
formats = []
- for s in info['streams']:
- tbr = int_or_none(s.get('bitrate'))
- format_info = {
- 'width': int_or_none(s.get('width')),
- 'height': int_or_none(s.get('height')),
- 'tbr': tbr,
- }
-
- host = s['host']
- path = s['path']
- if host.startswith('rtmp'):
- fmt = 'rtmp'
- format_info.update({
- 'url': host,
- 'play_path': path,
- 'ext': 'flv',
- })
- else:
- if s.get('format') == 'm3u8_playlist':
- fmt = 'hls'
- format_info.update({
- 'protocol': 'm3u8_native',
- 'ext': 'mp4',
- })
- else:
- fmt = format_info['ext'] = determine_ext(path)
- format_url = compat_urlparse.urljoin(host, path)
- format_info['url'] = format_url
- format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
- formats.append(format_info)
-
- closed_captions = self._html_search_regex(
- r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
- default='[]')
-
- cc_json = self._parse_json(closed_captions, video_id, fatal=False)
subtitles = {}
- if cc_json:
- for closed_caption in cc_json:
- lang = closed_caption['lang']
- if lang not in subtitles:
- subtitles[lang] = []
- subtitles[lang].append({
- 'url': closed_caption['url'],
- 'ext': mimetype2ext(closed_caption['content_type']),
+ for fmt in fmts:
+ media_obj = self._download_json(
+ 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+ video_id, 'Downloading %s JSON metadata' % fmt,
+ headers=self.geo_verification_headers(), query={
+ 'format': fmt,
+ 'region': country.upper(),
+ })['query']['results']['mediaObj'][0]
+ msg = media_obj.get('status', {}).get('msg')
+
+ for s in media_obj.get('streams', []):
+ host = s.get('host')
+ path = s.get('path')
+ if not host or not path:
+ continue
+ s_url = host + path
+ if s.get('format') == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ s_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ continue
+ tbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'url': s_url,
+ 'format_id': fmt + ('-%d' % tbr if tbr else ''),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'tbr': tbr,
+ 'fps': int_or_none(s.get('framerate')),
+ })
+
+ for cc in media_obj.get('closedcaptions', []):
+ cc_url = cc.get('url')
+ if not cc_url or cc_url in urls:
+ continue
+ urls.append(cc_url)
+ subtitles.setdefault(cc.get('lang') or 'en-US', []).append({
+ 'url': cc_url,
+ 'ext': mimetype2ext(cc.get('content_type')),
})
+ streaming_url = video.get('streaming_url')
+ if streaming_url and not is_live:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ if not formats and msg == 'geo restricted':
+ self.raise_geo_restricted()
+
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumb in video.get('thumbnails', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb.get('tag'),
+ 'url': thumb.get('url'),
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ series_info = video.get('series_info') or {}
+
return {
'id': video_id,
- 'display_id': display_id,
- 'title': unescapeHTML(meta['title']),
+ 'title': self._live_title(title) if is_live else title,
'formats': formats,
- 'description': clean_html(meta['description']),
- 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
- 'duration': int_or_none(meta.get('duration')),
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(video.get('description')),
+ 'timestamp': parse_iso8601(video.get('publish_time')),
'subtitles': subtitles,
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('view_count')),
+ 'is_live': is_live,
+ 'series': video.get('show_name'),
+ 'season_number': int_or_none(series_info.get('season_number')),
+ 'episode_number': int_or_none(series_info.get('episode_number')),
}
- def _get_info(self, video_id, display_id, webpage):
- region = self._search_regex(
- r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
- webpage, 'region', fatal=False, default='US').upper()
- formats = []
- info = {}
- for fmt in ('webm', 'mp4'):
- query_result = self._download_json(
- 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
- display_id, 'Downloading %s video info' % fmt, query={
- 'protocol': 'http',
- 'region': region,
- 'format': fmt,
- })
- info = self._extract_info(display_id, query_result, webpage)
- formats.extend(info['formats'])
- formats.extend(self._extract_m3u8_formats(
- 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
- info['formats'] = formats
- return info
-
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'
@@ -477,3 +342,228 @@ class YahooSearchIE(SearchInfoExtractor):
'id': query,
'entries': entries,
}
+
+
+class YahooGyaOPlayerIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao:player'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
+ 'info_dict': {
+ 'id': '5993125228001',
+ 'ext': 'mp4',
+ 'title': 'フューリー 【字幕版】',
+ 'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
+ 'uploader_id': '4235717419001',
+ 'upload_date': '20190124',
+ 'timestamp': 1548294365,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', ':')
+ headers = self.geo_verification_headers()
+ headers['Accept'] = 'application/json'
+ resp = self._download_json(
+ 'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={
+ 'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
+ 'query': '''{
+ content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
+ video {
+ delivery {
+ id
+ }
+ title
+ }
+ }
+}''' % video_id,
+ }, headers=headers)
+ content = resp['data']['content']
+ if not content:
+ msg = resp['errors'][0]['message']
+ if msg == 'not in japan':
+ self.raise_geo_restricted(countries=['JP'])
+ raise ExtractorError(msg)
+ video = content['video']
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': video['title'],
+ 'url': smuggle_url(
+ 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
+ {'geo_countries': ['JP']}),
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ }
+
+
+class YahooGyaOIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
+ 'info_dict': {
+ 'id': '00449:v03102',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url).replace('/', ':')
+ videos = self._download_json(
+ 'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)['videos']
+ entries = []
+ for video in videos:
+ video_id = video.get('id')
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
+ YahooGyaOPlayerIE.ie_key(), video_id))
+ return self.playlist_result(entries, program_id)
+
+
+class YahooJapanNewsIE(InfoExtractor):
+ IE_NAME = 'yahoo:japannews'
+ IE_DESC = 'Yahoo! Japan News'
+ _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
+ _GEO_COUNTRIES = ['JP']
+ _TESTS = [{
+ 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
+ 'info_dict': {
+ 'id': '1736242',
+ 'ext': 'mp4',
+ 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
+ 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
+ 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted
+ 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://headlines.yahoo.co.jp/videonews/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp/feature/1356',
+ 'only_matching': True
+ }]
+
+ def _extract_formats(self, json_data, content_id):
+ formats = []
+
+ video_data = try_get(
+ json_data,
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+ for vid in video_data or []:
+ delivery = vid.get('delivery')
+ url = url_or_none(vid.get('Url'))
+ if not delivery or not url:
+ continue
+ elif delivery == 'hls':
+ formats.extend(
+ self._extract_m3u8_formats(
+ url, content_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': url,
+ 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
+ 'height': int_or_none(vid.get('height')),
+ 'width': int_or_none(vid.get('width')),
+ 'tbr': int_or_none(vid.get('bitrate')),
+ })
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ display_id = mobj.group('id') or host
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, 'title', default=None
+ ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
+
+ if display_id == host:
+ # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
+ stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
+ entries = [
+ self.url_result(
+ smuggle_url(
+ 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
+ {'geo_countries': ['JP']}),
+ ie='BrightcoveNew', video_id=plist_id)
+ for plist_id in stream_plists]
+ return self.playlist_result(entries, playlist_title=title)
+
+ # Article page
+ description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image', webpage, 'thumbnail', default=None)
+ space_id = self._search_regex([
+ r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
+ r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
+ r'<!--\s+SpaceID=(\d+)'
+ ], webpage, 'spaceid')
+
+ content_id = self._search_regex(
+ r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
+ webpage, 'contentid', group='contentid')
+
+ json_data = self._download_json(
+ 'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
+ content_id,
+ query={
+ 'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
+ 'output': 'json',
+ 'space_id': space_id,
+ 'domain': host,
+ 'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
+ 'device_type': '1100',
+ })
+ formats = self._extract_formats(json_data, content_id)
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
index 009203851..08d35e04c 100644
--- a/youtube_dl/extractor/yandexmusic.py
+++ b/youtube_dl/extractor/yandexmusic.py
@@ -10,6 +10,7 @@ from ..utils import (
ExtractorError,
int_or_none,
float_or_none,
+ try_get,
)
@@ -51,43 +52,66 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
- _TEST = {
+ _TESTS = [{
'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20',
'info_dict': {
'id': '4878838',
'ext': 'mp3',
- 'title': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
+ 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
'filesize': 4628061,
'duration': 193.04,
'track': 'Gypsy Eyes 1',
'album': 'Gypsy Soul',
'album_artist': 'Carlo Ambrosio',
- 'artist': 'Carlo Ambrosio, Carlo Ambrosio & Fabio Di Bari',
+ 'artist': 'Carlo Ambrosio & Fabio Di Bari',
'release_year': 2009,
},
'skip': 'Travis CI servers blocked by YandexMusic',
- }
+ }, {
+ # multiple disks
+ 'url': 'http://music.yandex.ru/album/3840501/track/705105',
+ 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
+ 'info_dict': {
+ 'id': '705105',
+ 'ext': 'mp3',
+ 'title': 'Hooverphonic - Sometimes',
+ 'filesize': 5743386,
+ 'duration': 239.27,
+ 'track': 'Sometimes',
+ 'album': 'The Best of Hooverphonic',
+ 'album_artist': 'Hooverphonic',
+ 'artist': 'Hooverphonic',
+ 'release_year': 2016,
+ 'genre': 'pop',
+ 'disc_number': 2,
+ 'track_number': 9,
+ },
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
- def _get_track_url(self, storage_dir, track_id):
- data = self._download_json(
- 'http://music.yandex.ru/api/v1.5/handlers/api-jsonp.jsx?action=getTrackSrc&p=download-info/%s'
- % storage_dir,
- track_id, 'Downloading track location JSON')
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id, track_id = mobj.group('album_id'), mobj.group('id')
- # Each string is now wrapped in a list, this is probably only temporarily thus
- # supporting both scenarios (see https://github.com/rg3/youtube-dl/issues/10193)
- for k, v in data.items():
- if v and isinstance(v, list):
- data[k] = v[0]
+ track = self._download_json(
+ 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+ track_id, 'Downloading track JSON')['track']
+ track_title = track['title']
- key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + data['path'][1:] + data['s']).encode('utf-8')).hexdigest()
- storage = storage_dir.split('.')
+ download_data = self._download_json(
+ 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
+ track_id, 'Downloading track location url JSON',
+ headers={'X-Retpath-Y': url})
- return ('http://%s/get-mp3/%s/%s?track-id=%s&from=service-10-track&similarities-experiment=default'
- % (data['host'], key, data['ts'] + data['path'], storage[1]))
+ fd_data = self._download_json(
+ download_data['src'], track_id,
+ 'Downloading track location JSON',
+ query={'format': 'json'})
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
+ storage = track['storageDir'].split('.')
+ f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
- def _get_track_info(self, track):
thumbnail = None
cover_uri = track.get('albums', [{}])[0].get('coverUri')
if cover_uri:
@@ -95,20 +119,33 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
if not thumbnail.startswith('http'):
thumbnail = 'http://' + thumbnail
- track_title = track['title']
track_info = {
- 'id': track['id'],
+ 'id': track_id,
'ext': 'mp3',
- 'url': self._get_track_url(track['storageDir'], track['id']),
+ 'url': f_url,
'filesize': int_or_none(track.get('fileSize')),
'duration': float_or_none(track.get('durationMs'), 1000),
'thumbnail': thumbnail,
'track': track_title,
+ 'acodec': download_data.get('codec'),
+ 'abr': int_or_none(download_data.get('bitrate')),
}
+ def extract_artist_name(artist):
+ decomposed = artist.get('decomposed')
+ if not isinstance(decomposed, list):
+ return artist['name']
+ parts = [artist['name']]
+ for element in decomposed:
+ if isinstance(element, dict) and element.get('name'):
+ parts.append(element['name'])
+ elif isinstance(element, compat_str):
+ parts.append(element)
+ return ''.join(parts)
+
def extract_artist(artist_list):
if artist_list and isinstance(artist_list, list):
- artists_names = [a['name'] for a in artist_list if a.get('name')]
+ artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
if artists_names:
return ', '.join(artists_names)
@@ -117,10 +154,17 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
album = albums[0]
if isinstance(album, dict):
year = album.get('year')
+ disc_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['volume']))
+ track_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['index']))
track_info.update({
'album': album.get('title'),
'album_artist': extract_artist(album.get('artists')),
'release_year': int_or_none(year),
+ 'genre': album.get('genre'),
+ 'disc_number': disc_number,
+ 'track_number': track_number,
})
track_artist = extract_artist(track.get('artists'))
@@ -131,17 +175,8 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
})
else:
track_info['title'] = track_title
- return track_info
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- album_id, track_id = mobj.group('album_id'), mobj.group('id')
-
- track = self._download_json(
- 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
- track_id, 'Downloading track JSON')['track']
- return self._get_track_info(track)
+ return track_info
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
@@ -157,7 +192,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
IE_DESC = 'Яндекс.Музыка - Альбом'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
- _TEST = {
+ _TESTS = [{
'url': 'http://music.yandex.ru/album/540508',
'info_dict': {
'id': '540508',
@@ -165,7 +200,15 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
},
'playlist_count': 50,
'skip': 'Travis CI servers blocked by YandexMusic',
- }
+ }, {
+ 'url': 'https://music.yandex.ru/album/3840501',
+ 'info_dict': {
+ 'id': '3840501',
+ 'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
+ },
+ 'playlist_count': 33,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
def _real_extract(self, url):
album_id = self._match_id(url)
@@ -174,7 +217,7 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
album_id, 'Downloading album JSON')
- entries = self._build_playlist(album['volumes'][0])
+ entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
title = '%s - %s' % (album['artists'][0]['name'], album['title'])
year = album.get('year')
@@ -200,7 +243,7 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
- # https://github.com/rg3/youtube-dl/issues/6666)
+ # https://github.com/ytdl-org/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
'info_dict': {
'id': '1036',
diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dl/extractor/yandexvideo.py
new file mode 100644
index 000000000..46529be05
--- /dev/null
+++ b/youtube_dl/extractor/yandexvideo.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ url_or_none,
+)
+
+
+class YandexVideoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ yandex\.ru(?:/portal/(?:video|efir))?/?\?.*?stream_id=|
+ frontend\.vh\.yandex\.ru/player/
+ )
+ (?P<id>[\da-f]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://yandex.ru/portal/video?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
+ 'md5': '33955d7ae052f15853dc41f35f17581c',
+ 'info_dict': {
+ 'id': '4dbb262b4fe5cf15a215de4f34eee34d',
+ 'ext': 'mp4',
+ 'title': 'В Нью-Йорке баржи и теплоход оторвались от причала и расплылись по Гудзону',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 0,
+ 'duration': 30,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://yandex.ru/portal/efir?stream_id=4dbb36ec4e0526d58f9f2dc8f0ecf374&from=morda',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://yandex.ru/?stream_id=4dbb262b4fe5cf15a215de4f34eee34d',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://frontend.vh.yandex.ru/player/4dbb262b4fe5cf15a215de4f34eee34d?from=morda',
+ 'only_matching': True,
+ }, {
+ # vod-episode, series episode
+ 'url': 'https://yandex.ru/portal/video?stream_id=45b11db6e4b68797919c93751a938cee',
+ 'only_matching': True,
+ }, {
+ # episode, sports
+ 'url': 'https://yandex.ru/?stream_channel=1538487871&stream_id=4132a07f71fb0396be93d74b3477131d',
+ 'only_matching': True,
+ }, {
+ # DASH with DRM
+ 'url': 'https://yandex.ru/portal/video?from=morda&stream_id=485a92d94518d73a9d0ff778e13505f8',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ content = self._download_json(
+ 'https://frontend.vh.yandex.ru/v22/player/%s.json' % video_id,
+ video_id, query={
+ 'stream_options': 'hires',
+ 'disable_trackings': 1,
+ })['content']
+
+ content_url = url_or_none(content.get('content_url')) or url_or_none(
+ content['streams'][0]['url'])
+ title = content.get('title') or content.get('computed_title')
+
+ ext = determine_ext(content_url)
+
+ if ext == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ content_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ elif ext == 'mpd':
+ formats = self._extract_mpd_formats(
+ content_url, video_id, mpd_id='dash')
+ else:
+ formats = [{'url': content_url}]
+
+ self._sort_formats(formats)
+
+ description = content.get('description')
+ thumbnail = content.get('thumbnail')
+ timestamp = (int_or_none(content.get('release_date'))
+ or int_or_none(content.get('release_date_ut'))
+ or int_or_none(content.get('start_time')))
+ duration = int_or_none(content.get('duration'))
+ series = content.get('program_title')
+ age_limit = int_or_none(content.get('restriction_age'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': series,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
index dff69fcb7..88aabd272 100644
--- a/youtube_dl/extractor/youjizz.py
+++ b/youtube_dl/extractor/youjizz.py
@@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
encodings = self._parse_json(
self._search_regex(
- r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+ r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
default='[]'),
video_id, fatal=False)
for encoding in encodings:
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 2f5a7b023..61d1ab209 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -258,8 +258,8 @@ class YoukuShowIE(InfoExtractor):
transform_source=lambda s: js_to_json(strip_jsonp(s))).get('html')
if playlist_data is None:
return [None, None]
- drama_list = (get_element_by_class('p-drama-grid', playlist_data) or
- get_element_by_class('p-drama-half-row', playlist_data))
+ drama_list = (get_element_by_class('p-drama-grid', playlist_data)
+ or get_element_by_class('p-drama-half-row', playlist_data))
if drama_list is None:
raise ExtractorError('No episodes found')
video_urls = re.findall(r'<a[^>]+href="([^"]+)"', drama_list)
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
index ea0bce784..e7fca22de 100644
--- a/youtube_dl/extractor/youporn.py
+++ b/youtube_dl/extractor/youporn.py
@@ -5,7 +5,6 @@ import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
- sanitized_Request,
str_to_int,
unescapeHTML,
unified_strdate,
@@ -15,7 +14,7 @@ from ..aes import aes_decrypt_text
class YouPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
_TESTS = [{
'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
'md5': '3744d24c50438cf5b6f6d59feb5055c2',
@@ -57,22 +56,32 @@ class YouPornIE(InfoExtractor):
'params': {
'skip_download': True,
},
+ }, {
+ 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youporn.com/watch/505835',
+ 'only_matching': True,
}]
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
+ webpage)
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- display_id = mobj.group('display_id')
+ display_id = mobj.group('display_id') or video_id
- request = sanitized_Request(url)
- request.add_header('Cookie', 'age_verified=1')
- webpage = self._download_webpage(request, display_id)
+ webpage = self._download_webpage(
+ 'http://www.youporn.com/watch/%s' % video_id, display_id,
+ headers={'Cookie': 'age_verified=1'})
- title = self._search_regex(
- [r'(?:video_titles|videoTitle)\s*[:=]\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'<h1[^>]+class=["\']heading\d?["\'][^>]*>(?P<title>[^<]+)<'],
- webpage, 'title', group='title',
- default=None) or self._og_search_title(
+ title = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
+ webpage, 'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True)
@@ -134,7 +143,11 @@ class YouPornIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
- description = self._og_search_description(webpage, default=None)
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
+ webpage, 'description',
+ default=None) or self._og_search_description(
+ webpage, default=None)
thumbnail = self._search_regex(
r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
webpage, 'thumbnail', fatal=False, group='thumbnail')
diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py
index 6602f7c03..98347491e 100644
--- a/youtube_dl/extractor/yourporn.py
+++ b/youtube_dl/extractor/yourporn.py
@@ -1,41 +1,67 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import urljoin
+from ..compat import compat_str
+from ..utils import (
+ parse_duration,
+ urljoin,
+)
class YourPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
- _TEST = {
- 'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
+ _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
+ _TESTS = [{
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
'info_dict': {
'id': '57ffcb2e1179b',
'ext': 'mp4',
'title': 'md5:c9f43630bd968267672651ba905a7d35',
'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 165,
+ 'age_limit': 18,
},
- }
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- video_url = urljoin(url, self._parse_json(
+ parts = self._parse_json(
self._search_regex(
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
group='data'),
- video_id)[video_id])
+ video_id)[video_id].split('/')
+
+ num = 0
+ for c in parts[6] + parts[7]:
+ if c.isnumeric():
+ num += int(c)
+ parts[5] = compat_str(int(parts[5]) - num)
+ parts[1] += '8'
+ video_url = urljoin(url, '/'.join(parts))
title = (self._search_regex(
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
default=None) or self._og_search_description(webpage)).strip()
thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
+ default=None))
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'ext': 'mp4',
}
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 78203ef84..b35bf03aa 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter
from ..compat import (
compat_chr,
+ compat_HTTPError,
compat_kwargs,
compat_parse_qs,
compat_urllib_parse_unquote,
@@ -26,8 +27,10 @@ from ..compat import (
compat_str,
)
from ..utils import (
+ bool_or_none,
clean_html,
error_to_compat_str,
+ extract_attributes,
ExtractorError,
float_or_none,
get_element_by_attribute,
@@ -37,16 +40,17 @@ from ..utils import (
orderedSet,
parse_codecs,
parse_duration,
- qualities,
remove_quotes,
remove_start,
smuggle_url,
+ str_or_none,
str_to_int,
try_get,
unescapeHTML,
unified_strdate,
unsmuggle_url,
uppercase_escape,
+ url_or_none,
urlencode_postdata,
)
@@ -64,11 +68,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
- _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
+ _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
+
+ _YOUTUBE_CLIENT_HEADERS = {
+ 'x-youtube-client-name': '1',
+ 'x-youtube-client-version': '1.20200609.04.02',
+ }
def _set_language(self):
self._set_cookie(
- '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+ '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
# YouTube sets the expire time to about two months
expire_time=time.time() + 2 * 30 * 24 * 3600)
@@ -112,6 +121,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'f.req': json.dumps(f_req),
'flowName': 'GlifWebSignIn',
'flowEntry': 'ServiceLogin',
+ # TODO: reverse actual botguard identifier generation algo
+ 'bgRequest': '["identifier",""]',
})
return self._download_json(
url, None, note=note, errnote=errnote,
@@ -285,10 +296,26 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
if not mobj:
break
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
+ count = 0
+ retries = 3
+ while count <= retries:
+ try:
+ # Downloading page may result in intermittent 5xx HTTP error
+ # that is usually worked around with a retry
+ more = self._download_json(
+ 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s%s'
+ % (page_num, ' (retry #%d)' % count if count else ''),
+ transform_source=uppercase_escape,
+ headers=self._YOUTUBE_CLIENT_HEADERS)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
+ count += 1
+ if count <= retries:
+ continue
+ raise
+
content_html = more['content_html']
if not content_html.strip():
# Some webpages show a "Load more" button but they don't
@@ -302,17 +329,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
for video_id, video_title in self.extract_videos_from_page(content):
yield self.url_result(video_id, 'Youtube', video_id, video_title)
- def extract_videos_from_page(self, page):
- ids_in_page = []
- titles_in_page = []
- for mobj in re.finditer(self._VIDEO_RE, page):
+ def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
+ for mobj in re.finditer(video_re, page):
# The link with index 0 is not the first video of the playlist (not sure if still actual)
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
continue
video_id = mobj.group('id')
- video_title = unescapeHTML(mobj.group('title'))
+ video_title = unescapeHTML(
+ mobj.group('title')) if 'title' in mobj.groupdict() else None
if video_title:
video_title = video_title.strip()
+ if video_title == '► Play all':
+ video_title = None
try:
idx = ids_in_page.index(video_id)
if video_title and not titles_in_page[idx]:
@@ -320,6 +348,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
except ValueError:
ids_in_page.append(video_id)
titles_in_page.append(video_title)
+
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+ self.extract_videos_from_page_impl(
+ self._VIDEO_RE, page, ids_in_page, titles_in_page)
return zip(ids_in_page, titles_in_page)
@@ -343,13 +377,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_VALID_URL = r"""(?x)^
(
(?:https?://|//) # http(s):// or protocol-independent URL
- (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
+ (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
(?:www\.)?deturl\.com/www\.youtube\.com/|
(?:www\.)?pwnyoutube\.com/|
(?:www\.)?hooktube\.com/|
(?:www\.)?yourepeat\.com/|
tube\.majestyc\.net/|
- (?:www\.)?invidio\.us/|
+ # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
+ (?:(?:www|dev)\.)?invidio\.us/|
+ (?:(?:www|no)\.)?invidiou\.sh/|
+ (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
+ (?:www\.)?invidious\.kabi\.tk/|
+ (?:www\.)?invidious\.13ad\.de/|
+ (?:www\.)?invidious\.mastodon\.host/|
+ (?:www\.)?invidious\.nixnet\.xyz/|
+ (?:www\.)?invidious\.drycat\.fr/|
+ (?:www\.)?tube\.poal\.co/|
+ (?:www\.)?vid\.wxzm\.sx/|
+ (?:www\.)?yewtu\.be/|
+ (?:www\.)?yt\.elukerio\.org/|
+ (?:www\.)?yt\.lelux\.fi/|
+ (?:www\.)?invidious\.ggc-project\.de/|
+ (?:www\.)?yt\.maisputain\.ovh/|
+ (?:www\.)?invidious\.13ad\.de/|
+ (?:www\.)?invidious\.toot\.koeln/|
+ (?:www\.)?invidious\.fdn\.fr/|
+ (?:www\.)?watch\.nettohikari\.com/|
+ (?:www\.)?kgg2m7yk5aybusll\.onion/|
+ (?:www\.)?qklhadlycap4cnod\.onion/|
+ (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
+ (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
+ (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
+ (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
+ (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
+ (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
@@ -379,6 +440,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?(1).+)? # if we found the ID, everything can follow
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
+ _PLAYER_INFO_RE = (
+ r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
+ r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
+ )
_formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@@ -425,7 +490,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
'137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
'160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
'212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
'264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
@@ -477,8 +542,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
+
+ # av01 video only formats sometimes served with "unknown" codecs
+ '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
- _SUBTITLE_FORMATS = ('ttml', 'vtt')
+ _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
_GEO_BYPASS = False
@@ -496,11 +567,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
- 'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
+ 'view_count': int,
'like_count': int,
'dislike_count': int,
'start_time': 1,
@@ -516,7 +587,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20120506',
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
'alt_title': 'I Love It (feat. Charli XCX)',
- 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
+ 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
'iconic ep', 'iconic', 'love', 'it'],
@@ -524,7 +595,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Icona Pop',
'uploader_id': 'IconaPop',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
- 'license': 'Standard YouTube License',
'creator': 'Icona Pop',
'track': 'I Love It (feat. Charli XCX)',
'artist': 'Icona Pop',
@@ -537,14 +607,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': '07FYdnEawAQ',
'ext': 'mp4',
'upload_date': '20130703',
- 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
'alt_title': 'Tunnel Vision',
- 'description': 'md5:64249768eec3bc4276236606ea996373',
+ 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
'duration': 419,
'uploader': 'justintimberlakeVEVO',
'uploader_id': 'justintimberlakeVEVO',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
- 'license': 'Standard YouTube License',
'creator': 'Justin Timberlake',
'track': 'Tunnel Vision',
'artist': 'Justin Timberlake',
@@ -563,7 +632,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'SET India',
'uploader_id': 'setindia',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
- 'license': 'Standard YouTube License',
'age_limit': 18,
}
},
@@ -578,11 +646,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'upload_date': '20121002',
- 'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
+ 'view_count': int,
'like_count': int,
'dislike_count': int,
},
@@ -601,7 +669,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
'description': '',
'uploader': '8KVIDEO',
- 'license': 'Standard YouTube License',
'title': 'UHDTV TEST 8K VIDEO.mp4'
},
'params': {
@@ -616,13 +683,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'info_dict': {
'id': 'IB3lcPjvWLA',
'ext': 'm4a',
- 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
- 'description': 'md5:1900ed86ee514927b9e00fbead6969a5',
+ 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
+ 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
'duration': 244,
'uploader': 'AfrojackVEVO',
'uploader_id': 'AfrojackVEVO',
'upload_date': '20131011',
- 'license': 'Standard YouTube License',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -636,14 +702,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
- 'alt_title': 'Shake It Off',
- 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
+ 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
'duration': 242,
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
- 'license': 'Standard YouTube License',
- 'creator': 'Taylor Swift',
},
'params': {
'youtube_include_dash_manifest': True,
@@ -658,10 +721,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': 'mp4',
'duration': 219,
'upload_date': '20100909',
- 'uploader': 'TJ Kirk',
+ 'uploader': 'Amazing Atheist',
'uploader_id': 'TheAmazingAtheist',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
- 'license': 'Standard YouTube License',
'title': 'Burning Everyone\'s Koran',
'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
}
@@ -679,7 +741,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'WitcherGame',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
'upload_date': '20140605',
- 'license': 'Standard YouTube License',
'age_limit': 18,
},
},
@@ -688,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
'info_dict': {
'id': '6kLq3WMV1nU',
- 'ext': 'webm',
+ 'ext': 'mp4',
'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
'duration': 246,
@@ -696,11 +757,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'LloydVEVO',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
'upload_date': '20110629',
- 'license': 'Standard YouTube License',
'age_limit': 18,
},
},
- # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
# YouTube Red ad is not captured for creator
{
'url': '__2ABJjxzNo',
@@ -711,18 +771,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20100430',
'uploader_id': 'deadmau5',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
- 'creator': 'deadmau5',
+ 'creator': 'Dada Life, deadmau5',
'description': 'md5:12c56784b8032162bb936a5f76d55360',
'uploader': 'deadmau5',
- 'license': 'Standard YouTube License',
'title': 'Deadmau5 - Some Chords (HD)',
- 'alt_title': 'Some Chords',
+ 'alt_title': 'This Machine Kills Some Chords',
},
'expected_warnings': [
'DASH manifest missing',
]
},
- # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
+ # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
{
'url': 'lqQg6PlCWgI',
'info_dict': {
@@ -732,7 +791,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'license': 'Standard YouTube License',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
@@ -754,7 +812,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
'uploader': '孫ᄋᄅ',
- 'license': 'Standard YouTube License',
'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
},
},
@@ -775,7 +832,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'skip': 'This live event has ended.',
},
- # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
+ # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
{
'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
'info_dict': {
@@ -788,7 +845,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'dorappi2000',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
'uploader': 'dorappi2000',
- 'license': 'Standard YouTube License',
'formats': 'mincount:31',
},
'skip': 'not actual anymore',
@@ -804,7 +860,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Airtek',
'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
- 'license': 'Standard YouTube License',
'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
},
'params': {
@@ -877,9 +932,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'params': {
'skip_download': True,
},
+ 'skip': 'This video is not available.',
},
{
- # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
+ # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
'info_dict': {
'id': 'gVfLd0zydlo',
@@ -897,10 +953,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'only_matching': True,
},
{
- # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
# Also tests cut-off URL expansion in video description (see
- # https://github.com/rg3/youtube-dl/issues/1892,
- # https://github.com/rg3/youtube-dl/issues/8164)
+ # https://github.com/ytdl-org/youtube-dl/issues/1892,
+ # https://github.com/ytdl-org/youtube-dl/issues/8164)
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
@@ -913,17 +969,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': 'IronSoulElf',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
'uploader': 'IronSoulElf',
- 'license': 'Standard YouTube License',
'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
'track': 'Dark Walk - Position Music',
'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
},
'params': {
'skip_download': True,
},
},
{
- # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
'only_matching': True,
},
@@ -987,7 +1043,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'only_matching': True,
},
{
- # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
+ # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
'only_matching': True,
},
@@ -1017,13 +1073,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'id': 'iqKdEhx-dD4',
'ext': 'mp4',
'title': 'Isolation - Mind Field (Ep 1)',
- 'description': 'md5:25b78d2f64ae81719f5c96319889b736',
+ 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
'duration': 2085,
'upload_date': '20170118',
'uploader': 'Vsauce',
'uploader_id': 'Vsauce',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
- 'license': 'Standard YouTube License',
'series': 'Mind Field',
'season_number': 1,
'episode_number': 1,
@@ -1049,7 +1104,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'New Century Foundation',
'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
- 'license': 'Standard YouTube License',
},
'params': {
'skip_download': True,
@@ -1073,6 +1127,144 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
'only_matching': True,
},
+ {
+ # DRM protected
+ 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+ 'only_matching': True,
+ },
+ {
+ # Video with unsupported adaptive stream type formats
+ 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
+ 'info_dict': {
+ 'id': 'Z4Vy8R84T1U',
+ 'ext': 'mp4',
+ 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 433,
+ 'upload_date': '20130923',
+ 'uploader': 'Amelia Putri Harwita',
+ 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
+ 'formats': 'maxcount:10',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'youtube_include_dash_manifest': False,
+ },
+ 'skip': 'not actual anymore',
+ },
+ {
+ # Youtube Music Auto-generated description
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'uploader': 'Stephen - Topic',
+ 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
+ 'artist': 'Stephen',
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'release_year': 2019,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # Retrieve 'artist' field from 'Artist:' in video description
+ # when it is present on youtube music video
+ 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
+ 'info_dict': {
+ 'id': 'k0jLE7tTwjY',
+ 'ext': 'mp4',
+ 'title': 'Latch Feat. Sam Smith',
+ 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
+ 'upload_date': '20150110',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
+ 'artist': 'Disclosure',
+ 'track': 'Latch Feat. Sam Smith',
+ 'album': 'Latch Featuring Sam Smith',
+ 'release_date': '20121008',
+ 'release_year': 2012,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle multiple artists on youtube music video
+ 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
+ 'info_dict': {
+ 'id': '74qn0eJSjpA',
+ 'ext': 'mp4',
+ 'title': 'Eastside',
+ 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
+ 'upload_date': '20180710',
+ 'uploader': 'Benny Blanco - Topic',
+ 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
+ 'artist': 'benny blanco, Halsey, Khalid',
+ 'track': 'Eastside',
+ 'album': 'Eastside',
+ 'release_date': '20180713',
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle youtube music video with release_year and no release_date
+ 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
+ 'info_dict': {
+ 'id': '-hcAI0g-f5M',
+ 'ext': 'mp4',
+ 'title': 'Put It On Me',
+ 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
+ 'upload_date': '20180426',
+ 'uploader': 'Matt Maeson - Topic',
+ 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
+ 'artist': 'Matt Maeson',
+ 'track': 'Put It On Me',
+ 'album': 'The Hearse',
+ 'release_date': None,
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
+ 'only_matching': True,
+ },
+ {
+ # invalid -> valid video id redirection
+ 'url': 'DJztXj2GPfl',
+ 'info_dict': {
+ 'id': 'DJztXj2GPfk',
+ 'ext': 'mp4',
+ 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
+ 'description': 'md5:bf577a41da97918e94fa9798d9228825',
+ 'upload_date': '20090125',
+ 'uploader': 'Prochorowka',
+ 'uploader_id': 'Prochorowka',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
+ 'artist': 'Panjabi MC',
+ 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
+ 'album': 'Beware of the Boys (Mundian To Bach Ke)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
]
def __init__(self, *args, **kwargs):
@@ -1099,14 +1291,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
""" Return a string representation of a signature """
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
- def _extract_signature_function(self, video_id, player_url, example_sig):
- id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
- player_url)
- if not id_m:
+ @classmethod
+ def _extract_player_info(cls, player_url):
+ for player_re in cls._PLAYER_INFO_RE:
+ id_m = re.search(player_re, player_url)
+ if id_m:
+ break
+ else:
raise ExtractorError('Cannot identify player %r' % player_url)
- player_type = id_m.group('ext')
- player_id = id_m.group('id')
+ return id_m.group('ext'), id_m.group('id')
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
+ player_type, player_id = self._extract_player_info(player_url)
# Read from filesystem cache
func_id = '%s_%s_%s' % (
@@ -1186,10 +1382,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
- (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ # Obsolete patterns
+ r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
- r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode)
@@ -1269,8 +1474,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# regex won't capture the whole JSON. Yet working around by trying more
# concrete regex first keeping in mind proper quoted string handling
# to be implemented in future that will replace this workaround (see
- # https://github.com/rg3/youtube-dl/issues/7468,
- # https://github.com/rg3/youtube-dl/pull/7599)
+ # https://github.com/ytdl-org/youtube-dl/issues/7468,
+ # https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
)
@@ -1382,8 +1587,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._downloader.report_warning(err_msg)
return {}
- def _mark_watched(self, video_id, video_info):
- playback_url = video_info.get('videostats_playback_base_url', [None])[0]
+ def _mark_watched(self, video_id, video_info, player_response):
+ playback_url = url_or_none(try_get(
+ player_response,
+ lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
+ video_info, lambda x: x['videostats_playback_base_url'][0]))
if not playback_url:
return
parsed_playback_url = compat_urlparse.urlparse(playback_url)
@@ -1450,12 +1658,63 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_id = mobj.group(2)
return video_id
- def _extract_annotations(self, video_id):
- url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
- return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
+ def _extract_chapters_from_json(self, webpage, video_id, duration):
+ if not webpage:
+ return
+ player = self._parse_json(
+ self._search_regex(
+ r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
+ 'player args', default='{}'),
+ video_id, fatal=False)
+ if not player or not isinstance(player, dict):
+ return
+ watch_next_response = player.get('watch_next_response')
+ if not isinstance(watch_next_response, compat_str):
+ return
+ response = self._parse_json(watch_next_response, video_id, fatal=False)
+ if not response or not isinstance(response, dict):
+ return
+ chapters_list = try_get(
+ response,
+ lambda x: x['playerOverlays']
+ ['playerOverlayRenderer']
+ ['decoratedPlayerBarRenderer']
+ ['decoratedPlayerBarRenderer']
+ ['playerBar']
+ ['chapteredPlayerBarRenderer']
+ ['chapters'],
+ list)
+ if not chapters_list:
+ return
+
+ def chapter_time(chapter):
+ return float_or_none(
+ try_get(
+ chapter,
+ lambda x: x['chapterRenderer']['timeRangeStartMillis'],
+ int),
+ scale=1000)
+ chapters = []
+ for next_num, chapter in enumerate(chapters_list, start=1):
+ start_time = chapter_time(chapter)
+ if start_time is None:
+ continue
+ end_time = (chapter_time(chapters_list[next_num])
+ if next_num < len(chapters_list) else duration)
+ if end_time is None:
+ continue
+ title = try_get(
+ chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
+ compat_str)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': title,
+ })
+ return chapters
@staticmethod
- def _extract_chapters(description, duration):
+ def _extract_chapters_from_description(description, duration):
if not description:
return None
chapter_lines = re.findall(
@@ -1489,6 +1748,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
})
return chapters
+ def _extract_chapters(self, webpage, description, video_id, duration):
+ return (self._extract_chapters_from_json(webpage, video_id, duration)
+ or self._extract_chapters_from_description(description, duration))
+
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -1516,7 +1779,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video webpage
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
- video_webpage = self._download_webpage(url, video_id)
+ video_webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
+ video_id = qs.get('v', [None])[0] or video_id
# Attempt to extract SWF player URL
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
@@ -1532,13 +1798,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if dash_mpd and dash_mpd[0] not in dash_mpds:
dash_mpds.append(dash_mpd[0])
+ def add_dash_mpd_pr(pl_response):
+ dash_mpd = url_or_none(try_get(
+ pl_response, lambda x: x['streamingData']['dashManifestUrl'],
+ compat_str))
+ if dash_mpd and dash_mpd not in dash_mpds:
+ dash_mpds.append(dash_mpd)
+
is_live = None
view_count = None
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+ def extract_player_response(player_response, video_id):
+ pl_response = str_or_none(player_response)
+ if not pl_response:
+ return
+ pl_response = self._parse_json(pl_response, video_id, fatal=False)
+ if isinstance(pl_response, dict):
+ add_dash_mpd_pr(pl_response)
+ return pl_response
+
+ player_response = {}
+
# Get video info
+ video_info = {}
embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
age_gate = True
@@ -1553,16 +1838,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(
- video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- add_dash_mpd(video_info)
+ try:
+ video_info_webpage = self._download_webpage(
+ video_info_url, video_id,
+ note='Refetching age-gated info webpage',
+ errnote='unable to download video info webpage')
+ except ExtractorError:
+ video_info_webpage = None
+ if video_info_webpage:
+ video_info = compat_parse_qs(video_info_webpage)
+ pl_response = video_info.get('player_response', [None])[0]
+ player_response = extract_player_response(pl_response, video_id)
+ add_dash_mpd(video_info)
+ view_count = extract_view_count(video_info)
else:
age_gate = False
- video_info = None
- sts = None
# Try looking directly into the video webpage
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
@@ -1573,94 +1863,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
add_dash_mpd(video_info)
# Rental video is not rented but preview is available (e.g.
# https://www.youtube.com/watch?v=yYr8q0y5Jfg,
- # https://github.com/rg3/youtube-dl/issues/10532)
+ # https://github.com/ytdl-org/youtube-dl/issues/10532)
if not video_info and args.get('ypc_vid'):
return self.url_result(
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
- sts = ytplayer_config.get('sts')
+ if not player_response:
+ player_response = extract_player_response(args.get('player_response'), video_id)
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
- # We also try looking in get_video_info since it may contain different dashmpd
- # URL that points to a DASH manifest with possibly different itag set (some itags
- # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
- # manifest pointed by get_video_info's dashmpd).
- # The general idea is to take a union of itags of both DASH manifests (for example
- # video with such 'manifest behavior' see https://github.com/rg3/youtube-dl/issues/6093)
- self.report_video_info_webpage_download(video_id)
- for el in ('info', 'embedded', 'detailpage', 'vevo', ''):
- query = {
- 'video_id': video_id,
- 'ps': 'default',
- 'eurl': '',
- 'gl': 'US',
- 'hl': 'en',
- }
- if el:
- query['el'] = el
- if sts:
- query['sts'] = sts
- video_info_webpage = self._download_webpage(
- '%s://www.youtube.com/get_video_info' % proto,
- video_id, note=False,
- errnote='unable to download video info webpage',
- fatal=False, query=query)
- if not video_info_webpage:
- continue
- get_video_info = compat_parse_qs(video_info_webpage)
- add_dash_mpd(get_video_info)
- if view_count is None:
- view_count = extract_view_count(get_video_info)
- if not video_info:
- video_info = get_video_info
- if 'token' in get_video_info:
- # Different get_video_info requests may report different results, e.g.
- # some may report video unavailability, but some may serve it without
- # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
- # the original webpage as well as el=info and el=embedded get_video_info
- # requests report video unavailability due to geo restriction while
- # el=detailpage succeeds and returns valid data). This is probably
- # due to YouTube measures against IP ranges of hosting providers.
- # Working around by preferring the first succeeded video_info containing
- # the token if no such video_info yet was found.
- if 'token' not in video_info:
- video_info = get_video_info
- break
+ add_dash_mpd_pr(player_response)
def extract_unavailable_message():
- return self._html_search_regex(
- r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
- video_webpage, 'unavailable message', default=None)
+ messages = []
+ for tag, kind in (('h1', 'message'), ('div', 'submessage')):
+ msg = self._html_search_regex(
+ r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
+ video_webpage, 'unavailable %s' % kind, default=None)
+ if msg:
+ messages.append(msg)
+ if messages:
+ return '\n'.join(messages)
+
+ if not video_info and not player_response:
+ unavailable_message = extract_unavailable_message()
+ if not unavailable_message:
+ unavailable_message = 'Unable to extract video data'
+ raise ExtractorError(
+ 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
- if 'token' not in video_info:
- if 'reason' in video_info:
- if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta(
- 'regionsAllowed', video_webpage, default=None)
- countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- msg=video_info['reason'][0], countries=countries)
- reason = video_info['reason'][0]
- if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
- raise ExtractorError(
- 'YouTube said: %s' % reason,
- expected=True, video_id=video_id)
- else:
- raise ExtractorError(
- '"token" parameter not in video info for unknown reason',
- video_id=video_id)
+ if not isinstance(video_info, dict):
+ video_info = {}
- # title
- if 'title' in video_info:
- video_title = video_info['title'][0]
- else:
+ video_details = try_get(
+ player_response, lambda x: x['videoDetails'], dict) or {}
+
+ microformat = try_get(
+ player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
+
+ video_title = video_info.get('title', [None])[0] or video_details.get('title')
+ if not video_title:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'
- # description
description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
@@ -1685,48 +1930,71 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
''', replace_url, video_description)
video_description = clean_html(video_description)
else:
- fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
- if fd_mobj:
- video_description = unescapeHTML(fd_mobj.group(1))
- else:
- video_description = ''
+ video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage)
- if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
+ if not smuggled_data.get('force_singlefeed', False):
if not self._downloader.params.get('noplaylist'):
- entries = []
- feed_ids = []
- multifeed_metadata_list = video_info['multifeed_metadata_list'][0]
- for feed in multifeed_metadata_list.split(','):
- # Unquote should take place before split on comma (,) since textual
- # fields may contain comma as well (see
- # https://github.com/rg3/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
- entries.append({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- 'url': smuggle_url(
- '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
- {'force_singlefeed': True}),
- 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
- })
- feed_ids.append(feed_data['id'][0])
- self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
- return self.playlist_result(entries, video_id, video_title, video_description)
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ multifeed_metadata_list = try_get(
+ player_response,
+ lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
+ compat_str) or try_get(
+ video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
+ if multifeed_metadata_list:
+ entries = []
+ feed_ids = []
+ for feed in multifeed_metadata_list.split(','):
+ # Unquote should take place before split on comma (,) since textual
+ # fields may contain comma as well (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
+
+ def feed_entry(name):
+ return try_get(feed_data, lambda x: x[name][0], compat_str)
+
+ feed_id = feed_entry('id')
+ if not feed_id:
+ continue
+ feed_title = feed_entry('title')
+ title = video_title
+ if feed_title:
+ title += ' (%s)' % feed_title
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': title,
+ })
+ feed_ids.append(feed_id)
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(entries, video_id, video_title, video_description)
+ else:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
if view_count is None:
view_count = extract_view_count(video_info)
+ if view_count is None and video_details:
+ view_count = int_or_none(video_details.get('viewCount'))
+ if view_count is None and microformat:
+ view_count = int_or_none(microformat.get('viewCount'))
+
+ if is_live is None:
+ is_live = bool_or_none(video_details.get('isLive'))
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
+ raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
def _extract_filesize(media_url):
return int_or_none(self._search_regex(
r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
+ streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
+ streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
+
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@@ -1735,10 +2003,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': video_info['conn'][0],
'player_url': player_url,
}]
- elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+ elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
+ raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
+ formats = []
formats_spec = {}
fmt_list = video_info.get('fmt_list', [''])[0]
if fmt_list:
@@ -1752,67 +2021,92 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'width': int_or_none(width_height[0]),
'height': int_or_none(width_height[1]),
}
- q = qualities(['small', 'medium', 'hd720'])
- formats = []
- for url_data_str in encoded_url_map.split(','):
- url_data = compat_parse_qs(url_data_str)
- if 'itag' not in url_data or 'url' not in url_data:
+ for fmt in streaming_formats:
+ itag = str_or_none(fmt.get('itag'))
+ if not itag:
continue
- format_id = url_data['itag'][0]
- url = url_data['url'][0]
-
- if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')
+ quality = fmt.get('quality')
+ quality_label = fmt.get('qualityLabel') or quality
+ formats_spec[itag] = {
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt.get('contentLength')),
+ 'format_note': quality_label,
+ 'fps': int_or_none(fmt.get('fps')),
+ 'height': int_or_none(fmt.get('height')),
+ # bitrate for itag 43 is always 2147483647
+ 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+ 'width': int_or_none(fmt.get('width')),
+ }
+
+ for fmt in streaming_formats:
+ if fmt.get('drmFamilies') or fmt.get('drm_families'):
+ continue
+ url = url_or_none(fmt.get('url'))
- player_url = json.loads(jsplayer_url_json)
- if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, 'age gate player URL')
- player_url = json.loads(player_url_json)
+ if not url:
+ cipher = fmt.get('cipher') or fmt.get('signatureCipher')
+ if not cipher:
+ continue
+ url_data = compat_parse_qs(cipher)
+ url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
+ if not url:
+ continue
+ else:
+ cipher = None
+ url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
+ stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
+ # Unsupported FORMAT_STREAM_TYPE_OTF
+ if stream_type == 3:
+ continue
+
+ format_id = fmt.get('itag') or url_data['itag'][0]
+ if not format_id:
+ continue
+ format_id = compat_str(format_id)
- if self._downloader.params.get('verbose'):
+ if cipher:
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+ ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE,
+ embed_webpage if age_gate else video_webpage,
+ 'JS player URL (1)', default=None)
+ if not jsplayer_url_json and not age_gate:
+ # We need the embed website after all
+ if embed_webpage is None:
+ embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE, embed_webpage, 'JS player URL')
+
+ player_url = json.loads(jsplayer_url_json)
if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
- else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- 'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, 'age gate player URL')
+ player_url = json.loads(player_url_json)
+
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
+ if self._downloader.params.get('verbose'):
+ if player_url is None:
+ player_desc = 'unknown'
else:
- player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
- player_url,
- 'html5 player', fatal=False)
- player_desc = 'html5 player %s' % player_version
-
- parts_sizes = self._signature_cache_id(encrypted_sig)
- self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- url += '&signature=' + signature
+ player_type, player_version = self._extract_player_info(player_url)
+ player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
+ parts_sizes = self._signature_cache_id(encrypted_sig)
+ self.to_screen('{%s} signature length %s, %s' %
+ (format_id, parts_sizes, player_desc))
+
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
+ sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+ url += '&%s=%s' % (sp, signature)
if 'ratebypass' not in url:
url += '&ratebypass=yes'
@@ -1827,29 +2121,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
dct.update(formats_spec[format_id])
# Some itags are not included in DASH manifest thus corresponding formats will
- # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
+ # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+ if width is None:
+ width = int_or_none(fmt.get('width'))
+ if height is None:
+ height = int_or_none(fmt.get('height'))
+
filesize = int_or_none(url_data.get(
'clen', [None])[0]) or _extract_filesize(url)
- quality = url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0]
+ quality = url_data.get('quality', [None])[0] or fmt.get('quality')
+ quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
+
+ tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
+ or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
+ fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
more_fields = {
'filesize': filesize,
- 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
+ 'tbr': tbr,
'width': width,
'height': height,
- 'fps': int_or_none(url_data.get('fps', [None])[0]),
- 'format_note': quality,
- 'quality': q(quality),
+ 'fps': fps,
+ 'format_note': quality_label or quality,
}
for key, value in more_fields.items():
if value:
dct[key] = value
- type_ = url_data.get('type', [None])[0]
+ type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
if type_:
type_split = type_.split(';')
kind_ext = type_split[0].split('/')
@@ -1871,34 +2174,48 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'http_chunk_size': 10485760,
}
formats.append(dct)
- elif video_info.get('hlsvp'):
- manifest_url = video_info['hlsvp'][0]
- formats = []
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', fatal=False)
- for a_format in m3u8_formats:
- itag = self._search_regex(
- r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
- if itag:
- a_format['format_id'] = itag
- if itag in self._formats:
- dct = self._formats[itag].copy()
- dct.update(a_format)
- a_format = dct
- a_format['player_url'] = player_url
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
- formats.append(a_format)
else:
- error_message = clean_html(video_info.get('reason', [None])[0])
- if not error_message:
+ manifest_url = (
+ url_or_none(try_get(
+ player_response,
+ lambda x: x['streamingData']['hlsManifestUrl'],
+ compat_str))
+ or url_or_none(try_get(
+ video_info, lambda x: x['hlsvp'][0], compat_str)))
+ if manifest_url:
+ formats = []
+ m3u8_formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', fatal=False)
+ for a_format in m3u8_formats:
+ itag = self._search_regex(
+ r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
+ if itag:
+ a_format['format_id'] = itag
+ if itag in self._formats:
+ dct = self._formats[itag].copy()
+ dct.update(a_format)
+ a_format = dct
+ a_format['player_url'] = player_url
+ # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
+ a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
+ formats.append(a_format)
+ else:
error_message = extract_unavailable_message()
- if error_message:
- raise ExtractorError(error_message, expected=True)
- raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
+ if not error_message:
+ error_message = clean_html(try_get(
+ player_response, lambda x: x['playabilityStatus']['reason'],
+ compat_str))
+ if not error_message:
+ error_message = clean_html(
+ try_get(video_info, lambda x: x['reason'][0], compat_str))
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+ raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
# uploader
- video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
+ video_uploader = try_get(
+ video_info, lambda x: x['author'][0],
+ compat_str) or str_or_none(video_details.get('author'))
if video_uploader:
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
else:
@@ -1914,23 +2231,49 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_uploader_id = mobj.group('uploader_id')
video_uploader_url = mobj.group('uploader_url')
else:
- self._downloader.report_warning('unable to extract uploader nickname')
-
- channel_id = self._html_search_meta(
- 'channelId', video_webpage, 'channel id')
+ owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
+ if owner_profile_url:
+ video_uploader_id = self._search_regex(
+ r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
+ default=None)
+ video_uploader_url = owner_profile_url
+
+ channel_id = (
+ str_or_none(video_details.get('channelId'))
+ or self._html_search_meta(
+ 'channelId', video_webpage, 'channel id', default=None)
+ or self._search_regex(
+ r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ video_webpage, 'channel id', default=None, group='id'))
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
- # thumbnail image
- # We try first to get a high quality image:
- m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
- video_webpage, re.DOTALL)
- if m_thumb is not None:
- video_thumbnail = m_thumb.group(1)
- elif 'thumbnail_url' not in video_info:
- self._downloader.report_warning('unable to extract video thumbnail')
+ thumbnails = []
+ thumbnails_list = try_get(
+ video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
+ for t in thumbnails_list:
+ if not isinstance(t, dict):
+ continue
+ thumbnail_url = url_or_none(t.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(t.get('width')),
+ 'height': int_or_none(t.get('height')),
+ })
+
+ if not thumbnails:
video_thumbnail = None
- else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
+ # We try first to get a high quality image:
+ m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+ video_webpage, re.DOTALL)
+ if m_thumb is not None:
+ video_thumbnail = m_thumb.group(1)
+ thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
+ if thumbnail_url:
+ video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
+ if video_thumbnail:
+ thumbnails.append({'url': video_thumbnail})
# upload date
upload_date = self._html_search_meta(
@@ -1940,6 +2283,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
[r'(?s)id="eow-date.*?>(.*?)</span>',
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
video_webpage, 'upload date', default=None)
+ if not upload_date:
+ upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
upload_date = unified_strdate(upload_date)
video_license = self._html_search_regex(
@@ -1976,12 +2321,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
track = extract_meta('Song')
artist = extract_meta('Artist')
+ album = extract_meta('Album')
+
+ # Youtube Music Auto-generated description
+ release_date = release_year = None
+ if video_description:
+ mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+ if mobj:
+ if not track:
+ track = mobj.group('track').strip()
+ if not artist:
+ artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
+ if not album:
+ album = mobj.group('album'.strip())
+ release_year = mobj.group('release_year')
+ release_date = mobj.group('release_date')
+ if release_date:
+ release_date = release_date.replace('-', '')
+ if not release_year:
+ release_year = int(release_date[:4])
+ if release_year:
+ release_year = int(release_year)
m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
video_webpage)
if m_episode:
- series = m_episode.group('series')
+ series = unescapeHTML(m_episode.group('series'))
season_number = int(m_episode.group('season'))
episode_number = int(m_episode.group('episode'))
else:
@@ -1990,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None)
+ category = None
if m_cat_container:
category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
+ if not category:
+ category = try_get(
+ microformat, lambda x: x['category'], compat_str)
+ video_categories = None if category is None else [category]
video_tags = [
unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+ if not video_tags:
+ video_tags = try_get(video_details, lambda x: x['keywords'], list)
def _extract_count(count_name):
return str_to_int(self._search_regex(
@@ -2011,6 +2381,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
like_count = _extract_count('like')
dislike_count = _extract_count('dislike')
+ if view_count is None:
+ view_count = str_to_int(self._search_regex(
+ r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
+ 'view count', default=None))
+
+ average_rating = (
+ float_or_none(video_details.get('averageRating'))
+ or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
+
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
@@ -2018,15 +2397,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_duration = try_get(
video_info, lambda x: int_or_none(x['length_seconds'][0]))
if not video_duration:
+ video_duration = int_or_none(video_details.get('lengthSeconds'))
+ if not video_duration:
video_duration = parse_duration(self._html_search_meta(
'duration', video_webpage, 'video duration'))
# annotations
video_annotations = None
if self._downloader.params.get('writeannotations', False):
- video_annotations = self._extract_annotations(video_id)
-
- chapters = self._extract_chapters(description_original, video_duration)
+ xsrf_token = self._search_regex(
+ r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+ video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+ invideo_url = try_get(
+ player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+ if xsrf_token and invideo_url:
+ xsrf_field_name = self._search_regex(
+ r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+ video_webpage, 'xsrf field name',
+ group='xsrf_field_name', default='session_token')
+ video_annotations = self._download_webpage(
+ self._proto_relative_url(invideo_url),
+ video_id, note='Downloading annotations',
+ errnote='Unable to download video annotations', fatal=False,
+ data=urlencode_postdata({xsrf_field_name: xsrf_token}))
+
+ chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
@@ -2062,7 +2457,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Remove the formats we found through non-DASH, they
# contain less info and it can be wrong, because we use
# fixed values (for example the resolution). See
- # https://github.com/rg3/youtube-dl/issues/5774 for an
+ # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
# example.
formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
formats.extend(dash_formats.values())
@@ -2082,9 +2477,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio
+ if not formats:
+ if 'reason' in video_info:
+ if 'The uploader has not made this video available in your country.' in video_info['reason']:
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
+ reason = video_info['reason'][0]
+ if 'Invalid parameters' in reason:
+ unavailable_message = extract_unavailable_message()
+ if unavailable_message:
+ reason = unavailable_message
+ raise ExtractorError(
+ 'YouTube said: %s' % reason,
+ expected=True, video_id=video_id)
+ if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
self._sort_formats(formats)
- self.mark_watched(video_id, video_info)
+ self.mark_watched(video_id, video_info, player_response)
return {
'id': video_id,
@@ -2098,7 +2512,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'creator': video_creator or artist,
'title': video_title,
'alt_title': video_alt_title or track,
- 'thumbnail': video_thumbnail,
+ 'thumbnails': thumbnails,
'description': video_description,
'categories': video_categories,
'tags': video_tags,
@@ -2112,7 +2526,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
- 'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
+ 'average_rating': average_rating,
'formats': formats,
'is_live': is_live,
'start_time': start_time,
@@ -2122,6 +2536,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'episode_number': episode_number,
'track': track,
'artist': artist,
+ 'album': album,
+ 'release_date': release_date,
+ 'release_year': release_year,
}
@@ -2131,7 +2548,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(?:https?://)?
(?:\w+\.)?
(?:
- youtube\.com/
+ (?:
+ youtube(?:kids)?\.com|
+ invidio\.us
+ )
+ /
(?:
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
\? (?:.*?[&;])*? (?:p|a|list)=
@@ -2140,7 +2561,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
)
(
- (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
+ (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
@@ -2149,37 +2570,45 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(%(playlist_id)s)
)""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
- _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
+ _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+ _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
IE_NAME = 'youtube:playlist'
_TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
'info_dict': {
- 'title': 'ytdl test PL',
- 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'title': 'youtube-dl public playlist',
},
- 'playlist_count': 3,
+ 'playlist_count': 1,
}, {
- 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
'info_dict': {
- 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
- 'title': 'YDL_Empty_List',
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'title': 'youtube-dl empty playlist',
},
'playlist_count': 0,
- 'skip': 'This playlist is private',
}, {
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
'info_dict': {
'title': '29C3: Not my department',
'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'uploader': 'Christiaan008',
+ 'uploader_id': 'ChRiStIaAn008',
},
- 'playlist_count': 95,
+ 'playlist_count': 96,
}, {
'note': 'issue #673',
'url': 'PLBB231211A4F62143',
'info_dict': {
'title': '[OLD]Team Fortress 2 (Class-based LP)',
'id': 'PLBB231211A4F62143',
+ 'uploader': 'Wickydoo',
+ 'uploader_id': 'Wickydoo',
},
'playlist_mincount': 26,
}, {
@@ -2188,6 +2617,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'info_dict': {
'title': 'Uploads from Cauchemar',
'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'uploader': 'Cauchemar',
+ 'uploader_id': 'Cauchemar89',
},
'playlist_mincount': 799,
}, {
@@ -2205,13 +2636,17 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'info_dict': {
'title': 'JODA15',
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'uploader': 'milan',
+ 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
}
}, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 485,
'info_dict': {
- 'title': '2017 華語最新單曲 (2/24更新)',
+ 'title': '2018 Chinese New Singles (11/6 updated)',
'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'uploader': 'LBK',
+ 'uploader_id': 'sdragonfang',
}
}, {
'note': 'Embedded SWF player',
@@ -2220,13 +2655,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'info_dict': {
'title': 'JODA7',
'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
- }
+ },
+ 'skip': 'This playlist does not exist',
}, {
'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
'info_dict': {
'title': 'Uploads from Interstellar Movie',
'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+ 'uploader': 'Interstellar Movie',
+ 'uploader_id': 'InterstellarMovie1',
},
'playlist_mincount': 21,
}, {
@@ -2244,12 +2682,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
'categories': ['People & Blogs'],
'tags': list,
+ 'view_count': int,
'like_count': int,
'dislike_count': int,
},
'params': {
'skip_download': True,
},
+ 'skip': 'This video is not available.',
'add_ie': [YoutubeIE.ie_key()],
}, {
'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
@@ -2261,7 +2701,6 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'uploader_id': 'backuspagemuseum',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
'upload_date': '20161008',
- 'license': 'Standard YouTube License',
'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
'categories': ['Nonprofits & Activism'],
'tags': list,
@@ -2273,6 +2712,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'skip_download': True,
},
}, {
+ # https://github.com/ytdl-org/youtube-dl/issues/21844
+ 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'info_dict': {
+ 'title': 'Data Analysis with Dr Mike Pound',
+ 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'uploader_id': 'Computerphile',
+ 'uploader': 'Computerphile',
+ },
+ 'playlist_mincount': 11,
+ }, {
'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
'only_matching': True,
}, {
@@ -2282,18 +2731,52 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
# music album playlist
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True,
+ }, {
+ 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
+ 'only_matching': True,
}]
def _real_initialize(self):
self._login()
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+
+ for item in re.findall(
+ r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
+ attrs = extract_attributes(item)
+ video_id = attrs['data-video-id']
+ video_title = unescapeHTML(attrs.get('data-title'))
+ if video_title:
+ video_title = video_title.strip()
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+
+ # Fallback with old _VIDEO_RE
+ self.extract_videos_from_page_impl(
+ self._VIDEO_RE, page, ids_in_page, titles_in_page)
+
+ # Relaxed fallbacks
+ self.extract_videos_from_page_impl(
+ r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
+ ids_in_page, titles_in_page)
+ self.extract_videos_from_page_impl(
+ r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
+ ids_in_page, titles_in_page)
+
+ return zip(ids_in_page, titles_in_page)
+
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
ids = []
last_id = playlist_id[-11:]
for n in itertools.count(1):
- url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
webpage = self._download_webpage(
url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
new_ids = orderedSet(re.findall(
@@ -2312,9 +2795,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
title_span = (
- search_title('playlist-title') or
- search_title('title long-title') or
- search_title('title'))
+ search_title('playlist-title')
+ or search_title('title long-title')
+ or search_title('title'))
title = clean_html(title_span)
return self.playlist_result(url_results, playlist_id, title)
@@ -2323,7 +2806,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
url = self._TEMPLATE_URL % playlist_id
page = self._download_webpage(url, playlist_id)
- # the yt-alert-message now has tabindex attribute (see https://github.com/rg3/youtube-dl/issues/11604)
+ # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
match = match.strip()
# Check if the playlist exists or is private
@@ -2349,7 +2832,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
page, 'title', default=None)
_UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
- uploader = self._search_regex(
+ uploader = self._html_search_regex(
r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
page, 'uploader', default=None)
mobj = re.search(
@@ -2416,7 +2899,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return playlist
# Some playlist URLs don't actually serve a playlist (see
- # https://github.com/rg3/youtube-dl/issues/10537).
+ # https://github.com/ytdl-org/youtube-dl/issues/10537).
# Fallback to plain video extraction if there is a video id
# along with playlist id.
return self.url_result(video_id, 'Youtube', video_id=video_id)
@@ -2424,7 +2907,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com channels'
- _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
+ _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
_TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
_VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
IE_NAME = 'youtube:channel'
@@ -2435,6 +2918,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
'info_dict': {
'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
'title': 'Uploads from lex will',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
}
}, {
'note': 'Age restricted channel',
@@ -2444,10 +2929,15 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
'info_dict': {
'id': 'UUs0ifCMCm1icqRbqhUINa0w',
'title': 'Uploads from Deus Ex',
+ 'uploader': 'Deus Ex',
+ 'uploader_id': 'DeusExOfficial',
},
}, {
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
+ 'only_matching': True,
}]
@classmethod
@@ -2528,6 +3018,8 @@ class YoutubeUserIE(YoutubeChannelIE):
'info_dict': {
'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
'title': 'Uploads from The Linux Foundation',
+ 'uploader': 'The Linux Foundation',
+ 'uploader_id': 'TheLinuxFoundation',
}
}, {
# Only available via https://www.youtube.com/c/12minuteathlete/videos
@@ -2537,6 +3029,8 @@ class YoutubeUserIE(YoutubeChannelIE):
'info_dict': {
'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
'title': 'Uploads from 12 Minute Athlete',
+ 'uploader': '12 Minute Athlete',
+ 'uploader_id': 'the12minuteathlete',
}
}, {
'url': 'ytuser:phihag',
@@ -2622,7 +3116,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user/channel playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
+ _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
IE_NAME = 'youtube:playlists'
_TESTS = [{
@@ -2630,7 +3124,7 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'playlist_mincount': 4,
'info_dict': {
'id': 'ThirstForScience',
- 'title': 'Thirst for Science',
+ 'title': 'ThirstForScience',
},
}, {
# with "Load more" button
@@ -2647,6 +3141,10 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
'title': 'Chem Player',
},
+ 'skip': 'Blocked',
+ }, {
+ 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+ 'only_matching': True,
}]
@@ -2791,9 +3289,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
break
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
+ 'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
+ transform_source=uppercase_escape,
+ headers=self._YOUTUBE_CLIENT_HEADERS)
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py
index bacb82eee..f6496f516 100644
--- a/youtube_dl/extractor/zapiks.py
+++ b/youtube_dl/extractor/zapiks.py
@@ -29,7 +29,6 @@ class ZapiksIE(InfoExtractor):
'timestamp': 1359044972,
'upload_date': '20130124',
'view_count': int,
- 'comment_count': int,
},
},
{
diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dl/extractor/zattoo.py
index bbe0aecb6..6bac3026e 100644
--- a/youtube_dl/extractor/zattoo.py
+++ b/youtube_dl/extractor/zattoo.py
@@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
_power_guide_hash = None
def _host_url(self):
- return 'https://%s' % self._HOST
+ return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
def _login(self):
username, password = self._get_login_info()
@@ -86,8 +86,8 @@ class ZattooPlatformBaseIE(InfoExtractor):
return next(
chan['cid'] for chan in channel_list
if chan.get('cid') and (
- chan.get('display_alias') == channel_name or
- chan.get('cid') == channel_name))
+ chan.get('display_alias') == channel_name
+ or chan.get('cid') == channel_name))
except StopIteration:
raise ExtractorError('Could not extract channel id')
@@ -286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE):
class NetPlusIE(ZattooIE):
_NETRC_MACHINE = 'netplus'
_HOST = 'netplus.tv'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
@@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
- 'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc',
+ 'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
'only_matching': True,
}]
@@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
- 'url': 'https://www.player.waly.tv/watch/abc/123-abc',
+ 'url': 'https://player.waly.tv/watch/abc/123-abc',
'only_matching': True,
}]
@@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE):
class BBVTVIE(ZattooIE):
_NETRC_MACHINE = 'bbvtv'
_HOST = 'bbv-tv.net'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
@@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE):
class VTXTVIE(ZattooIE):
_NETRC_MACHINE = 'vtxtv'
_HOST = 'vtxtv.ch'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
@@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE):
class MyVisionTVIE(ZattooIE):
_NETRC_MACHINE = 'myvisiontv'
_HOST = 'myvisiontv.ch'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
@@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
- 'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc',
+ 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
'only_matching': True,
}]
@@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE):
class SAKTVIE(ZattooIE):
_NETRC_MACHINE = 'saktv'
_HOST = 'saktv.ch'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
@@ -377,7 +382,7 @@ class EWETVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
- 'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc',
+ 'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
'only_matching': True,
}]
@@ -385,6 +390,7 @@ class EWETVIE(ZattooIE):
class QuantumTVIE(ZattooIE):
_NETRC_MACHINE = 'quantumtv'
_HOST = 'quantum-tv.com'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
@@ -395,11 +401,11 @@ class QuantumTVIE(ZattooIE):
class OsnatelTVIE(ZattooIE):
_NETRC_MACHINE = 'osnateltv'
- _HOST = 'onlinetv.osnatel.de'
+ _HOST = 'tvonline.osnatel.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
- 'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc',
+ 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
'only_matching': True,
}]
@@ -407,9 +413,21 @@ class OsnatelTVIE(ZattooIE):
class EinsUndEinsTVIE(ZattooIE):
_NETRC_MACHINE = '1und1tv'
_HOST = '1und1.tv'
+ _API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{
'url': 'https://www.1und1.tv/watch/abc/123-abc',
'only_matching': True,
}]
+
+
+class SaltTVIE(ZattooIE):
+ _NETRC_MACHINE = 'salttv'
+ _HOST = 'tv.salt.ch'
+ _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
+
+ _TESTS = [{
+ 'url': 'https://tv.salt.ch/watch/abc/123-abc',
+ 'only_matching': True,
+ }]
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index afa3f6c47..656864b2e 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -41,6 +41,7 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
+ _GEO_COUNTRIES = ['DE']
_TESTS = [{
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
@@ -243,14 +244,14 @@ class ZDFChannelIE(ZDFBaseIE):
'id': 'das-aktuelle-sportstudio',
'title': 'das aktuelle sportstudio | ZDF',
},
- 'playlist_count': 21,
+ 'playlist_mincount': 23,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': {
'id': 'planet-e',
'title': 'planet e.',
},
- 'playlist_count': 4,
+ 'playlist_mincount': 50,
}, {
'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True,
diff --git a/youtube_dl/extractor/zype.py b/youtube_dl/extractor/zype.py
new file mode 100644
index 000000000..2e2e97a0c
--- /dev/null
+++ b/youtube_dl/extractor/zype.py
@@ -0,0 +1,134 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class ZypeIE(InfoExtractor):
+ _ID_RE = r'[\da-fA-F]+'
+ _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
+ _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
+ _TEST = {
+ 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
+ 'md5': 'eaee31d474c76a955bdaba02a505c595',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'timestamp': 1504915200,
+ 'upload_date': '20170909',
+ },
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ try:
+ response = self._download_json(re.sub(
+ r'\.(?:js|html)\?', '.json?', url), video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), video_id)['message'], expected=True)
+ raise
+
+ body = response['body']
+ video = response['video']
+ title = video['title']
+
+ if isinstance(body, dict):
+ formats = []
+ for output in body.get('outputs', []):
+ output_url = output.get('url')
+ if not output_url:
+ continue
+ name = output.get('name')
+ if name == 'm3u8':
+ formats = self._extract_m3u8_formats(
+ output_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ else:
+ f = {
+ 'format_id': name,
+ 'tbr': int_or_none(output.get('bitrate')),
+ 'url': output_url,
+ }
+ if name in ('m4a', 'mp3'):
+ f['vcodec'] = 'none'
+ else:
+ f.update({
+ 'height': int_or_none(output.get('height')),
+ 'width': int_or_none(output.get('width')),
+ })
+ formats.append(f)
+ text_tracks = body.get('subtitles') or []
+ else:
+ m3u8_url = self._search_regex(
+ r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
+ body, 'm3u8 url', group='url')
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ text_tracks = self._search_regex(
+ r'textTracks\s*:\s*(\[[^]]+\])',
+ body, 'text tracks', default=None)
+ if text_tracks:
+ text_tracks = self._parse_json(
+ text_tracks, video_id, js_to_json, False)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if text_tracks:
+ for text_track in text_tracks:
+ tt_url = dict_get(text_track, ('file', 'src'))
+ if not tt_url:
+ continue
+ subtitles.setdefault(text_track.get('label') or 'English', []).append({
+ 'url': tt_url,
+ })
+
+ thumbnails = []
+ for thumbnail in video.get('thumbnails', []):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': video.get('friendly_title'),
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'description': dict_get(video, ('description', 'ott_description', 'short_description')),
+ 'timestamp': parse_iso8601(video.get('published_at')),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('request_count')),
+ 'average_rating': int_or_none(video.get('rating')),
+ 'season_number': int_or_none(video.get('season')),
+ 'episode_number': int_or_none(video.get('episode')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index e7d8e8910..6d5ac62b3 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -45,7 +45,7 @@ def parseOpts(overrideArguments=None):
except IOError:
return default # silently skip if file is not present
try:
- # FIXME: https://github.com/rg3/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
+ # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
contents = optionf.read()
if sys.version_info < (3,):
contents = contents.decode(preferredencoding())
@@ -134,7 +134,7 @@ def parseOpts(overrideArguments=None):
action='help',
help='Print this help text and exit')
general.add_option(
- '-v', '--version',
+ '--version',
action='version',
help='Print program version and exit')
general.add_option(
@@ -853,7 +853,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'--exec',
metavar='CMD', dest='exec_cmd',
- help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
postproc.add_option(
'--convert-subs', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None,
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 757b496a1..5f7298345 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -9,9 +9,6 @@ import re
from .common import AudioConversionError, PostProcessor
-from ..compat import (
- compat_subprocess_get_DEVNULL,
-)
from ..utils import (
encodeArgument,
encodeFilename,
@@ -79,6 +76,20 @@ class FFmpegPostProcessor(PostProcessor):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
prefer_ffmpeg = True
+ def get_ffmpeg_version(path):
+ ver = get_exe_version(path, args=['-version'])
+ if ver:
+ regexs = [
+ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
+ r'n([0-9.]+)$', # Arch Linux
+ # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+ ]
+ for regex in regexs:
+ mobj = re.match(regex, ver)
+ if mobj:
+ ver = mobj.group(1)
+ return ver
+
self.basename = None
self.probe_basename = None
@@ -110,11 +121,10 @@ class FFmpegPostProcessor(PostProcessor):
self._paths = dict(
(p, os.path.join(location, p)) for p in programs)
self._versions = dict(
- (p, get_exe_version(self._paths[p], args=['-version']))
- for p in programs)
+ (p, get_ffmpeg_version(self._paths[p])) for p in programs)
if self._versions is None:
self._versions = dict(
- (p, get_exe_version(p, args=['-version'])) for p in programs)
+ (p, get_ffmpeg_version(p)) for p in programs)
self._paths = dict((p, p) for p in programs)
if prefer_ffmpeg is False:
@@ -152,27 +162,45 @@ class FFmpegPostProcessor(PostProcessor):
return self._paths[self.probe_basename]
def get_audio_codec(self, path):
- if not self.probe_available:
- raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
+ if not self.probe_available and not self.available:
+ raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.')
try:
- cmd = [
- encodeFilename(self.probe_executable, True),
- encodeArgument('-show_streams'),
- encodeFilename(self._ffmpeg_filename_argument(path), True)]
+ if self.probe_available:
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-show_streams')]
+ else:
+ cmd = [
+ encodeFilename(self.executable, True),
+ encodeArgument('-i')]
+ cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
- handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
- output = handle.communicate()[0]
- if handle.wait() != 0:
+ self._downloader.to_screen(
+ '[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout_data, stderr_data = handle.communicate()
+ expected_ret = 0 if self.probe_available else 1
+ if handle.wait() != expected_ret:
return None
except (IOError, OSError):
return None
- audio_codec = None
- for line in output.decode('ascii', 'ignore').split('\n'):
- if line.startswith('codec_name='):
- audio_codec = line.split('=')[1].strip()
- elif line.strip() == 'codec_type=audio' and audio_codec is not None:
- return audio_codec
+ output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
+ if self.probe_available:
+ audio_codec = None
+ for line in output.split('\n'):
+ if line.startswith('codec_name='):
+ audio_codec = line.split('=')[1].strip()
+ elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+ return audio_codec
+ else:
+ # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
+ mobj = re.search(
+ r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
+ output)
+ if mobj:
+ return mobj.group(1)
return None
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
@@ -189,10 +217,13 @@ class FFmpegPostProcessor(PostProcessor):
encodeArgument('-i'),
encodeFilename(self._ffmpeg_filename_argument(path), True)
])
- cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
- files_cmd +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
+ cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
+ # avconv does not have repeat option
+ if self.basename == 'ffmpeg':
+ cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
+ cmd += (files_cmd
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
@@ -295,8 +326,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
information['ext'] = extension
# If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
- if (new_path == path or
- (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+ if (new_path == path
+ or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
return [], information
@@ -362,7 +393,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
sub_ext = sub_info['ext']
if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang)
- sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
+ sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
@@ -379,14 +410,16 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
# Don't copy the existing subtitles, we may be running the
# postprocessor a second time
'-map', '-0:s',
+ # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
+ # https://trac.ffmpeg.org/ticket/6016)
+ '-map', '-0:d',
]
if information['ext'] == 'mp4':
opts += ['-c:s', 'mov_text']
for (i, lang) in enumerate(sub_langs):
opts.extend(['-map', '%d:0' % (i + 1)])
- lang_code = ISO639Utils.short2long(lang)
- if lang_code is not None:
- opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+ lang_code = ISO639Utils.short2long(lang) or lang
+ opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
temp_filename = prepend_extension(filename, 'temp')
self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
@@ -414,6 +447,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
metadata[meta_f] = info[info_f]
break
+ # See [1-4] for some info on media metadata/metadata supported
+ # by ffmpeg.
+ # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
+ # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
+ # 3. https://kodi.wiki/view/Video_file_tagging
+ # 4. http://atomicparsley.sourceforge.net/mpeg-4files.html
+
add('title', ('track', 'title'))
add('date', 'upload_date')
add(('description', 'comment'), 'description')
@@ -424,6 +464,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
add('album')
add('album_artist')
add('disc', 'disc_number')
+ add('show', 'series')
+ add('season_number')
+ add('episode_id', ('episode', 'episode_id'))
+ add('episode_sort', 'episode_number')
if not metadata:
self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
@@ -573,9 +617,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
self._downloader.to_screen(
'[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
continue
- old_file = subtitles_filename(filename, lang, ext)
+ old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
sub_filenames.append(old_file)
- new_file = subtitles_filename(filename, lang, new_ext)
+ new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
if ext in ('dfxp', 'ttml', 'tt'):
self._downloader.report_warning(
@@ -583,7 +627,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
'which results in style information loss')
dfxp_file = old_file
- srt_file = subtitles_filename(filename, lang, 'srt')
+ srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read())
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py
index b0aed9ca7..814dabecf 100644
--- a/youtube_dl/postprocessor/xattrpp.py
+++ b/youtube_dl/postprocessor/xattrpp.py
@@ -64,8 +64,8 @@ class XAttrMetadataPP(PostProcessor):
except XAttrMetadataError as e:
if e.reason == 'NO_SPACE':
self._downloader.report_warning(
- 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' +
- (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
+ 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
+ + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize())
elif e.reason == 'VALUE_TOO_LONG':
self._downloader.report_warning(
'Unable to write extended attributes due to too long values.')
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index ebce9666a..84c964617 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -9,6 +9,7 @@ import subprocess
import sys
from zipimport import zipimporter
+from .compat import compat_realpath
from .utils import encode_compat_str
from .version import __version__
@@ -31,7 +32,7 @@ def rsa_verify(message, signature, key):
def update_self(to_screen, verbose, opener):
"""Update the program file with the latest version from the repository"""
- UPDATE_URL = 'https://rg3.github.io/youtube-dl/update/'
+ UPDATE_URL = 'https://yt-dl.org/update/'
VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
@@ -84,7 +85,9 @@ def update_self(to_screen, verbose, opener):
print_notes(to_screen, versions_info['versions'])
# sys.executable is set to the full pathname of the exe-file for py2exe
- filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
+ # though symlinks are not followed so that we need to do this manually
+ # with help of realpath
+ filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
if not os.access(filename, os.W_OK):
to_screen('ERROR: no write permissions on %s' % filename)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e84d35d4d..d1eca3760 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -7,6 +7,7 @@ import base64
import binascii
import calendar
import codecs
+import collections
import contextlib
import ctypes
import datetime
@@ -30,6 +31,7 @@ import ssl
import subprocess
import sys
import tempfile
+import time
import traceback
import xml.etree.ElementTree
import zlib
@@ -39,12 +41,14 @@ from .compat import (
compat_HTMLParser,
compat_basestring,
compat_chr,
+ compat_cookiejar,
compat_ctypes_WINFUNCTYPE,
compat_etree_fromstring,
compat_expanduser,
compat_html_entities,
compat_html_entities_html5,
compat_http_client,
+ compat_integer_types,
compat_kwargs,
compat_os_name,
compat_parse_qs,
@@ -80,8 +84,1592 @@ def register_socks_protocols():
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
+
+def random_user_agent():
+ _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
+ _CHROME_VERSIONS = (
+ '74.0.3729.129',
+ '76.0.3780.3',
+ '76.0.3780.2',
+ '74.0.3729.128',
+ '76.0.3780.1',
+ '76.0.3780.0',
+ '75.0.3770.15',
+ '74.0.3729.127',
+ '74.0.3729.126',
+ '76.0.3779.1',
+ '76.0.3779.0',
+ '75.0.3770.14',
+ '74.0.3729.125',
+ '76.0.3778.1',
+ '76.0.3778.0',
+ '75.0.3770.13',
+ '74.0.3729.124',
+ '74.0.3729.123',
+ '73.0.3683.121',
+ '76.0.3777.1',
+ '76.0.3777.0',
+ '75.0.3770.12',
+ '74.0.3729.122',
+ '76.0.3776.4',
+ '75.0.3770.11',
+ '74.0.3729.121',
+ '76.0.3776.3',
+ '76.0.3776.2',
+ '73.0.3683.120',
+ '74.0.3729.120',
+ '74.0.3729.119',
+ '74.0.3729.118',
+ '76.0.3776.1',
+ '76.0.3776.0',
+ '76.0.3775.5',
+ '75.0.3770.10',
+ '74.0.3729.117',
+ '76.0.3775.4',
+ '76.0.3775.3',
+ '74.0.3729.116',
+ '75.0.3770.9',
+ '76.0.3775.2',
+ '76.0.3775.1',
+ '76.0.3775.0',
+ '75.0.3770.8',
+ '74.0.3729.115',
+ '74.0.3729.114',
+ '76.0.3774.1',
+ '76.0.3774.0',
+ '75.0.3770.7',
+ '74.0.3729.113',
+ '74.0.3729.112',
+ '74.0.3729.111',
+ '76.0.3773.1',
+ '76.0.3773.0',
+ '75.0.3770.6',
+ '74.0.3729.110',
+ '74.0.3729.109',
+ '76.0.3772.1',
+ '76.0.3772.0',
+ '75.0.3770.5',
+ '74.0.3729.108',
+ '74.0.3729.107',
+ '76.0.3771.1',
+ '76.0.3771.0',
+ '75.0.3770.4',
+ '74.0.3729.106',
+ '74.0.3729.105',
+ '75.0.3770.3',
+ '74.0.3729.104',
+ '74.0.3729.103',
+ '74.0.3729.102',
+ '75.0.3770.2',
+ '74.0.3729.101',
+ '75.0.3770.1',
+ '75.0.3770.0',
+ '74.0.3729.100',
+ '75.0.3769.5',
+ '75.0.3769.4',
+ '74.0.3729.99',
+ '75.0.3769.3',
+ '75.0.3769.2',
+ '75.0.3768.6',
+ '74.0.3729.98',
+ '75.0.3769.1',
+ '75.0.3769.0',
+ '74.0.3729.97',
+ '73.0.3683.119',
+ '73.0.3683.118',
+ '74.0.3729.96',
+ '75.0.3768.5',
+ '75.0.3768.4',
+ '75.0.3768.3',
+ '75.0.3768.2',
+ '74.0.3729.95',
+ '74.0.3729.94',
+ '75.0.3768.1',
+ '75.0.3768.0',
+ '74.0.3729.93',
+ '74.0.3729.92',
+ '73.0.3683.117',
+ '74.0.3729.91',
+ '75.0.3766.3',
+ '74.0.3729.90',
+ '75.0.3767.2',
+ '75.0.3767.1',
+ '75.0.3767.0',
+ '74.0.3729.89',
+ '73.0.3683.116',
+ '75.0.3766.2',
+ '74.0.3729.88',
+ '75.0.3766.1',
+ '75.0.3766.0',
+ '74.0.3729.87',
+ '73.0.3683.115',
+ '74.0.3729.86',
+ '75.0.3765.1',
+ '75.0.3765.0',
+ '74.0.3729.85',
+ '73.0.3683.114',
+ '74.0.3729.84',
+ '75.0.3764.1',
+ '75.0.3764.0',
+ '74.0.3729.83',
+ '73.0.3683.113',
+ '75.0.3763.2',
+ '75.0.3761.4',
+ '74.0.3729.82',
+ '75.0.3763.1',
+ '75.0.3763.0',
+ '74.0.3729.81',
+ '73.0.3683.112',
+ '75.0.3762.1',
+ '75.0.3762.0',
+ '74.0.3729.80',
+ '75.0.3761.3',
+ '74.0.3729.79',
+ '73.0.3683.111',
+ '75.0.3761.2',
+ '74.0.3729.78',
+ '74.0.3729.77',
+ '75.0.3761.1',
+ '75.0.3761.0',
+ '73.0.3683.110',
+ '74.0.3729.76',
+ '74.0.3729.75',
+ '75.0.3760.0',
+ '74.0.3729.74',
+ '75.0.3759.8',
+ '75.0.3759.7',
+ '75.0.3759.6',
+ '74.0.3729.73',
+ '75.0.3759.5',
+ '74.0.3729.72',
+ '73.0.3683.109',
+ '75.0.3759.4',
+ '75.0.3759.3',
+ '74.0.3729.71',
+ '75.0.3759.2',
+ '74.0.3729.70',
+ '73.0.3683.108',
+ '74.0.3729.69',
+ '75.0.3759.1',
+ '75.0.3759.0',
+ '74.0.3729.68',
+ '73.0.3683.107',
+ '74.0.3729.67',
+ '75.0.3758.1',
+ '75.0.3758.0',
+ '74.0.3729.66',
+ '73.0.3683.106',
+ '74.0.3729.65',
+ '75.0.3757.1',
+ '75.0.3757.0',
+ '74.0.3729.64',
+ '73.0.3683.105',
+ '74.0.3729.63',
+ '75.0.3756.1',
+ '75.0.3756.0',
+ '74.0.3729.62',
+ '73.0.3683.104',
+ '75.0.3755.3',
+ '75.0.3755.2',
+ '73.0.3683.103',
+ '75.0.3755.1',
+ '75.0.3755.0',
+ '74.0.3729.61',
+ '73.0.3683.102',
+ '74.0.3729.60',
+ '75.0.3754.2',
+ '74.0.3729.59',
+ '75.0.3753.4',
+ '74.0.3729.58',
+ '75.0.3754.1',
+ '75.0.3754.0',
+ '74.0.3729.57',
+ '73.0.3683.101',
+ '75.0.3753.3',
+ '75.0.3752.2',
+ '75.0.3753.2',
+ '74.0.3729.56',
+ '75.0.3753.1',
+ '75.0.3753.0',
+ '74.0.3729.55',
+ '73.0.3683.100',
+ '74.0.3729.54',
+ '75.0.3752.1',
+ '75.0.3752.0',
+ '74.0.3729.53',
+ '73.0.3683.99',
+ '74.0.3729.52',
+ '75.0.3751.1',
+ '75.0.3751.0',
+ '74.0.3729.51',
+ '73.0.3683.98',
+ '74.0.3729.50',
+ '75.0.3750.0',
+ '74.0.3729.49',
+ '74.0.3729.48',
+ '74.0.3729.47',
+ '75.0.3749.3',
+ '74.0.3729.46',
+ '73.0.3683.97',
+ '75.0.3749.2',
+ '74.0.3729.45',
+ '75.0.3749.1',
+ '75.0.3749.0',
+ '74.0.3729.44',
+ '73.0.3683.96',
+ '74.0.3729.43',
+ '74.0.3729.42',
+ '75.0.3748.1',
+ '75.0.3748.0',
+ '74.0.3729.41',
+ '75.0.3747.1',
+ '73.0.3683.95',
+ '75.0.3746.4',
+ '74.0.3729.40',
+ '74.0.3729.39',
+ '75.0.3747.0',
+ '75.0.3746.3',
+ '75.0.3746.2',
+ '74.0.3729.38',
+ '75.0.3746.1',
+ '75.0.3746.0',
+ '74.0.3729.37',
+ '73.0.3683.94',
+ '75.0.3745.5',
+ '75.0.3745.4',
+ '75.0.3745.3',
+ '75.0.3745.2',
+ '74.0.3729.36',
+ '75.0.3745.1',
+ '75.0.3745.0',
+ '75.0.3744.2',
+ '74.0.3729.35',
+ '73.0.3683.93',
+ '74.0.3729.34',
+ '75.0.3744.1',
+ '75.0.3744.0',
+ '74.0.3729.33',
+ '73.0.3683.92',
+ '74.0.3729.32',
+ '74.0.3729.31',
+ '73.0.3683.91',
+ '75.0.3741.2',
+ '75.0.3740.5',
+ '74.0.3729.30',
+ '75.0.3741.1',
+ '75.0.3741.0',
+ '74.0.3729.29',
+ '75.0.3740.4',
+ '73.0.3683.90',
+ '74.0.3729.28',
+ '75.0.3740.3',
+ '73.0.3683.89',
+ '75.0.3740.2',
+ '74.0.3729.27',
+ '75.0.3740.1',
+ '75.0.3740.0',
+ '74.0.3729.26',
+ '73.0.3683.88',
+ '73.0.3683.87',
+ '74.0.3729.25',
+ '75.0.3739.1',
+ '75.0.3739.0',
+ '73.0.3683.86',
+ '74.0.3729.24',
+ '73.0.3683.85',
+ '75.0.3738.4',
+ '75.0.3738.3',
+ '75.0.3738.2',
+ '75.0.3738.1',
+ '75.0.3738.0',
+ '74.0.3729.23',
+ '73.0.3683.84',
+ '74.0.3729.22',
+ '74.0.3729.21',
+ '75.0.3737.1',
+ '75.0.3737.0',
+ '74.0.3729.20',
+ '73.0.3683.83',
+ '74.0.3729.19',
+ '75.0.3736.1',
+ '75.0.3736.0',
+ '74.0.3729.18',
+ '73.0.3683.82',
+ '74.0.3729.17',
+ '75.0.3735.1',
+ '75.0.3735.0',
+ '74.0.3729.16',
+ '73.0.3683.81',
+ '75.0.3734.1',
+ '75.0.3734.0',
+ '74.0.3729.15',
+ '73.0.3683.80',
+ '74.0.3729.14',
+ '75.0.3733.1',
+ '75.0.3733.0',
+ '75.0.3732.1',
+ '74.0.3729.13',
+ '74.0.3729.12',
+ '73.0.3683.79',
+ '74.0.3729.11',
+ '75.0.3732.0',
+ '74.0.3729.10',
+ '73.0.3683.78',
+ '74.0.3729.9',
+ '74.0.3729.8',
+ '74.0.3729.7',
+ '75.0.3731.3',
+ '75.0.3731.2',
+ '75.0.3731.0',
+ '74.0.3729.6',
+ '73.0.3683.77',
+ '73.0.3683.76',
+ '75.0.3730.5',
+ '75.0.3730.4',
+ '73.0.3683.75',
+ '74.0.3729.5',
+ '73.0.3683.74',
+ '75.0.3730.3',
+ '75.0.3730.2',
+ '74.0.3729.4',
+ '73.0.3683.73',
+ '73.0.3683.72',
+ '75.0.3730.1',
+ '75.0.3730.0',
+ '74.0.3729.3',
+ '73.0.3683.71',
+ '74.0.3729.2',
+ '73.0.3683.70',
+ '74.0.3729.1',
+ '74.0.3729.0',
+ '74.0.3726.4',
+ '73.0.3683.69',
+ '74.0.3726.3',
+ '74.0.3728.0',
+ '74.0.3726.2',
+ '73.0.3683.68',
+ '74.0.3726.1',
+ '74.0.3726.0',
+ '74.0.3725.4',
+ '73.0.3683.67',
+ '73.0.3683.66',
+ '74.0.3725.3',
+ '74.0.3725.2',
+ '74.0.3725.1',
+ '74.0.3724.8',
+ '74.0.3725.0',
+ '73.0.3683.65',
+ '74.0.3724.7',
+ '74.0.3724.6',
+ '74.0.3724.5',
+ '74.0.3724.4',
+ '74.0.3724.3',
+ '74.0.3724.2',
+ '74.0.3724.1',
+ '74.0.3724.0',
+ '73.0.3683.64',
+ '74.0.3723.1',
+ '74.0.3723.0',
+ '73.0.3683.63',
+ '74.0.3722.1',
+ '74.0.3722.0',
+ '73.0.3683.62',
+ '74.0.3718.9',
+ '74.0.3702.3',
+ '74.0.3721.3',
+ '74.0.3721.2',
+ '74.0.3721.1',
+ '74.0.3721.0',
+ '74.0.3720.6',
+ '73.0.3683.61',
+ '72.0.3626.122',
+ '73.0.3683.60',
+ '74.0.3720.5',
+ '72.0.3626.121',
+ '74.0.3718.8',
+ '74.0.3720.4',
+ '74.0.3720.3',
+ '74.0.3718.7',
+ '74.0.3720.2',
+ '74.0.3720.1',
+ '74.0.3720.0',
+ '74.0.3718.6',
+ '74.0.3719.5',
+ '73.0.3683.59',
+ '74.0.3718.5',
+ '74.0.3718.4',
+ '74.0.3719.4',
+ '74.0.3719.3',
+ '74.0.3719.2',
+ '74.0.3719.1',
+ '73.0.3683.58',
+ '74.0.3719.0',
+ '73.0.3683.57',
+ '73.0.3683.56',
+ '74.0.3718.3',
+ '73.0.3683.55',
+ '74.0.3718.2',
+ '74.0.3718.1',
+ '74.0.3718.0',
+ '73.0.3683.54',
+ '74.0.3717.2',
+ '73.0.3683.53',
+ '74.0.3717.1',
+ '74.0.3717.0',
+ '73.0.3683.52',
+ '74.0.3716.1',
+ '74.0.3716.0',
+ '73.0.3683.51',
+ '74.0.3715.1',
+ '74.0.3715.0',
+ '73.0.3683.50',
+ '74.0.3711.2',
+ '74.0.3714.2',
+ '74.0.3713.3',
+ '74.0.3714.1',
+ '74.0.3714.0',
+ '73.0.3683.49',
+ '74.0.3713.1',
+ '74.0.3713.0',
+ '72.0.3626.120',
+ '73.0.3683.48',
+ '74.0.3712.2',
+ '74.0.3712.1',
+ '74.0.3712.0',
+ '73.0.3683.47',
+ '72.0.3626.119',
+ '73.0.3683.46',
+ '74.0.3710.2',
+ '72.0.3626.118',
+ '74.0.3711.1',
+ '74.0.3711.0',
+ '73.0.3683.45',
+ '72.0.3626.117',
+ '74.0.3710.1',
+ '74.0.3710.0',
+ '73.0.3683.44',
+ '72.0.3626.116',
+ '74.0.3709.1',
+ '74.0.3709.0',
+ '74.0.3704.9',
+ '73.0.3683.43',
+ '72.0.3626.115',
+ '74.0.3704.8',
+ '74.0.3704.7',
+ '74.0.3708.0',
+ '74.0.3706.7',
+ '74.0.3704.6',
+ '73.0.3683.42',
+ '72.0.3626.114',
+ '74.0.3706.6',
+ '72.0.3626.113',
+ '74.0.3704.5',
+ '74.0.3706.5',
+ '74.0.3706.4',
+ '74.0.3706.3',
+ '74.0.3706.2',
+ '74.0.3706.1',
+ '74.0.3706.0',
+ '73.0.3683.41',
+ '72.0.3626.112',
+ '74.0.3705.1',
+ '74.0.3705.0',
+ '73.0.3683.40',
+ '72.0.3626.111',
+ '73.0.3683.39',
+ '74.0.3704.4',
+ '73.0.3683.38',
+ '74.0.3704.3',
+ '74.0.3704.2',
+ '74.0.3704.1',
+ '74.0.3704.0',
+ '73.0.3683.37',
+ '72.0.3626.110',
+ '72.0.3626.109',
+ '74.0.3703.3',
+ '74.0.3703.2',
+ '73.0.3683.36',
+ '74.0.3703.1',
+ '74.0.3703.0',
+ '73.0.3683.35',
+ '72.0.3626.108',
+ '74.0.3702.2',
+ '74.0.3699.3',
+ '74.0.3702.1',
+ '74.0.3702.0',
+ '73.0.3683.34',
+ '72.0.3626.107',
+ '73.0.3683.33',
+ '74.0.3701.1',
+ '74.0.3701.0',
+ '73.0.3683.32',
+ '73.0.3683.31',
+ '72.0.3626.105',
+ '74.0.3700.1',
+ '74.0.3700.0',
+ '73.0.3683.29',
+ '72.0.3626.103',
+ '74.0.3699.2',
+ '74.0.3699.1',
+ '74.0.3699.0',
+ '73.0.3683.28',
+ '72.0.3626.102',
+ '73.0.3683.27',
+ '73.0.3683.26',
+ '74.0.3698.0',
+ '74.0.3696.2',
+ '72.0.3626.101',
+ '73.0.3683.25',
+ '74.0.3696.1',
+ '74.0.3696.0',
+ '74.0.3694.8',
+ '72.0.3626.100',
+ '74.0.3694.7',
+ '74.0.3694.6',
+ '74.0.3694.5',
+ '74.0.3694.4',
+ '72.0.3626.99',
+ '72.0.3626.98',
+ '74.0.3694.3',
+ '73.0.3683.24',
+ '72.0.3626.97',
+ '72.0.3626.96',
+ '72.0.3626.95',
+ '73.0.3683.23',
+ '72.0.3626.94',
+ '73.0.3683.22',
+ '73.0.3683.21',
+ '72.0.3626.93',
+ '74.0.3694.2',
+ '72.0.3626.92',
+ '74.0.3694.1',
+ '74.0.3694.0',
+ '74.0.3693.6',
+ '73.0.3683.20',
+ '72.0.3626.91',
+ '74.0.3693.5',
+ '74.0.3693.4',
+ '74.0.3693.3',
+ '74.0.3693.2',
+ '73.0.3683.19',
+ '74.0.3693.1',
+ '74.0.3693.0',
+ '73.0.3683.18',
+ '72.0.3626.90',
+ '74.0.3692.1',
+ '74.0.3692.0',
+ '73.0.3683.17',
+ '72.0.3626.89',
+ '74.0.3687.3',
+ '74.0.3691.1',
+ '74.0.3691.0',
+ '73.0.3683.16',
+ '72.0.3626.88',
+ '72.0.3626.87',
+ '73.0.3683.15',
+ '74.0.3690.1',
+ '74.0.3690.0',
+ '73.0.3683.14',
+ '72.0.3626.86',
+ '73.0.3683.13',
+ '73.0.3683.12',
+ '74.0.3689.1',
+ '74.0.3689.0',
+ '73.0.3683.11',
+ '72.0.3626.85',
+ '73.0.3683.10',
+ '72.0.3626.84',
+ '73.0.3683.9',
+ '74.0.3688.1',
+ '74.0.3688.0',
+ '73.0.3683.8',
+ '72.0.3626.83',
+ '74.0.3687.2',
+ '74.0.3687.1',
+ '74.0.3687.0',
+ '73.0.3683.7',
+ '72.0.3626.82',
+ '74.0.3686.4',
+ '72.0.3626.81',
+ '74.0.3686.3',
+ '74.0.3686.2',
+ '74.0.3686.1',
+ '74.0.3686.0',
+ '73.0.3683.6',
+ '72.0.3626.80',
+ '74.0.3685.1',
+ '74.0.3685.0',
+ '73.0.3683.5',
+ '72.0.3626.79',
+ '74.0.3684.1',
+ '74.0.3684.0',
+ '73.0.3683.4',
+ '72.0.3626.78',
+ '72.0.3626.77',
+ '73.0.3683.3',
+ '73.0.3683.2',
+ '72.0.3626.76',
+ '73.0.3683.1',
+ '73.0.3683.0',
+ '72.0.3626.75',
+ '71.0.3578.141',
+ '73.0.3682.1',
+ '73.0.3682.0',
+ '72.0.3626.74',
+ '71.0.3578.140',
+ '73.0.3681.4',
+ '73.0.3681.3',
+ '73.0.3681.2',
+ '73.0.3681.1',
+ '73.0.3681.0',
+ '72.0.3626.73',
+ '71.0.3578.139',
+ '72.0.3626.72',
+ '72.0.3626.71',
+ '73.0.3680.1',
+ '73.0.3680.0',
+ '72.0.3626.70',
+ '71.0.3578.138',
+ '73.0.3678.2',
+ '73.0.3679.1',
+ '73.0.3679.0',
+ '72.0.3626.69',
+ '71.0.3578.137',
+ '73.0.3678.1',
+ '73.0.3678.0',
+ '71.0.3578.136',
+ '73.0.3677.1',
+ '73.0.3677.0',
+ '72.0.3626.68',
+ '72.0.3626.67',
+ '71.0.3578.135',
+ '73.0.3676.1',
+ '73.0.3676.0',
+ '73.0.3674.2',
+ '72.0.3626.66',
+ '71.0.3578.134',
+ '73.0.3674.1',
+ '73.0.3674.0',
+ '72.0.3626.65',
+ '71.0.3578.133',
+ '73.0.3673.2',
+ '73.0.3673.1',
+ '73.0.3673.0',
+ '72.0.3626.64',
+ '71.0.3578.132',
+ '72.0.3626.63',
+ '72.0.3626.62',
+ '72.0.3626.61',
+ '72.0.3626.60',
+ '73.0.3672.1',
+ '73.0.3672.0',
+ '72.0.3626.59',
+ '71.0.3578.131',
+ '73.0.3671.3',
+ '73.0.3671.2',
+ '73.0.3671.1',
+ '73.0.3671.0',
+ '72.0.3626.58',
+ '71.0.3578.130',
+ '73.0.3670.1',
+ '73.0.3670.0',
+ '72.0.3626.57',
+ '71.0.3578.129',
+ '73.0.3669.1',
+ '73.0.3669.0',
+ '72.0.3626.56',
+ '71.0.3578.128',
+ '73.0.3668.2',
+ '73.0.3668.1',
+ '73.0.3668.0',
+ '72.0.3626.55',
+ '71.0.3578.127',
+ '73.0.3667.2',
+ '73.0.3667.1',
+ '73.0.3667.0',
+ '72.0.3626.54',
+ '71.0.3578.126',
+ '73.0.3666.1',
+ '73.0.3666.0',
+ '72.0.3626.53',
+ '71.0.3578.125',
+ '73.0.3665.4',
+ '73.0.3665.3',
+ '72.0.3626.52',
+ '73.0.3665.2',
+ '73.0.3664.4',
+ '73.0.3665.1',
+ '73.0.3665.0',
+ '72.0.3626.51',
+ '71.0.3578.124',
+ '72.0.3626.50',
+ '73.0.3664.3',
+ '73.0.3664.2',
+ '73.0.3664.1',
+ '73.0.3664.0',
+ '73.0.3663.2',
+ '72.0.3626.49',
+ '71.0.3578.123',
+ '73.0.3663.1',
+ '73.0.3663.0',
+ '72.0.3626.48',
+ '71.0.3578.122',
+ '73.0.3662.1',
+ '73.0.3662.0',
+ '72.0.3626.47',
+ '71.0.3578.121',
+ '73.0.3661.1',
+ '72.0.3626.46',
+ '73.0.3661.0',
+ '72.0.3626.45',
+ '71.0.3578.120',
+ '73.0.3660.2',
+ '73.0.3660.1',
+ '73.0.3660.0',
+ '72.0.3626.44',
+ '71.0.3578.119',
+ '73.0.3659.1',
+ '73.0.3659.0',
+ '72.0.3626.43',
+ '71.0.3578.118',
+ '73.0.3658.1',
+ '73.0.3658.0',
+ '72.0.3626.42',
+ '71.0.3578.117',
+ '73.0.3657.1',
+ '73.0.3657.0',
+ '72.0.3626.41',
+ '71.0.3578.116',
+ '73.0.3656.1',
+ '73.0.3656.0',
+ '72.0.3626.40',
+ '71.0.3578.115',
+ '73.0.3655.1',
+ '73.0.3655.0',
+ '72.0.3626.39',
+ '71.0.3578.114',
+ '73.0.3654.1',
+ '73.0.3654.0',
+ '72.0.3626.38',
+ '71.0.3578.113',
+ '73.0.3653.1',
+ '73.0.3653.0',
+ '72.0.3626.37',
+ '71.0.3578.112',
+ '73.0.3652.1',
+ '73.0.3652.0',
+ '72.0.3626.36',
+ '71.0.3578.111',
+ '73.0.3651.1',
+ '73.0.3651.0',
+ '72.0.3626.35',
+ '71.0.3578.110',
+ '73.0.3650.1',
+ '73.0.3650.0',
+ '72.0.3626.34',
+ '71.0.3578.109',
+ '73.0.3649.1',
+ '73.0.3649.0',
+ '72.0.3626.33',
+ '71.0.3578.108',
+ '73.0.3648.2',
+ '73.0.3648.1',
+ '73.0.3648.0',
+ '72.0.3626.32',
+ '71.0.3578.107',
+ '73.0.3647.2',
+ '73.0.3647.1',
+ '73.0.3647.0',
+ '72.0.3626.31',
+ '71.0.3578.106',
+ '73.0.3635.3',
+ '73.0.3646.2',
+ '73.0.3646.1',
+ '73.0.3646.0',
+ '72.0.3626.30',
+ '71.0.3578.105',
+ '72.0.3626.29',
+ '73.0.3645.2',
+ '73.0.3645.1',
+ '73.0.3645.0',
+ '72.0.3626.28',
+ '71.0.3578.104',
+ '72.0.3626.27',
+ '72.0.3626.26',
+ '72.0.3626.25',
+ '72.0.3626.24',
+ '73.0.3644.0',
+ '73.0.3643.2',
+ '72.0.3626.23',
+ '71.0.3578.103',
+ '73.0.3643.1',
+ '73.0.3643.0',
+ '72.0.3626.22',
+ '71.0.3578.102',
+ '73.0.3642.1',
+ '73.0.3642.0',
+ '72.0.3626.21',
+ '71.0.3578.101',
+ '73.0.3641.1',
+ '73.0.3641.0',
+ '72.0.3626.20',
+ '71.0.3578.100',
+ '72.0.3626.19',
+ '73.0.3640.1',
+ '73.0.3640.0',
+ '72.0.3626.18',
+ '73.0.3639.1',
+ '71.0.3578.99',
+ '73.0.3639.0',
+ '72.0.3626.17',
+ '73.0.3638.2',
+ '72.0.3626.16',
+ '73.0.3638.1',
+ '73.0.3638.0',
+ '72.0.3626.15',
+ '71.0.3578.98',
+ '73.0.3635.2',
+ '71.0.3578.97',
+ '73.0.3637.1',
+ '73.0.3637.0',
+ '72.0.3626.14',
+ '71.0.3578.96',
+ '71.0.3578.95',
+ '72.0.3626.13',
+ '71.0.3578.94',
+ '73.0.3636.2',
+ '71.0.3578.93',
+ '73.0.3636.1',
+ '73.0.3636.0',
+ '72.0.3626.12',
+ '71.0.3578.92',
+ '73.0.3635.1',
+ '73.0.3635.0',
+ '72.0.3626.11',
+ '71.0.3578.91',
+ '73.0.3634.2',
+ '73.0.3634.1',
+ '73.0.3634.0',
+ '72.0.3626.10',
+ '71.0.3578.90',
+ '71.0.3578.89',
+ '73.0.3633.2',
+ '73.0.3633.1',
+ '73.0.3633.0',
+ '72.0.3610.4',
+ '72.0.3626.9',
+ '71.0.3578.88',
+ '73.0.3632.5',
+ '73.0.3632.4',
+ '73.0.3632.3',
+ '73.0.3632.2',
+ '73.0.3632.1',
+ '73.0.3632.0',
+ '72.0.3626.8',
+ '71.0.3578.87',
+ '73.0.3631.2',
+ '73.0.3631.1',
+ '73.0.3631.0',
+ '72.0.3626.7',
+ '71.0.3578.86',
+ '72.0.3626.6',
+ '73.0.3630.1',
+ '73.0.3630.0',
+ '72.0.3626.5',
+ '71.0.3578.85',
+ '72.0.3626.4',
+ '73.0.3628.3',
+ '73.0.3628.2',
+ '73.0.3629.1',
+ '73.0.3629.0',
+ '72.0.3626.3',
+ '71.0.3578.84',
+ '73.0.3628.1',
+ '73.0.3628.0',
+ '71.0.3578.83',
+ '73.0.3627.1',
+ '73.0.3627.0',
+ '72.0.3626.2',
+ '71.0.3578.82',
+ '71.0.3578.81',
+ '71.0.3578.80',
+ '72.0.3626.1',
+ '72.0.3626.0',
+ '71.0.3578.79',
+ '70.0.3538.124',
+ '71.0.3578.78',
+ '72.0.3623.4',
+ '72.0.3625.2',
+ '72.0.3625.1',
+ '72.0.3625.0',
+ '71.0.3578.77',
+ '70.0.3538.123',
+ '72.0.3624.4',
+ '72.0.3624.3',
+ '72.0.3624.2',
+ '71.0.3578.76',
+ '72.0.3624.1',
+ '72.0.3624.0',
+ '72.0.3623.3',
+ '71.0.3578.75',
+ '70.0.3538.122',
+ '71.0.3578.74',
+ '72.0.3623.2',
+ '72.0.3610.3',
+ '72.0.3623.1',
+ '72.0.3623.0',
+ '72.0.3622.3',
+ '72.0.3622.2',
+ '71.0.3578.73',
+ '70.0.3538.121',
+ '72.0.3622.1',
+ '72.0.3622.0',
+ '71.0.3578.72',
+ '70.0.3538.120',
+ '72.0.3621.1',
+ '72.0.3621.0',
+ '71.0.3578.71',
+ '70.0.3538.119',
+ '72.0.3620.1',
+ '72.0.3620.0',
+ '71.0.3578.70',
+ '70.0.3538.118',
+ '71.0.3578.69',
+ '72.0.3619.1',
+ '72.0.3619.0',
+ '71.0.3578.68',
+ '70.0.3538.117',
+ '71.0.3578.67',
+ '72.0.3618.1',
+ '72.0.3618.0',
+ '71.0.3578.66',
+ '70.0.3538.116',
+ '72.0.3617.1',
+ '72.0.3617.0',
+ '71.0.3578.65',
+ '70.0.3538.115',
+ '72.0.3602.3',
+ '71.0.3578.64',
+ '72.0.3616.1',
+ '72.0.3616.0',
+ '71.0.3578.63',
+ '70.0.3538.114',
+ '71.0.3578.62',
+ '72.0.3615.1',
+ '72.0.3615.0',
+ '71.0.3578.61',
+ '70.0.3538.113',
+ '72.0.3614.1',
+ '72.0.3614.0',
+ '71.0.3578.60',
+ '70.0.3538.112',
+ '72.0.3613.1',
+ '72.0.3613.0',
+ '71.0.3578.59',
+ '70.0.3538.111',
+ '72.0.3612.2',
+ '72.0.3612.1',
+ '72.0.3612.0',
+ '70.0.3538.110',
+ '71.0.3578.58',
+ '70.0.3538.109',
+ '72.0.3611.2',
+ '72.0.3611.1',
+ '72.0.3611.0',
+ '71.0.3578.57',
+ '70.0.3538.108',
+ '72.0.3610.2',
+ '71.0.3578.56',
+ '71.0.3578.55',
+ '72.0.3610.1',
+ '72.0.3610.0',
+ '71.0.3578.54',
+ '70.0.3538.107',
+ '71.0.3578.53',
+ '72.0.3609.3',
+ '71.0.3578.52',
+ '72.0.3609.2',
+ '71.0.3578.51',
+ '72.0.3608.5',
+ '72.0.3609.1',
+ '72.0.3609.0',
+ '71.0.3578.50',
+ '70.0.3538.106',
+ '72.0.3608.4',
+ '72.0.3608.3',
+ '72.0.3608.2',
+ '71.0.3578.49',
+ '72.0.3608.1',
+ '72.0.3608.0',
+ '70.0.3538.105',
+ '71.0.3578.48',
+ '72.0.3607.1',
+ '72.0.3607.0',
+ '71.0.3578.47',
+ '70.0.3538.104',
+ '72.0.3606.2',
+ '72.0.3606.1',
+ '72.0.3606.0',
+ '71.0.3578.46',
+ '70.0.3538.103',
+ '70.0.3538.102',
+ '72.0.3605.3',
+ '72.0.3605.2',
+ '72.0.3605.1',
+ '72.0.3605.0',
+ '71.0.3578.45',
+ '70.0.3538.101',
+ '71.0.3578.44',
+ '71.0.3578.43',
+ '70.0.3538.100',
+ '70.0.3538.99',
+ '71.0.3578.42',
+ '72.0.3604.1',
+ '72.0.3604.0',
+ '71.0.3578.41',
+ '70.0.3538.98',
+ '71.0.3578.40',
+ '72.0.3603.2',
+ '72.0.3603.1',
+ '72.0.3603.0',
+ '71.0.3578.39',
+ '70.0.3538.97',
+ '72.0.3602.2',
+ '71.0.3578.38',
+ '71.0.3578.37',
+ '72.0.3602.1',
+ '72.0.3602.0',
+ '71.0.3578.36',
+ '70.0.3538.96',
+ '72.0.3601.1',
+ '72.0.3601.0',
+ '71.0.3578.35',
+ '70.0.3538.95',
+ '72.0.3600.1',
+ '72.0.3600.0',
+ '71.0.3578.34',
+ '70.0.3538.94',
+ '72.0.3599.3',
+ '72.0.3599.2',
+ '72.0.3599.1',
+ '72.0.3599.0',
+ '71.0.3578.33',
+ '70.0.3538.93',
+ '72.0.3598.1',
+ '72.0.3598.0',
+ '71.0.3578.32',
+ '70.0.3538.87',
+ '72.0.3597.1',
+ '72.0.3597.0',
+ '72.0.3596.2',
+ '71.0.3578.31',
+ '70.0.3538.86',
+ '71.0.3578.30',
+ '71.0.3578.29',
+ '72.0.3596.1',
+ '72.0.3596.0',
+ '71.0.3578.28',
+ '70.0.3538.85',
+ '72.0.3595.2',
+ '72.0.3591.3',
+ '72.0.3595.1',
+ '72.0.3595.0',
+ '71.0.3578.27',
+ '70.0.3538.84',
+ '72.0.3594.1',
+ '72.0.3594.0',
+ '71.0.3578.26',
+ '70.0.3538.83',
+ '72.0.3593.2',
+ '72.0.3593.1',
+ '72.0.3593.0',
+ '71.0.3578.25',
+ '70.0.3538.82',
+ '72.0.3589.3',
+ '72.0.3592.2',
+ '72.0.3592.1',
+ '72.0.3592.0',
+ '71.0.3578.24',
+ '72.0.3589.2',
+ '70.0.3538.81',
+ '70.0.3538.80',
+ '72.0.3591.2',
+ '72.0.3591.1',
+ '72.0.3591.0',
+ '71.0.3578.23',
+ '70.0.3538.79',
+ '71.0.3578.22',
+ '72.0.3590.1',
+ '72.0.3590.0',
+ '71.0.3578.21',
+ '70.0.3538.78',
+ '70.0.3538.77',
+ '72.0.3589.1',
+ '72.0.3589.0',
+ '71.0.3578.20',
+ '70.0.3538.76',
+ '71.0.3578.19',
+ '70.0.3538.75',
+ '72.0.3588.1',
+ '72.0.3588.0',
+ '71.0.3578.18',
+ '70.0.3538.74',
+ '72.0.3586.2',
+ '72.0.3587.0',
+ '71.0.3578.17',
+ '70.0.3538.73',
+ '72.0.3586.1',
+ '72.0.3586.0',
+ '71.0.3578.16',
+ '70.0.3538.72',
+ '72.0.3585.1',
+ '72.0.3585.0',
+ '71.0.3578.15',
+ '70.0.3538.71',
+ '71.0.3578.14',
+ '72.0.3584.1',
+ '72.0.3584.0',
+ '71.0.3578.13',
+ '70.0.3538.70',
+ '72.0.3583.2',
+ '71.0.3578.12',
+ '72.0.3583.1',
+ '72.0.3583.0',
+ '71.0.3578.11',
+ '70.0.3538.69',
+ '71.0.3578.10',
+ '72.0.3582.0',
+ '72.0.3581.4',
+ '71.0.3578.9',
+ '70.0.3538.67',
+ '72.0.3581.3',
+ '72.0.3581.2',
+ '72.0.3581.1',
+ '72.0.3581.0',
+ '71.0.3578.8',
+ '70.0.3538.66',
+ '72.0.3580.1',
+ '72.0.3580.0',
+ '71.0.3578.7',
+ '70.0.3538.65',
+ '71.0.3578.6',
+ '72.0.3579.1',
+ '72.0.3579.0',
+ '71.0.3578.5',
+ '70.0.3538.64',
+ '71.0.3578.4',
+ '71.0.3578.3',
+ '71.0.3578.2',
+ '71.0.3578.1',
+ '71.0.3578.0',
+ '70.0.3538.63',
+ '69.0.3497.128',
+ '70.0.3538.62',
+ '70.0.3538.61',
+ '70.0.3538.60',
+ '70.0.3538.59',
+ '71.0.3577.1',
+ '71.0.3577.0',
+ '70.0.3538.58',
+ '69.0.3497.127',
+ '71.0.3576.2',
+ '71.0.3576.1',
+ '71.0.3576.0',
+ '70.0.3538.57',
+ '70.0.3538.56',
+ '71.0.3575.2',
+ '70.0.3538.55',
+ '69.0.3497.126',
+ '70.0.3538.54',
+ '71.0.3575.1',
+ '71.0.3575.0',
+ '71.0.3574.1',
+ '71.0.3574.0',
+ '70.0.3538.53',
+ '69.0.3497.125',
+ '70.0.3538.52',
+ '71.0.3573.1',
+ '71.0.3573.0',
+ '70.0.3538.51',
+ '69.0.3497.124',
+ '71.0.3572.1',
+ '71.0.3572.0',
+ '70.0.3538.50',
+ '69.0.3497.123',
+ '71.0.3571.2',
+ '70.0.3538.49',
+ '69.0.3497.122',
+ '71.0.3571.1',
+ '71.0.3571.0',
+ '70.0.3538.48',
+ '69.0.3497.121',
+ '71.0.3570.1',
+ '71.0.3570.0',
+ '70.0.3538.47',
+ '69.0.3497.120',
+ '71.0.3568.2',
+ '71.0.3569.1',
+ '71.0.3569.0',
+ '70.0.3538.46',
+ '69.0.3497.119',
+ '70.0.3538.45',
+ '71.0.3568.1',
+ '71.0.3568.0',
+ '70.0.3538.44',
+ '69.0.3497.118',
+ '70.0.3538.43',
+ '70.0.3538.42',
+ '71.0.3567.1',
+ '71.0.3567.0',
+ '70.0.3538.41',
+ '69.0.3497.117',
+ '71.0.3566.1',
+ '71.0.3566.0',
+ '70.0.3538.40',
+ '69.0.3497.116',
+ '71.0.3565.1',
+ '71.0.3565.0',
+ '70.0.3538.39',
+ '69.0.3497.115',
+ '71.0.3564.1',
+ '71.0.3564.0',
+ '70.0.3538.38',
+ '69.0.3497.114',
+ '71.0.3563.0',
+ '71.0.3562.2',
+ '70.0.3538.37',
+ '69.0.3497.113',
+ '70.0.3538.36',
+ '70.0.3538.35',
+ '71.0.3562.1',
+ '71.0.3562.0',
+ '70.0.3538.34',
+ '69.0.3497.112',
+ '70.0.3538.33',
+ '71.0.3561.1',
+ '71.0.3561.0',
+ '70.0.3538.32',
+ '69.0.3497.111',
+ '71.0.3559.6',
+ '71.0.3560.1',
+ '71.0.3560.0',
+ '71.0.3559.5',
+ '71.0.3559.4',
+ '70.0.3538.31',
+ '69.0.3497.110',
+ '71.0.3559.3',
+ '70.0.3538.30',
+ '69.0.3497.109',
+ '71.0.3559.2',
+ '71.0.3559.1',
+ '71.0.3559.0',
+ '70.0.3538.29',
+ '69.0.3497.108',
+ '71.0.3558.2',
+ '71.0.3558.1',
+ '71.0.3558.0',
+ '70.0.3538.28',
+ '69.0.3497.107',
+ '71.0.3557.2',
+ '71.0.3557.1',
+ '71.0.3557.0',
+ '70.0.3538.27',
+ '69.0.3497.106',
+ '71.0.3554.4',
+ '70.0.3538.26',
+ '71.0.3556.1',
+ '71.0.3556.0',
+ '70.0.3538.25',
+ '71.0.3554.3',
+ '69.0.3497.105',
+ '71.0.3554.2',
+ '70.0.3538.24',
+ '69.0.3497.104',
+ '71.0.3555.2',
+ '70.0.3538.23',
+ '71.0.3555.1',
+ '71.0.3555.0',
+ '70.0.3538.22',
+ '69.0.3497.103',
+ '71.0.3554.1',
+ '71.0.3554.0',
+ '70.0.3538.21',
+ '69.0.3497.102',
+ '71.0.3553.3',
+ '70.0.3538.20',
+ '69.0.3497.101',
+ '71.0.3553.2',
+ '69.0.3497.100',
+ '71.0.3553.1',
+ '71.0.3553.0',
+ '70.0.3538.19',
+ '69.0.3497.99',
+ '69.0.3497.98',
+ '69.0.3497.97',
+ '71.0.3552.6',
+ '71.0.3552.5',
+ '71.0.3552.4',
+ '71.0.3552.3',
+ '71.0.3552.2',
+ '71.0.3552.1',
+ '71.0.3552.0',
+ '70.0.3538.18',
+ '69.0.3497.96',
+ '71.0.3551.3',
+ '71.0.3551.2',
+ '71.0.3551.1',
+ '71.0.3551.0',
+ '70.0.3538.17',
+ '69.0.3497.95',
+ '71.0.3550.3',
+ '71.0.3550.2',
+ '71.0.3550.1',
+ '71.0.3550.0',
+ '70.0.3538.16',
+ '69.0.3497.94',
+ '71.0.3549.1',
+ '71.0.3549.0',
+ '70.0.3538.15',
+ '69.0.3497.93',
+ '69.0.3497.92',
+ '71.0.3548.1',
+ '71.0.3548.0',
+ '70.0.3538.14',
+ '69.0.3497.91',
+ '71.0.3547.1',
+ '71.0.3547.0',
+ '70.0.3538.13',
+ '69.0.3497.90',
+ '71.0.3546.2',
+ '69.0.3497.89',
+ '71.0.3546.1',
+ '71.0.3546.0',
+ '70.0.3538.12',
+ '69.0.3497.88',
+ '71.0.3545.4',
+ '71.0.3545.3',
+ '71.0.3545.2',
+ '71.0.3545.1',
+ '71.0.3545.0',
+ '70.0.3538.11',
+ '69.0.3497.87',
+ '71.0.3544.5',
+ '71.0.3544.4',
+ '71.0.3544.3',
+ '71.0.3544.2',
+ '71.0.3544.1',
+ '71.0.3544.0',
+ '69.0.3497.86',
+ '70.0.3538.10',
+ '69.0.3497.85',
+ '70.0.3538.9',
+ '69.0.3497.84',
+ '71.0.3543.4',
+ '70.0.3538.8',
+ '71.0.3543.3',
+ '71.0.3543.2',
+ '71.0.3543.1',
+ '71.0.3543.0',
+ '70.0.3538.7',
+ '69.0.3497.83',
+ '71.0.3542.2',
+ '71.0.3542.1',
+ '71.0.3542.0',
+ '70.0.3538.6',
+ '69.0.3497.82',
+ '69.0.3497.81',
+ '71.0.3541.1',
+ '71.0.3541.0',
+ '70.0.3538.5',
+ '69.0.3497.80',
+ '71.0.3540.1',
+ '71.0.3540.0',
+ '70.0.3538.4',
+ '69.0.3497.79',
+ '70.0.3538.3',
+ '71.0.3539.1',
+ '71.0.3539.0',
+ '69.0.3497.78',
+ '68.0.3440.134',
+ '69.0.3497.77',
+ '70.0.3538.2',
+ '70.0.3538.1',
+ '70.0.3538.0',
+ '69.0.3497.76',
+ '68.0.3440.133',
+ '69.0.3497.75',
+ '70.0.3537.2',
+ '70.0.3537.1',
+ '70.0.3537.0',
+ '69.0.3497.74',
+ '68.0.3440.132',
+ '70.0.3536.0',
+ '70.0.3535.5',
+ '70.0.3535.4',
+ '70.0.3535.3',
+ '69.0.3497.73',
+ '68.0.3440.131',
+ '70.0.3532.8',
+ '70.0.3532.7',
+ '69.0.3497.72',
+ '69.0.3497.71',
+ '70.0.3535.2',
+ '70.0.3535.1',
+ '70.0.3535.0',
+ '69.0.3497.70',
+ '68.0.3440.130',
+ '69.0.3497.69',
+ '68.0.3440.129',
+ '70.0.3534.4',
+ '70.0.3534.3',
+ '70.0.3534.2',
+ '70.0.3534.1',
+ '70.0.3534.0',
+ '69.0.3497.68',
+ '68.0.3440.128',
+ '70.0.3533.2',
+ '70.0.3533.1',
+ '70.0.3533.0',
+ '69.0.3497.67',
+ '68.0.3440.127',
+ '70.0.3532.6',
+ '70.0.3532.5',
+ '70.0.3532.4',
+ '69.0.3497.66',
+ '68.0.3440.126',
+ '70.0.3532.3',
+ '70.0.3532.2',
+ '70.0.3532.1',
+ '69.0.3497.60',
+ '69.0.3497.65',
+ '69.0.3497.64',
+ '70.0.3532.0',
+ '70.0.3531.0',
+ '70.0.3530.4',
+ '70.0.3530.3',
+ '70.0.3530.2',
+ '69.0.3497.58',
+ '68.0.3440.125',
+ '69.0.3497.57',
+ '69.0.3497.56',
+ '69.0.3497.55',
+ '69.0.3497.54',
+ '70.0.3530.1',
+ '70.0.3530.0',
+ '69.0.3497.53',
+ '68.0.3440.124',
+ '69.0.3497.52',
+ '70.0.3529.3',
+ '70.0.3529.2',
+ '70.0.3529.1',
+ '70.0.3529.0',
+ '69.0.3497.51',
+ '70.0.3528.4',
+ '68.0.3440.123',
+ '70.0.3528.3',
+ '70.0.3528.2',
+ '70.0.3528.1',
+ '70.0.3528.0',
+ '69.0.3497.50',
+ '68.0.3440.122',
+ '70.0.3527.1',
+ '70.0.3527.0',
+ '69.0.3497.49',
+ '68.0.3440.121',
+ '70.0.3526.1',
+ '70.0.3526.0',
+ '68.0.3440.120',
+ '69.0.3497.48',
+ '69.0.3497.47',
+ '68.0.3440.119',
+ '68.0.3440.118',
+ '70.0.3525.5',
+ '70.0.3525.4',
+ '70.0.3525.3',
+ '68.0.3440.117',
+ '69.0.3497.46',
+ '70.0.3525.2',
+ '70.0.3525.1',
+ '70.0.3525.0',
+ '69.0.3497.45',
+ '68.0.3440.116',
+ '70.0.3524.4',
+ '70.0.3524.3',
+ '69.0.3497.44',
+ '70.0.3524.2',
+ '70.0.3524.1',
+ '70.0.3524.0',
+ '70.0.3523.2',
+ '69.0.3497.43',
+ '68.0.3440.115',
+ '70.0.3505.9',
+ '69.0.3497.42',
+ '70.0.3505.8',
+ '70.0.3523.1',
+ '70.0.3523.0',
+ '69.0.3497.41',
+ '68.0.3440.114',
+ '70.0.3505.7',
+ '69.0.3497.40',
+ '70.0.3522.1',
+ '70.0.3522.0',
+ '70.0.3521.2',
+ '69.0.3497.39',
+ '68.0.3440.113',
+ '70.0.3505.6',
+ '70.0.3521.1',
+ '70.0.3521.0',
+ '69.0.3497.38',
+ '68.0.3440.112',
+ '70.0.3520.1',
+ '70.0.3520.0',
+ '69.0.3497.37',
+ '68.0.3440.111',
+ '70.0.3519.3',
+ '70.0.3519.2',
+ '70.0.3519.1',
+ '70.0.3519.0',
+ '69.0.3497.36',
+ '68.0.3440.110',
+ '70.0.3518.1',
+ '70.0.3518.0',
+ '69.0.3497.35',
+ '69.0.3497.34',
+ '68.0.3440.109',
+ '70.0.3517.1',
+ '70.0.3517.0',
+ '69.0.3497.33',
+ '68.0.3440.108',
+ '69.0.3497.32',
+ '70.0.3516.3',
+ '70.0.3516.2',
+ '70.0.3516.1',
+ '70.0.3516.0',
+ '69.0.3497.31',
+ '68.0.3440.107',
+ '70.0.3515.4',
+ '68.0.3440.106',
+ '70.0.3515.3',
+ '70.0.3515.2',
+ '70.0.3515.1',
+ '70.0.3515.0',
+ '69.0.3497.30',
+ '68.0.3440.105',
+ '68.0.3440.104',
+ '70.0.3514.2',
+ '70.0.3514.1',
+ '70.0.3514.0',
+ '69.0.3497.29',
+ '68.0.3440.103',
+ '70.0.3513.1',
+ '70.0.3513.0',
+ '69.0.3497.28',
+ )
+ return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+
+
std_headers = {
- 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
+ 'User-Agent': random_user_agent(),
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate',
@@ -124,8 +1712,8 @@ KNOWN_EXTENSIONS = (
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
- itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
- 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
'%d %B %Y',
@@ -133,13 +1721,16 @@ DATE_FORMATS = (
'%B %d %Y',
'%B %dst %Y',
'%B %dnd %Y',
+ '%B %drd %Y',
'%B %dth %Y',
'%b %d %Y',
'%b %dst %Y',
'%b %dnd %Y',
+ '%b %drd %Y',
'%b %dth %Y',
'%b %dst %Y %I:%M',
'%b %dnd %Y %I:%M',
+ '%b %drd %Y %I:%M',
'%b %dth %Y %I:%M',
'%Y %m %d',
'%Y-%m-%d',
@@ -183,7 +1774,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding():
@@ -246,6 +1837,12 @@ def write_json_file(obj, fn):
os.unlink(fn)
except OSError:
pass
+ try:
+ mask = os.umask(0)
+ os.umask(mask)
+ os.chmod(tf.name, 0o666 & ~mask)
+ except OSError:
+ pass
os.rename(tf.name, fn)
except Exception:
try:
@@ -545,7 +2142,7 @@ def sanitize_url(url):
return 'http:%s' % url
# Fix some common typos seen so far
COMMON_TYPOS = (
- # https://github.com/rg3/youtube-dl/issues/15649
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
(r'^httpss://', r'https://'),
# https://bx1.be/lives/direct-tv/
(r'^rmtp([es]?)://', r'rtmp\1://'),
@@ -595,7 +2192,7 @@ def _htmlentity_transform(entity_with_semicolon):
numstr = '0%s' % numstr
else:
base = 10
- # See https://github.com/rg3/youtube-dl/issues/7518
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
try:
return compat_chr(int(numstr, base))
except ValueError:
@@ -860,8 +2457,8 @@ class XAttrMetadataError(YoutubeDLError):
self.msg = msg
# Parsing code and msg
- if (self.code in (errno.ENOSPC, errno.EDQUOT) or
- 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+ if (self.code in (errno.ENOSPC, errno.EDQUOT)
+ or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
self.reason = 'NO_SPACE'
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
self.reason = 'VALUE_TOO_LONG'
@@ -876,7 +2473,7 @@ class XAttrUnavailableError(YoutubeDLError):
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
# expected HTTP responses to meet HTTP/1.0 or later (see also
- # https://github.com/rg3/youtube-dl/issues/6727)
+ # https://github.com/ytdl-org/youtube-dl/issues/6727)
if sys.version_info < (3, 0):
kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs))
@@ -1050,7 +2647,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
- # https://github.com/rg3/youtube-dl/issues/6457).
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
if 300 <= resp.code < 400:
location = resp.headers.get('Location')
if location:
@@ -1139,6 +2736,124 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
req, **kwargs)
+class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ """
+ See [1] for cookie file format.
+
+ 1. https://curl.haxx.se/docs/http-cookies.html
+ """
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by youtube-dl. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """
+ Save cookies to a file.
+
+ Most of the code is taken from CPython 3.8 and slightly adapted
+ to support cookie files with UTF-8 in both python 2 and 3.
+ """
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ # Store session cookies with `expires` set to 0 instead of an empty
+ # string
+ for cookie in self:
+ if cookie.expires is None:
+ cookie.expires = 0
+
+ with io.open(filename, 'w', encoding='utf-8') as f:
+ f.write(self._HEADER)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure:
+ secure = 'TRUE'
+ else:
+ secure = 'FALSE'
+ if cookie.domain.startswith('.'):
+ initial_dot = 'TRUE'
+ else:
+ initial_dot = 'FALSE'
+ if cookie.expires is not None:
+ expires = compat_str(cookie.expires)
+ else:
+ expires = ''
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = ''
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ '\t'.join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value]) + '\n')
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
+ cf = io.StringIO()
+ with io.open(filename, encoding='utf-8') as f:
+ for line in f:
+ try:
+ cf.write(prepare_line(line))
+ except compat_cookiejar.LoadError as e:
+ write_string(
+ 'WARNING: skipping cookie file entry due to %s: %r\n'
+ % (e, line), sys.stderr)
+ continue
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
+ # Session cookies are denoted by either `expires` field set to
+ # an empty string or 0. MozillaCookieJar only recognizes the former
+ # (see [1]). So we need force the latter to be recognized as session
+ # cookies on our own.
+ # Session cookies may be important for cookies-based authentication,
+ # e.g. usually, when user does not check 'Remember me' check box while
+ # logging in on a site, some important cookies are stored as session
+ # cookies so that not recognizing them will result in failed login.
+ # 1. https://bugs.python.org/issue17164
+ for cookie in self:
+ # Treat `expires=0` cookies as session cookies
+ if cookie.expires == 0:
+ cookie.expires = None
+ cookie.discard = True
+
+
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
@@ -1146,7 +2861,7 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
def http_response(self, request, response):
# Python 2 will choke on next HTTP request in row if there are non-ASCII
# characters in Set-Cookie HTTP header of last response (see
- # https://github.com/rg3/youtube-dl/issues/6769).
+ # https://github.com/ytdl-org/youtube-dl/issues/6769).
# In order to at least prevent crashing we will percent encode Set-Cookie
# header before HTTPCookieProcessor starts processing it.
# if sys.version_info < (3, 0) and response.headers:
@@ -1163,6 +2878,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
https_response = http_response
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+ if sys.version_info[0] < 3:
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ # On python 2 urlh.geturl() may sometimes return redirect URL
+ # as byte string instead of unicode. This workaround allows
+ # to force it always return unicode.
+ return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+
+
def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
@@ -1278,8 +3002,8 @@ def determine_ext(url, default_ext='unknown_video'):
return default_ext
-def subtitles_filename(filename, sub_lang, sub_format):
- return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
+def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
+ return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
def date_from_str(date_str):
@@ -1409,8 +3133,8 @@ def _windows_write_string(s, out):
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
- GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+ or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
@@ -1446,8 +3170,8 @@ def write_string(s, out=None, encoding=None):
if _windows_write_string(s, out):
return
- if ('b' in getattr(out, 'mode', '') or
- sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ if ('b' in getattr(out, 'mode', '')
+ or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
byt = s.encode(encoding or preferredencoding(), 'ignore')
out.write(byt)
elif hasattr(out, 'buffer'):
@@ -1754,6 +3478,14 @@ def parse_resolution(s):
return {}
+def parse_bitrate(s):
+ if not isinstance(s, compat_str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
def month_by_name(name, lang='en'):
""" Return the number of a month by (locale-independently) English name """
@@ -1840,7 +3572,7 @@ def urljoin(base, path):
path = path.decode('utf-8')
if not isinstance(path, compat_str) or not path:
return None
- if re.match(r'^(?:https?:)?//', path):
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode('utf-8')
@@ -1870,7 +3602,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
return default
try:
return int(v) * invscale // scale
- except ValueError:
+ except (ValueError, TypeError):
return default
@@ -1880,10 +3612,11 @@ def str_or_none(v, default=None):
def str_to_int(int_str):
""" A more relaxed version of int_or_none """
- if int_str is None:
- return None
- int_str = re.sub(r'[,\.\+]', '', int_str)
- return int(int_str)
+ if isinstance(int_str, compat_integer_types):
+ return int_str
+ elif isinstance(int_str, compat_str):
+ int_str = re.sub(r'[,\.\+]', '', int_str)
+ return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
@@ -1891,7 +3624,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
return default
try:
return float(v) * invscale / scale
- except ValueError:
+ except (ValueError, TypeError):
return default
@@ -1899,8 +3632,8 @@ def bool_or_none(v, default=None):
return v if isinstance(v, bool) else default
-def strip_or_none(v):
- return None if v is None else v.strip()
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, compat_str) else default
def url_or_none(url):
@@ -2000,7 +3733,7 @@ def get_exe_version(exe, args=['--version'],
try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if youtube-dl is run in the background.
- # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
[encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
@@ -2276,10 +4009,10 @@ def merge_dicts(*dicts):
for k, v in a_dict.items():
if v is None:
continue
- if (k not in merged or
- (isinstance(v, compat_str) and v and
- isinstance(merged[k], compat_str) and
- not merged[k])):
+ if (k not in merged
+ or (isinstance(v, compat_str) and v
+ and isinstance(merged[k], compat_str)
+ and not merged[k])):
merged[k] = v
return merged
@@ -2477,7 +4210,7 @@ def parse_codecs(codecs_str):
vcodec, acodec = None, None
for full_codec in splited_codecs:
codec = full_codec.split('.')[0]
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
if not vcodec:
vcodec = full_codec
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
@@ -2488,13 +4221,8 @@ def parse_codecs(codecs_str):
if not vcodec and not acodec:
if len(splited_codecs) == 2:
return {
- 'vcodec': vcodec,
- 'acodec': acodec,
- }
- elif len(splited_codecs) == 1:
- return {
- 'vcodec': 'none',
- 'acodec': vcodec,
+ 'vcodec': splited_codecs[0],
+ 'acodec': splited_codecs[1],
}
else:
return {
@@ -2605,14 +4333,14 @@ def _match_one(filter_part, dct):
if m:
op = COMPARISON_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
- if (m.group('quotedstrval') is not None or
- m.group('strval') is not None or
+ if (m.group('quotedstrval') is not None
+ or m.group('strval') is not None
# If the original field is a string and matching comparisonvalue is
# a number we should respect the origin of the original field
# and process comparison value as a string (see
- # https://github.com/rg3/youtube-dl/issues/11082).
- actual_value is not None and m.group('intval') is not None and
- isinstance(actual_value, compat_str)):
+ # https://github.com/ytdl-org/youtube-dl/issues/11082).
+ or actual_value is not None and m.group('intval') is not None
+ and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
@@ -2940,6 +4668,7 @@ class ISO639Utils(object):
'gv': 'glv',
'ha': 'hau',
'he': 'heb',
+ 'iw': 'heb', # Replaced by he in 1989 revision
'hi': 'hin',
'ho': 'hmo',
'hr': 'hrv',
@@ -2949,6 +4678,7 @@ class ISO639Utils(object):
'hz': 'her',
'ia': 'ina',
'id': 'ind',
+ 'in': 'ind', # Replaced by id in 1989 revision
'ie': 'ile',
'ig': 'ibo',
'ii': 'iii',
@@ -3063,6 +4793,7 @@ class ISO639Utils(object):
'wo': 'wol',
'xh': 'xho',
'yi': 'yid',
+ 'ji': 'yid', # Replaced by yi in 1989 revision
'yo': 'yor',
'za': 'zha',
'zh': 'zho',
@@ -3345,7 +5076,7 @@ class ISO3166Utils(object):
class GeoUtils(object):
# Major IPv4 address blocks per country
_country_ip_map = {
- 'AD': '85.94.160.0/19',
+ 'AD': '46.172.224.0/19',
'AE': '94.200.0.0/13',
'AF': '149.54.0.0/17',
'AG': '209.59.64.0/18',
@@ -3353,28 +5084,30 @@ class GeoUtils(object):
'AL': '46.99.0.0/16',
'AM': '46.70.0.0/15',
'AO': '105.168.0.0/13',
- 'AP': '159.117.192.0/21',
+ 'AP': '182.50.184.0/21',
+ 'AQ': '23.154.160.0/24',
'AR': '181.0.0.0/12',
'AS': '202.70.112.0/20',
- 'AT': '84.112.0.0/13',
+ 'AT': '77.116.0.0/14',
'AU': '1.128.0.0/11',
'AW': '181.41.0.0/18',
- 'AZ': '5.191.0.0/16',
+ 'AX': '185.217.4.0/22',
+ 'AZ': '5.197.0.0/16',
'BA': '31.176.128.0/17',
'BB': '65.48.128.0/17',
'BD': '114.130.0.0/16',
'BE': '57.0.0.0/8',
- 'BF': '129.45.128.0/17',
+ 'BF': '102.178.0.0/15',
'BG': '95.42.0.0/15',
'BH': '37.131.0.0/17',
'BI': '154.117.192.0/18',
'BJ': '137.255.0.0/16',
- 'BL': '192.131.134.0/24',
+ 'BL': '185.212.72.0/23',
'BM': '196.12.64.0/18',
'BN': '156.31.0.0/16',
'BO': '161.56.0.0/16',
'BQ': '161.0.80.0/20',
- 'BR': '152.240.0.0/12',
+ 'BR': '191.128.0.0/12',
'BS': '24.51.64.0/18',
'BT': '119.2.96.0/19',
'BW': '168.167.0.0/16',
@@ -3382,20 +5115,20 @@ class GeoUtils(object):
'BZ': '179.42.192.0/18',
'CA': '99.224.0.0/11',
'CD': '41.243.0.0/16',
- 'CF': '196.32.200.0/21',
- 'CG': '197.214.128.0/17',
+ 'CF': '197.242.176.0/21',
+ 'CG': '160.113.0.0/16',
'CH': '85.0.0.0/13',
- 'CI': '154.232.0.0/14',
+ 'CI': '102.136.0.0/14',
'CK': '202.65.32.0/19',
'CL': '152.172.0.0/14',
- 'CM': '165.210.0.0/15',
+ 'CM': '102.244.0.0/14',
'CN': '36.128.0.0/10',
'CO': '181.240.0.0/12',
'CR': '201.192.0.0/12',
'CU': '152.206.0.0/15',
'CV': '165.90.96.0/19',
'CW': '190.88.128.0/17',
- 'CY': '46.198.0.0/15',
+ 'CY': '31.153.0.0/16',
'CZ': '88.100.0.0/14',
'DE': '53.0.0.0/8',
'DJ': '197.241.0.0/17',
@@ -3412,6 +5145,7 @@ class GeoUtils(object):
'EU': '2.16.0.0/13',
'FI': '91.152.0.0/13',
'FJ': '144.120.0.0/16',
+ 'FK': '80.73.208.0/21',
'FM': '119.252.112.0/20',
'FO': '88.85.32.0/19',
'FR': '90.0.0.0/9',
@@ -3421,8 +5155,8 @@ class GeoUtils(object):
'GE': '31.146.0.0/16',
'GF': '161.22.64.0/18',
'GG': '62.68.160.0/19',
- 'GH': '45.208.0.0/14',
- 'GI': '85.115.128.0/19',
+ 'GH': '154.160.0.0/12',
+ 'GI': '95.164.0.0/16',
'GL': '88.83.0.0/19',
'GM': '160.182.0.0/15',
'GN': '197.149.192.0/18',
@@ -3451,13 +5185,13 @@ class GeoUtils(object):
'JE': '87.244.64.0/18',
'JM': '72.27.0.0/17',
'JO': '176.29.0.0/16',
- 'JP': '126.0.0.0/8',
+ 'JP': '133.0.0.0/8',
'KE': '105.48.0.0/12',
'KG': '158.181.128.0/17',
'KH': '36.37.128.0/17',
'KI': '103.25.140.0/22',
'KM': '197.255.224.0/20',
- 'KN': '198.32.32.0/19',
+ 'KN': '198.167.192.0/19',
'KP': '175.45.176.0/22',
'KR': '175.192.0.0/10',
'KW': '37.36.0.0/14',
@@ -3465,10 +5199,10 @@ class GeoUtils(object):
'KZ': '2.72.0.0/13',
'LA': '115.84.64.0/18',
'LB': '178.135.0.0/16',
- 'LC': '192.147.231.0/24',
+ 'LC': '24.92.144.0/20',
'LI': '82.117.0.0/19',
'LK': '112.134.0.0/15',
- 'LR': '41.86.0.0/19',
+ 'LR': '102.183.0.0/16',
'LS': '129.232.0.0/17',
'LT': '78.56.0.0/13',
'LU': '188.42.0.0/16',
@@ -3493,7 +5227,7 @@ class GeoUtils(object):
'MT': '46.11.0.0/16',
'MU': '105.16.0.0/12',
'MV': '27.114.128.0/18',
- 'MW': '105.234.0.0/16',
+ 'MW': '102.70.0.0/15',
'MX': '187.192.0.0/11',
'MY': '175.136.0.0/13',
'MZ': '197.218.0.0/15',
@@ -3524,23 +5258,23 @@ class GeoUtils(object):
'PW': '202.124.224.0/20',
'PY': '181.120.0.0/14',
'QA': '37.210.0.0/15',
- 'RE': '139.26.0.0/16',
+ 'RE': '102.35.0.0/16',
'RO': '79.112.0.0/13',
- 'RS': '178.220.0.0/14',
+ 'RS': '93.86.0.0/15',
'RU': '5.136.0.0/13',
- 'RW': '105.178.0.0/15',
+ 'RW': '41.186.0.0/16',
'SA': '188.48.0.0/13',
'SB': '202.1.160.0/19',
'SC': '154.192.0.0/11',
- 'SD': '154.96.0.0/13',
+ 'SD': '102.120.0.0/13',
'SE': '78.64.0.0/12',
- 'SG': '152.56.0.0/14',
+ 'SG': '8.128.0.0/10',
'SI': '188.196.0.0/14',
'SK': '78.98.0.0/15',
- 'SL': '197.215.0.0/17',
+ 'SL': '102.143.0.0/17',
'SM': '89.186.32.0/19',
'SN': '41.82.0.0/15',
- 'SO': '197.220.64.0/19',
+ 'SO': '154.115.192.0/18',
'SR': '186.179.128.0/17',
'SS': '105.235.208.0/21',
'ST': '197.159.160.0/19',
@@ -3563,15 +5297,15 @@ class GeoUtils(object):
'TV': '202.2.96.0/19',
'TW': '120.96.0.0/11',
'TZ': '156.156.0.0/14',
- 'UA': '93.72.0.0/13',
- 'UG': '154.224.0.0/13',
- 'US': '3.0.0.0/8',
+ 'UA': '37.52.0.0/14',
+ 'UG': '102.80.0.0/13',
+ 'US': '6.0.0.0/8',
'UY': '167.56.0.0/13',
- 'UZ': '82.215.64.0/18',
+ 'UZ': '84.54.64.0/18',
'VA': '212.77.0.0/19',
- 'VC': '24.92.144.0/20',
+ 'VC': '207.191.240.0/21',
'VE': '186.88.0.0/13',
- 'VG': '172.103.64.0/18',
+ 'VG': '66.81.192.0/20',
'VI': '146.226.0.0/16',
'VN': '14.160.0.0/11',
'VU': '202.80.32.0/20',
@@ -3580,8 +5314,8 @@ class GeoUtils(object):
'YE': '134.35.0.0/16',
'YT': '41.242.116.0/22',
'ZA': '41.0.0.0/11',
- 'ZM': '165.56.0.0/13',
- 'ZW': '41.85.192.0/19',
+ 'ZM': '102.144.0.0/13',
+ 'ZW': '102.177.192.0/18',
}
@classmethod
@@ -3743,6 +5477,19 @@ def decode_packed_codes(code):
obfucasted_code)
+def caesar(s, alphabet, shift):
+ if shift == 0:
+ return s
+ l = len(alphabet)
+ return ''.join(
+ alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
+ for c in s)
+
+
+def rot47(s):
+ return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
+
+
def parse_m3u8_attributes(attrib):
info = {}
for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
@@ -3757,7 +5504,7 @@ def urshift(val, n):
# Based on png2str() written by @gdkchan and improved by @yokrysty
-# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
# Reference: https://www.w3.org/TR/PNG/
header = png_data[8:]
@@ -3872,7 +5619,7 @@ def write_xattr(path, key, value):
if hasattr(xattr, 'set'): # pyxattr
# Unicode arguments are not supported in python-pyxattr until
# version 0.5.0
- # See https://github.com/rg3/youtube-dl/issues/5498
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
pyxattr_required_version = '0.5.0'
if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
# TODO: fallback to CLI tools
@@ -3918,9 +5665,9 @@ def write_xattr(path, key, value):
executable = 'xattr'
opts = ['-w', key, value]
- cmd = ([encodeFilename(executable, True)] +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(path, True)])
+ cmd = ([encodeFilename(executable, True)]
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(path, True)])
try:
p = subprocess.Popen(
@@ -3948,8 +5695,12 @@ def write_xattr(path, key, value):
def random_birthday(year_field, month_field, day_field):
+ start_date = datetime.date(1950, 1, 1)
+ end_date = datetime.date(1995, 12, 31)
+ offset = random.randint(0, (end_date - start_date).days)
+ random_date = start_date + datetime.timedelta(offset)
return {
- year_field: str(random.randint(1950, 1995)),
- month_field: str(random.randint(1, 12)),
- day_field: str(random.randint(1, 31)),
+ year_field: str(random_date.year),
+ month_field: str(random_date.month),
+ day_field: str(random_date.day),
}
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 2b3b584a4..17101fa47 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2018.09.18'
+__version__ = '2020.07.28'