From b3013540b41d1eb77c4803c5fca46f8d75b40fc1 Mon Sep 17 00:00:00 2001 From: Jesus Date: Mon, 4 Sep 2023 01:59:36 +0800 Subject: update from upstream --- AUTHORS | 111 +- CONTRIBUTING.md | 89 +- CONTRIBUTORS | 116 +- Changelog.md | 711 ++- Makefile | 7 +- README.md | 315 +- completions/zsh/_hypervideo | 2 +- devscripts/changelog_override.json | 73 + devscripts/changelog_override.schema.json | 96 + devscripts/cli_to_api.py | 48 + devscripts/lazy_load_template.py | 1 + devscripts/make_changelog.py | 510 ++ devscripts/make_lazy_extractors.py | 4 + devscripts/make_readme.py | 22 +- devscripts/utils.py | 13 +- hypervideo_dl/YoutubeDL.py | 908 ++-- hypervideo_dl/__init__.py | 170 +- hypervideo_dl/__pyinstaller/__init__.py | 5 + hypervideo_dl/__pyinstaller/hook-yt_dlp.py | 32 + hypervideo_dl/aes.py | 8 +- hypervideo_dl/cache.py | 16 +- hypervideo_dl/casefold.py | 5 + hypervideo_dl/compat/__init__.py | 19 +- hypervideo_dl/compat/_deprecated.py | 9 +- hypervideo_dl/compat/_legacy.py | 37 +- hypervideo_dl/compat/compat_utils.py | 111 +- hypervideo_dl/compat/types.py | 13 + hypervideo_dl/compat/urllib/__init__.py | 10 + hypervideo_dl/compat/urllib/request.py | 40 + hypervideo_dl/cookies.py | 383 +- hypervideo_dl/dependencies/Cryptodome.py | 38 + hypervideo_dl/dependencies/__init__.py | 83 + hypervideo_dl/downloader/__init__.py | 3 +- hypervideo_dl/downloader/common.py | 44 +- hypervideo_dl/downloader/external.py | 186 +- hypervideo_dl/downloader/f4m.py | 8 +- hypervideo_dl/downloader/fragment.py | 73 +- hypervideo_dl/downloader/hls.py | 81 +- hypervideo_dl/downloader/http.py | 81 +- hypervideo_dl/downloader/ism.py | 4 +- hypervideo_dl/downloader/niconico.py | 98 +- hypervideo_dl/downloader/youtube_live_chat.py | 10 +- hypervideo_dl/extractor/_extractors.py | 282 +- hypervideo_dl/extractor/abc.py | 15 +- hypervideo_dl/extractor/abematv.py | 114 +- hypervideo_dl/extractor/acast.py | 34 +- hypervideo_dl/extractor/adn.py | 18 +- hypervideo_dl/extractor/adobepass.py | 24 +- hypervideo_dl/extractor/adultswim.py | 6 +- hypervideo_dl/extractor/aenetworks.py | 15 +- hypervideo_dl/extractor/aeonco.py | 52 +- hypervideo_dl/extractor/afreecatv.py | 103 +- hypervideo_dl/extractor/airtv.py | 96 + hypervideo_dl/extractor/aitube.py | 60 + hypervideo_dl/extractor/amazon.py | 116 +- hypervideo_dl/extractor/amazonminitv.py | 3 +- hypervideo_dl/extractor/americastestkitchen.py | 78 +- hypervideo_dl/extractor/amp.py | 9 +- hypervideo_dl/extractor/anchorfm.py | 98 + hypervideo_dl/extractor/antenna.py | 143 + hypervideo_dl/extractor/anvato.py | 10 +- hypervideo_dl/extractor/archiveorg.py | 244 +- hypervideo_dl/extractor/ard.py | 63 +- hypervideo_dl/extractor/arte.py | 24 +- hypervideo_dl/extractor/atresplayer.py | 6 +- hypervideo_dl/extractor/bandcamp.py | 48 +- hypervideo_dl/extractor/bbc.py | 14 +- hypervideo_dl/extractor/beatbump.py | 101 + hypervideo_dl/extractor/bfmtv.py | 19 +- hypervideo_dl/extractor/bibeltv.py | 202 +- hypervideo_dl/extractor/bilibili.py | 571 +- hypervideo_dl/extractor/bitchute.py | 7 +- hypervideo_dl/extractor/blerp.py | 167 + hypervideo_dl/extractor/boxcast.py | 102 + hypervideo_dl/extractor/brainpop.py | 318 ++ hypervideo_dl/extractor/bravotv.py | 236 +- hypervideo_dl/extractor/brightcove.py | 12 +- hypervideo_dl/extractor/callin.py | 55 +- hypervideo_dl/extractor/camfm.py | 85 + hypervideo_dl/extractor/cammodels.py | 39 +- hypervideo_dl/extractor/canalplus.py | 2 +- hypervideo_dl/extractor/cbc.py | 192 +- hypervideo_dl/extractor/cbs.py | 113 + hypervideo_dl/extractor/cbsnews.py | 380 +- 
hypervideo_dl/extractor/cda.py | 47 +- hypervideo_dl/extractor/ceskatelevize.py | 30 +- hypervideo_dl/extractor/chilloutzone.py | 128 +- hypervideo_dl/extractor/cinetecamilano.py | 4 +- hypervideo_dl/extractor/ciscowebex.py | 32 +- hypervideo_dl/extractor/clipchamp.py | 61 + hypervideo_dl/extractor/clyp.py | 43 +- hypervideo_dl/extractor/comedycentral.py | 5 +- hypervideo_dl/extractor/common.py | 392 +- hypervideo_dl/extractor/crackle.py | 4 +- hypervideo_dl/extractor/crtvg.py | 34 + hypervideo_dl/extractor/crunchyroll.py | 693 ++- hypervideo_dl/extractor/cultureunplugged.py | 6 +- hypervideo_dl/extractor/curiositystream.py | 8 +- hypervideo_dl/extractor/dacast.py | 158 + hypervideo_dl/extractor/daftsex.py | 27 +- hypervideo_dl/extractor/dailymotion.py | 6 +- hypervideo_dl/extractor/digitalconcerthall.py | 27 +- hypervideo_dl/extractor/discogs.py | 35 + hypervideo_dl/extractor/discovery.py | 8 +- hypervideo_dl/extractor/dlf.py | 192 + hypervideo_dl/extractor/douyutv.py | 55 +- hypervideo_dl/extractor/dplay.py | 66 +- hypervideo_dl/extractor/dropbox.py | 42 +- hypervideo_dl/extractor/dropout.py | 54 +- hypervideo_dl/extractor/drtv.py | 166 +- hypervideo_dl/extractor/dumpert.py | 49 +- hypervideo_dl/extractor/eagleplatform.py | 6 +- hypervideo_dl/extractor/ebay.py | 36 + hypervideo_dl/extractor/eitb.py | 10 +- hypervideo_dl/extractor/elevensports.py | 59 + hypervideo_dl/extractor/embedly.py | 99 +- hypervideo_dl/extractor/eporner.py | 2 +- hypervideo_dl/extractor/espn.py | 13 +- hypervideo_dl/extractor/ettutv.py | 60 + hypervideo_dl/extractor/europa.py | 84 + hypervideo_dl/extractor/eurosport.py | 28 +- hypervideo_dl/extractor/extractors.py | 6 +- hypervideo_dl/extractor/facebook.py | 114 +- hypervideo_dl/extractor/fc2.py | 6 +- hypervideo_dl/extractor/fifa.py | 23 +- hypervideo_dl/extractor/filmon.py | 14 +- hypervideo_dl/extractor/fox.py | 16 +- hypervideo_dl/extractor/foxnews.py | 77 +- hypervideo_dl/extractor/foxsports.py | 57 +- hypervideo_dl/extractor/freesound.py | 1 + hypervideo_dl/extractor/fujitv.py | 2 +- hypervideo_dl/extractor/funimation.py | 8 +- hypervideo_dl/extractor/funker530.py | 79 + hypervideo_dl/extractor/gamejolt.py | 2 +- hypervideo_dl/extractor/gdcvault.py | 15 +- hypervideo_dl/extractor/generic.py | 411 +- hypervideo_dl/extractor/genius.py | 34 +- hypervideo_dl/extractor/globalplayer.py | 254 + hypervideo_dl/extractor/globo.py | 2 +- hypervideo_dl/extractor/gmanetwork.py | 83 + hypervideo_dl/extractor/googledrive.py | 37 +- hypervideo_dl/extractor/goplay.py | 6 +- hypervideo_dl/extractor/gronkh.py | 14 +- hypervideo_dl/extractor/hidive.py | 51 +- hypervideo_dl/extractor/hketv.py | 2 +- hypervideo_dl/extractor/hollywoodreporter.py | 72 + hypervideo_dl/extractor/hotnewhiphop.py | 14 +- hypervideo_dl/extractor/hotstar.py | 138 +- hypervideo_dl/extractor/hrefli.py | 15 + hypervideo_dl/extractor/hrti.py | 10 +- hypervideo_dl/extractor/hungama.py | 109 +- hypervideo_dl/extractor/huya.py | 9 +- hypervideo_dl/extractor/hypergryph.py | 32 + hypervideo_dl/extractor/idolplus.py | 115 + hypervideo_dl/extractor/ign.py | 334 +- hypervideo_dl/extractor/imggaming.py | 6 +- hypervideo_dl/extractor/instagram.py | 6 +- hypervideo_dl/extractor/iprima.py | 41 +- hypervideo_dl/extractor/iqiyi.py | 35 +- hypervideo_dl/extractor/ivi.py | 24 +- hypervideo_dl/extractor/iwara.py | 413 +- hypervideo_dl/extractor/joj.py | 26 +- hypervideo_dl/extractor/jstream.py | 73 + hypervideo_dl/extractor/jwplatform.py | 37 +- hypervideo_dl/extractor/kakao.py | 6 +- hypervideo_dl/extractor/kankanews.py | 48 
+ hypervideo_dl/extractor/kick.py | 126 + hypervideo_dl/extractor/kommunetv.py | 31 + hypervideo_dl/extractor/kuwo.py | 2 +- hypervideo_dl/extractor/la7.py | 61 +- hypervideo_dl/extractor/lastfm.py | 43 +- hypervideo_dl/extractor/lbry.py | 140 +- hypervideo_dl/extractor/lecturio.py | 2 +- hypervideo_dl/extractor/lefigaro.py | 135 + hypervideo_dl/extractor/lego.py | 4 +- hypervideo_dl/extractor/limelight.py | 6 +- hypervideo_dl/extractor/linuxacademy.py | 20 +- hypervideo_dl/extractor/litv.py | 4 +- hypervideo_dl/extractor/livestream.py | 97 +- hypervideo_dl/extractor/lumni.py | 24 + hypervideo_dl/extractor/magellantv.py | 50 + hypervideo_dl/extractor/mailru.py | 8 +- hypervideo_dl/extractor/medaltv.py | 23 +- hypervideo_dl/extractor/mediaite.py | 18 +- hypervideo_dl/extractor/mediaset.py | 205 +- hypervideo_dl/extractor/mediasite.py | 2 +- hypervideo_dl/extractor/mediastream.py | 208 + hypervideo_dl/extractor/megatvcom.py | 6 +- hypervideo_dl/extractor/mgtv.py | 67 +- hypervideo_dl/extractor/minds.py | 2 +- hypervideo_dl/extractor/miomio.py | 10 +- hypervideo_dl/extractor/mixch.py | 10 +- hypervideo_dl/extractor/motherless.py | 223 +- hypervideo_dl/extractor/moviepilot.py | 53 +- hypervideo_dl/extractor/mtv.py | 11 +- hypervideo_dl/extractor/museai.py | 112 + hypervideo_dl/extractor/myvideoge.py | 68 +- hypervideo_dl/extractor/mzaalo.py | 95 + hypervideo_dl/extractor/naver.py | 35 +- hypervideo_dl/extractor/nbc.py | 288 +- hypervideo_dl/extractor/nebula.py | 155 +- hypervideo_dl/extractor/nekohacker.py | 217 + hypervideo_dl/extractor/neteasemusic.py | 6 +- hypervideo_dl/extractor/netverse.py | 115 +- hypervideo_dl/extractor/nfl.py | 148 +- hypervideo_dl/extractor/nhk.py | 249 +- hypervideo_dl/extractor/niconico.py | 268 +- hypervideo_dl/extractor/ninenow.py | 2 +- hypervideo_dl/extractor/nitter.py | 124 +- hypervideo_dl/extractor/njpwworld.py | 2 +- hypervideo_dl/extractor/noice.py | 116 + hypervideo_dl/extractor/noodlemagazine.py | 31 +- hypervideo_dl/extractor/nosnl.py | 34 +- hypervideo_dl/extractor/nosvideo.py | 6 +- hypervideo_dl/extractor/nowness.py | 8 +- hypervideo_dl/extractor/npo.py | 314 +- hypervideo_dl/extractor/nrk.py | 5 +- hypervideo_dl/extractor/ntvru.py | 13 + hypervideo_dl/extractor/nubilesporn.py | 99 + hypervideo_dl/extractor/nzonscreen.py | 93 + hypervideo_dl/extractor/odkmedia.py | 105 + hypervideo_dl/extractor/odnoklassniki.py | 85 +- hypervideo_dl/extractor/oneplace.py | 43 + hypervideo_dl/extractor/opencast.py | 41 +- hypervideo_dl/extractor/orf.py | 2 +- hypervideo_dl/extractor/owncloud.py | 80 + hypervideo_dl/extractor/packtpub.py | 11 +- hypervideo_dl/extractor/panopto.py | 4 +- hypervideo_dl/extractor/parler.py | 94 +- hypervideo_dl/extractor/patreon.py | 18 +- hypervideo_dl/extractor/pbs.py | 59 + hypervideo_dl/extractor/peekvids.py | 190 +- hypervideo_dl/extractor/peloton.py | 12 +- hypervideo_dl/extractor/pgatour.py | 47 + hypervideo_dl/extractor/piapro.py | 21 +- hypervideo_dl/extractor/picarto.py | 56 +- hypervideo_dl/extractor/piksel.py | 16 +- hypervideo_dl/extractor/pinterest.py | 153 +- hypervideo_dl/extractor/pladform.py | 2 +- hypervideo_dl/extractor/platzi.py | 2 +- hypervideo_dl/extractor/playplustv.py | 14 +- hypervideo_dl/extractor/playsuisse.py | 88 +- hypervideo_dl/extractor/plutotv.py | 13 +- hypervideo_dl/extractor/polskieradio.py | 399 +- hypervideo_dl/extractor/porn91.py | 89 +- hypervideo_dl/extractor/pornez.py | 63 +- hypervideo_dl/extractor/pornhub.py | 23 +- hypervideo_dl/extractor/pr0gramm.py | 97 + 
hypervideo_dl/extractor/prankcast.py | 6 +- hypervideo_dl/extractor/puhutv.py | 8 +- hypervideo_dl/extractor/qdance.py | 150 + hypervideo_dl/extractor/radiko.py | 31 +- hypervideo_dl/extractor/radiocanada.py | 6 +- hypervideo_dl/extractor/rai.py | 565 +- hypervideo_dl/extractor/rbgtum.py | 93 + hypervideo_dl/extractor/rcs.py | 402 +- hypervideo_dl/extractor/rcti.py | 4 +- hypervideo_dl/extractor/recurbate.py | 42 + hypervideo_dl/extractor/redbulltv.py | 6 +- hypervideo_dl/extractor/reddit.py | 141 +- hypervideo_dl/extractor/redgifs.py | 4 +- hypervideo_dl/extractor/regiotv.py | 10 +- hypervideo_dl/extractor/rheinmaintv.py | 94 + hypervideo_dl/extractor/rokfin.py | 54 +- hypervideo_dl/extractor/roosterteeth.py | 10 +- hypervideo_dl/extractor/rottentomatoes.py | 80 +- hypervideo_dl/extractor/rozhlas.py | 296 ++ hypervideo_dl/extractor/rte.py | 6 +- hypervideo_dl/extractor/rts.py | 4 +- hypervideo_dl/extractor/rtvcplay.py | 285 + hypervideo_dl/extractor/rumble.py | 168 +- hypervideo_dl/extractor/rutube.py | 63 +- hypervideo_dl/extractor/s4c.py | 62 + hypervideo_dl/extractor/safari.py | 6 +- hypervideo_dl/extractor/sbs.py | 109 +- hypervideo_dl/extractor/scrippsnetworks.py | 1 + hypervideo_dl/extractor/senalcolombia.py | 31 + hypervideo_dl/extractor/servus.py | 169 +- hypervideo_dl/extractor/sevenplus.py | 10 +- hypervideo_dl/extractor/shahid.py | 8 +- hypervideo_dl/extractor/shemaroome.py | 5 +- hypervideo_dl/extractor/sibnet.py | 17 + hypervideo_dl/extractor/sina.py | 10 +- hypervideo_dl/extractor/sixplay.py | 2 +- hypervideo_dl/extractor/slideslive.py | 566 +- hypervideo_dl/extractor/sonyliv.py | 16 +- hypervideo_dl/extractor/soundcloud.py | 38 +- hypervideo_dl/extractor/spankbang.py | 7 +- hypervideo_dl/extractor/sportdeutschland.py | 191 +- hypervideo_dl/extractor/stacommu.py | 148 + hypervideo_dl/extractor/stageplus.py | 515 ++ hypervideo_dl/extractor/stripchat.py | 16 +- hypervideo_dl/extractor/stv.py | 2 + hypervideo_dl/extractor/substack.py | 8 +- hypervideo_dl/extractor/sverigesradio.py | 62 +- hypervideo_dl/extractor/svt.py | 61 +- hypervideo_dl/extractor/tagesschau.py | 58 +- hypervideo_dl/extractor/tbsjp.py | 152 + hypervideo_dl/extractor/teachable.py | 2 +- hypervideo_dl/extractor/teamcoco.py | 337 +- hypervideo_dl/extractor/telecaribe.py | 91 + hypervideo_dl/extractor/telemundo.py | 9 +- hypervideo_dl/extractor/tempo.py | 119 +- hypervideo_dl/extractor/tencent.py | 102 +- hypervideo_dl/extractor/tennistv.py | 2 +- hypervideo_dl/extractor/tenplay.py | 9 +- hypervideo_dl/extractor/testurl.py | 19 +- hypervideo_dl/extractor/tf1.py | 19 + hypervideo_dl/extractor/tfo.py | 8 +- hypervideo_dl/extractor/theplatform.py | 30 +- hypervideo_dl/extractor/thesun.py | 13 +- hypervideo_dl/extractor/thisoldhouse.py | 4 +- hypervideo_dl/extractor/thisvid.py | 226 + hypervideo_dl/extractor/threeqsdn.py | 4 +- hypervideo_dl/extractor/tiktok.py | 526 +- hypervideo_dl/extractor/tnaflix.py | 27 +- hypervideo_dl/extractor/toutv.py | 6 +- hypervideo_dl/extractor/triller.py | 315 +- hypervideo_dl/extractor/trtcocuk.py | 48 + hypervideo_dl/extractor/trueid.py | 6 +- hypervideo_dl/extractor/tubetugraz.py | 27 +- hypervideo_dl/extractor/tubitv.py | 8 +- hypervideo_dl/extractor/tumblr.py | 2 +- hypervideo_dl/extractor/tunein.py | 280 +- hypervideo_dl/extractor/tv2.py | 10 +- hypervideo_dl/extractor/tv4.py | 77 +- hypervideo_dl/extractor/tvp.py | 130 +- hypervideo_dl/extractor/tvplay.py | 223 +- hypervideo_dl/extractor/tvplayer.py | 10 +- hypervideo_dl/extractor/twitcasting.py | 38 +- 
hypervideo_dl/extractor/twitch.py | 81 +- hypervideo_dl/extractor/twitter.py | 720 ++- hypervideo_dl/extractor/txxx.py | 418 ++ hypervideo_dl/extractor/udemy.py | 35 +- hypervideo_dl/extractor/unsupported.py | 34 + hypervideo_dl/extractor/uplynk.py | 80 +- hypervideo_dl/extractor/urplay.py | 53 +- hypervideo_dl/extractor/vevo.py | 10 +- hypervideo_dl/extractor/vice.py | 10 +- hypervideo_dl/extractor/videa.py | 2 +- hypervideo_dl/extractor/videocampus_sachsen.py | 4 +- hypervideo_dl/extractor/videoken.py | 336 ++ hypervideo_dl/extractor/vidlii.py | 3 +- hypervideo_dl/extractor/viewlift.py | 6 +- hypervideo_dl/extractor/viidea.py | 6 +- hypervideo_dl/extractor/vimeo.py | 102 +- hypervideo_dl/extractor/viu.py | 148 +- hypervideo_dl/extractor/vk.py | 331 +- hypervideo_dl/extractor/vocaroo.py | 63 + hypervideo_dl/extractor/vodlocker.py | 12 +- hypervideo_dl/extractor/volejtv.py | 40 + hypervideo_dl/extractor/voot.py | 183 +- hypervideo_dl/extractor/vrt.py | 415 +- hypervideo_dl/extractor/vrv.py | 9 +- hypervideo_dl/extractor/vshare.py | 2 +- hypervideo_dl/extractor/vzaar.py | 2 +- hypervideo_dl/extractor/wat.py | 14 +- hypervideo_dl/extractor/webcamerapl.py | 44 + hypervideo_dl/extractor/weibo.py | 2 +- hypervideo_dl/extractor/weverse.py | 608 +++ hypervideo_dl/extractor/wevidi.py | 108 + hypervideo_dl/extractor/weyyak.py | 86 + hypervideo_dl/extractor/whyp.py | 50 + hypervideo_dl/extractor/wimbledon.py | 61 + hypervideo_dl/extractor/wistia.py | 45 +- hypervideo_dl/extractor/wrestleuniverse.py | 307 ++ hypervideo_dl/extractor/wykop.py | 268 + hypervideo_dl/extractor/xanimu.py | 51 + hypervideo_dl/extractor/xhamster.py | 10 +- hypervideo_dl/extractor/ximalaya.py | 13 +- hypervideo_dl/extractor/xtube.py | 4 +- hypervideo_dl/extractor/xvideos.py | 21 + hypervideo_dl/extractor/yahoo.py | 117 - hypervideo_dl/extractor/yandexvideo.py | 4 +- hypervideo_dl/extractor/yappy.py | 127 + hypervideo_dl/extractor/yesjapan.py | 9 +- hypervideo_dl/extractor/yle_areena.py | 127 +- hypervideo_dl/extractor/youku.py | 80 +- hypervideo_dl/extractor/youporn.py | 35 +- hypervideo_dl/extractor/youtube.py | 1697 +++--- hypervideo_dl/extractor/zaiko.py | 130 + hypervideo_dl/extractor/zattoo.py | 5 +- hypervideo_dl/extractor/zdf.py | 31 +- hypervideo_dl/extractor/zee5.py | 34 +- hypervideo_dl/extractor/zingmp3.py | 101 +- hypervideo_dl/extractor/zoom.py | 108 +- hypervideo_dl/extractor/zype.py | 6 +- hypervideo_dl/jsinterp.py | 62 +- hypervideo_dl/networking/__init__.py | 13 + hypervideo_dl/networking/_helper.py | 208 + hypervideo_dl/networking/_urllib.py | 454 ++ hypervideo_dl/networking/common.py | 564 ++ hypervideo_dl/networking/exceptions.py | 217 + hypervideo_dl/options.py | 271 +- hypervideo_dl/plugins.py | 173 + hypervideo_dl/postprocessor/__init__.py | 5 +- hypervideo_dl/postprocessor/common.py | 13 +- hypervideo_dl/postprocessor/embedthumbnail.py | 6 +- hypervideo_dl/postprocessor/ffmpeg.py | 29 +- hypervideo_dl/postprocessor/metadataparser.py | 4 +- hypervideo_dl/postprocessor/modify_chapters.py | 1 + hypervideo_dl/utils/__init__.py | 10 + hypervideo_dl/utils/_deprecated.py | 39 + hypervideo_dl/utils/_legacy.py | 242 + hypervideo_dl/utils/_utils.py | 5484 ++++++++++++++++++++ hypervideo_dl/utils/networking.py | 163 + hypervideo_dl/utils/traversal.py | 254 + hypervideo_dl/version.py | 6 +- setup.cfg | 4 +- setup.py | 31 +- test/conftest.py | 21 + test/helper.py | 4 +- test/test_InfoExtractor.py | 128 +- test/test_YoutubeDL.py | 168 +- test/test_YoutubeDLCookieJar.py | 24 +- test/test_aes.py | 6 +- 
test/test_age_restriction.py | 19 +- test/test_compat.py | 9 +- test/test_config.py | 227 + test/test_cookies.py | 18 +- test/test_download.py | 9 +- test/test_downloader_external.py | 139 + test/test_downloader_http.py | 12 +- test/test_networking.py | 1439 +++++ test/test_networking_utils.py | 282 + test/test_plugins.py | 73 + test/test_socks.py | 521 +- test/test_utils.py | 363 +- test/testdata/yt_dlp_plugins/extractor/_ignore.py | 5 + test/testdata/yt_dlp_plugins/extractor/ignore.py | 12 + test/testdata/yt_dlp_plugins/extractor/normal.py | 9 + .../yt_dlp_plugins/postprocessor/normal.py | 5 + .../yt_dlp_plugins/extractor/zipped.py | 5 + .../yt_dlp_plugins/postprocessor/zipped.py | 5 + 434 files changed, 39163 insertions(+), 8062 deletions(-) create mode 100644 devscripts/changelog_override.json create mode 100644 devscripts/changelog_override.schema.json create mode 100644 devscripts/cli_to_api.py create mode 100644 devscripts/make_changelog.py create mode 100644 hypervideo_dl/__pyinstaller/__init__.py create mode 100644 hypervideo_dl/__pyinstaller/hook-yt_dlp.py create mode 100644 hypervideo_dl/casefold.py create mode 100644 hypervideo_dl/compat/types.py create mode 100644 hypervideo_dl/compat/urllib/__init__.py create mode 100644 hypervideo_dl/compat/urllib/request.py create mode 100644 hypervideo_dl/dependencies/Cryptodome.py create mode 100644 hypervideo_dl/dependencies/__init__.py create mode 100644 hypervideo_dl/extractor/airtv.py create mode 100644 hypervideo_dl/extractor/aitube.py create mode 100644 hypervideo_dl/extractor/anchorfm.py create mode 100644 hypervideo_dl/extractor/antenna.py create mode 100644 hypervideo_dl/extractor/beatbump.py create mode 100644 hypervideo_dl/extractor/blerp.py create mode 100644 hypervideo_dl/extractor/boxcast.py create mode 100644 hypervideo_dl/extractor/brainpop.py create mode 100644 hypervideo_dl/extractor/camfm.py create mode 100644 hypervideo_dl/extractor/clipchamp.py create mode 100644 hypervideo_dl/extractor/crtvg.py create mode 100644 hypervideo_dl/extractor/dacast.py create mode 100644 hypervideo_dl/extractor/discogs.py create mode 100644 hypervideo_dl/extractor/dlf.py create mode 100644 hypervideo_dl/extractor/ebay.py create mode 100644 hypervideo_dl/extractor/elevensports.py create mode 100644 hypervideo_dl/extractor/ettutv.py create mode 100644 hypervideo_dl/extractor/funker530.py create mode 100644 hypervideo_dl/extractor/globalplayer.py create mode 100644 hypervideo_dl/extractor/gmanetwork.py create mode 100644 hypervideo_dl/extractor/hollywoodreporter.py create mode 100644 hypervideo_dl/extractor/hrefli.py create mode 100644 hypervideo_dl/extractor/hypergryph.py create mode 100644 hypervideo_dl/extractor/idolplus.py create mode 100644 hypervideo_dl/extractor/jstream.py create mode 100644 hypervideo_dl/extractor/kankanews.py create mode 100644 hypervideo_dl/extractor/kick.py create mode 100644 hypervideo_dl/extractor/kommunetv.py create mode 100644 hypervideo_dl/extractor/lefigaro.py create mode 100644 hypervideo_dl/extractor/lumni.py create mode 100644 hypervideo_dl/extractor/magellantv.py create mode 100644 hypervideo_dl/extractor/mediastream.py create mode 100644 hypervideo_dl/extractor/museai.py create mode 100644 hypervideo_dl/extractor/mzaalo.py create mode 100644 hypervideo_dl/extractor/nekohacker.py create mode 100644 hypervideo_dl/extractor/noice.py create mode 100644 hypervideo_dl/extractor/nubilesporn.py create mode 100644 hypervideo_dl/extractor/nzonscreen.py create mode 100644 hypervideo_dl/extractor/odkmedia.py create 
mode 100644 hypervideo_dl/extractor/oneplace.py create mode 100644 hypervideo_dl/extractor/owncloud.py create mode 100644 hypervideo_dl/extractor/pgatour.py create mode 100644 hypervideo_dl/extractor/pr0gramm.py create mode 100644 hypervideo_dl/extractor/qdance.py create mode 100644 hypervideo_dl/extractor/rbgtum.py create mode 100644 hypervideo_dl/extractor/recurbate.py create mode 100644 hypervideo_dl/extractor/rheinmaintv.py create mode 100644 hypervideo_dl/extractor/rtvcplay.py create mode 100644 hypervideo_dl/extractor/s4c.py create mode 100644 hypervideo_dl/extractor/senalcolombia.py create mode 100644 hypervideo_dl/extractor/sibnet.py create mode 100644 hypervideo_dl/extractor/stacommu.py create mode 100644 hypervideo_dl/extractor/stageplus.py create mode 100644 hypervideo_dl/extractor/tbsjp.py create mode 100644 hypervideo_dl/extractor/telecaribe.py create mode 100644 hypervideo_dl/extractor/thisvid.py create mode 100644 hypervideo_dl/extractor/trtcocuk.py create mode 100644 hypervideo_dl/extractor/txxx.py create mode 100644 hypervideo_dl/extractor/videoken.py create mode 100644 hypervideo_dl/extractor/vocaroo.py create mode 100644 hypervideo_dl/extractor/volejtv.py create mode 100644 hypervideo_dl/extractor/webcamerapl.py create mode 100644 hypervideo_dl/extractor/weverse.py create mode 100644 hypervideo_dl/extractor/wevidi.py create mode 100644 hypervideo_dl/extractor/weyyak.py create mode 100644 hypervideo_dl/extractor/whyp.py create mode 100644 hypervideo_dl/extractor/wimbledon.py create mode 100644 hypervideo_dl/extractor/wrestleuniverse.py create mode 100644 hypervideo_dl/extractor/wykop.py create mode 100644 hypervideo_dl/extractor/xanimu.py create mode 100644 hypervideo_dl/extractor/yappy.py create mode 100644 hypervideo_dl/extractor/zaiko.py create mode 100644 hypervideo_dl/networking/__init__.py create mode 100644 hypervideo_dl/networking/_helper.py create mode 100644 hypervideo_dl/networking/_urllib.py create mode 100644 hypervideo_dl/networking/common.py create mode 100644 hypervideo_dl/networking/exceptions.py create mode 100644 hypervideo_dl/plugins.py create mode 100644 hypervideo_dl/utils/__init__.py create mode 100644 hypervideo_dl/utils/_deprecated.py create mode 100644 hypervideo_dl/utils/_legacy.py create mode 100644 hypervideo_dl/utils/_utils.py create mode 100644 hypervideo_dl/utils/networking.py create mode 100644 hypervideo_dl/utils/traversal.py create mode 100644 test/conftest.py create mode 100644 test/test_config.py create mode 100644 test/test_downloader_external.py create mode 100644 test/test_networking.py create mode 100644 test/test_networking_utils.py create mode 100644 test/test_plugins.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/_ignore.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/ignore.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/normal.py create mode 100644 test/testdata/yt_dlp_plugins/postprocessor/normal.py create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py diff --git a/AUTHORS b/AUTHORS index 8dafe32..374a1b1 100644 --- a/AUTHORS +++ b/AUTHORS @@ -16,6 +16,7 @@ Aaron Brager Aaron Lipinski Aaron Wojnowski Aaron Zeng +Aaruni Kaushik Abdullah Ibn Fulan Abhishek Kedia Abubukker Chaudhary @@ -41,7 +42,10 @@ Aleri Kaisattera Ales Jirasek Alessandro Ghedini Alex +Alex Berg +Alex Ionescu Alex Karabanov +Alex Klapheke Alex Merkel Alex Monk Alex Seiler @@ -60,7 +64,9 @@ Ali Sherief 
Allan Daemon Allan Zhou Alpesh Valia +Aman Salwan Amaury Gauthier +Amirreza Aflakparast Amish Bhadeshia Anand Babu Periasamy Anant Murmu @@ -126,7 +132,9 @@ Ben Rog-Wilhelm Ben Welsh Benedikt Wildenhain Benjamin Congdon +Benjamin Ryan Bepis +Berkan Teber Bernhard M. Wiedemann Bjorn Heesakkers BlahGeek @@ -143,6 +151,7 @@ Bricio Bruno Guerreiro BunnyHelp Burve +ByteDream CHJ85 CXwudi Camillo Dell'mour @@ -155,6 +164,7 @@ Charlie Le ChillingPepper Ching Yi, Chan Chirantan Ekbote +Chris Caruso Chris Gavin Chris Hranj Chris Lamb @@ -162,6 +172,7 @@ Christian Albrecht Christian Paul Christian Pointner Christoph Döpmann +Christoph Flathmann Christoph Moench-Tegeder Christopher Krooss Christopher Neugebauer @@ -175,14 +186,17 @@ Conner Corey Farwell Corey Nicholson Cory Hall +CoryTibbettsDev Costy Petrisor CplPwnies Craig Markwardt CrankDatSouljaBoy CrypticSignal CyberJacob +Cyberes Cyril Roelandt Cássio Ávila +D0LLYNH0 DEvmIb DaMightyZombie Daan van Vugt @@ -197,7 +211,9 @@ Daniel Bolton Daniel Höpfl Daniel Lindholm Daniel Peukert +Daniel Rich Daniel Twardowski +Daniel Vogt Daniel.Zeng Danko Alexeyev Dankryn @@ -205,6 +221,7 @@ Dao Hoang Son Dario Guarascio DarkZeros DarkstaIkers +DataGhost Dave Dave Loyall Dave Vasilevsky @@ -220,9 +237,12 @@ David Powell David Rabinowitz David Skrundz David Triendl +David Turner David Wagner +Davin Kevin Deer-Spangle Delon +Denis DepFA Derek Land DesweR @@ -266,6 +286,7 @@ Erik Erik Johnson Erwin de Haan Evan Spensley +Eveldee FND Fabi019 Fabian Stahl @@ -280,16 +301,22 @@ Filip B Filip Hedman Filippo Valsorda Finn Petersen +Finn R. Gärtner FireDart FliegendeWurst +Florian Albrechtskirchinger FooBarQuaxx Founder Fang Francesco Frassinelli Francois du Toit +Franklin Lee Frans de Jonge François Charlier François Revol Frederic Bournival +Frederik Nordahl Jul Sabroe +Friedrich Rehren +GD-Slime GDR! Gabriel Schubiner Gaetan Gilbert @@ -310,11 +337,13 @@ Giedrius Statkevičius Gilles Pietri Gino Lisignoli Giovanni Visentini +Giulio Muscarello Giuseppe Fabiano Gjorgji Jankovski Glenn Slayden Gorfiend Grabien +Greg Sadetsky GreyAlien502 Grom PE Grzegorz P @@ -349,8 +378,10 @@ Itachi Itay Brandes Iulian Onofrei Ivan Kozik +Ivan Skodje J J.D. Purcell +JC-Chung JChris246 Jacek Nowacki Jack Danger Canty @@ -373,6 +404,7 @@ Jan Schär Janez Troha Jason Normore Jason Terk +Jasper Rebane Jay Jeff Buchbinder Jeff Crouse @@ -383,11 +415,13 @@ Jelle van der Waa Jens Rutschmann Jens Timmerman Jens Wille +Jeong, Heon Jeremie J. Jarosh Jeroen Jacobs Jertzukka Jesse Jesse de Zwart +Jesus Jesús Jia Rong Yee JianxinLi @@ -412,6 +446,7 @@ Johny Mo Swag Joost Verdoorn Joram Schrijver Jordan Weatherby +Jorge Joseph Frazier Joseph Spiros Josh Soref @@ -452,10 +487,12 @@ KiberInfinity Kid Kieran O'Reilly Kitten King +Kurt Bestor Kyle Kyle Anthony Williams Kyu Yeun Kim LE +LXYan2333 Laneone LangerJan Lapinot @@ -464,6 +501,7 @@ Lauren Liberda Laurent Raufaste Leonardo Amaral Leonardo Taccari +LeoniePhiline Leslie P. 
Polzer Lesmiscore Li4ick @@ -474,6 +512,7 @@ Locke Logan B Logan Fleur Lovius +LowSuggestion912 Luc Ritchie Luca Cherubin Luca Steeb @@ -490,6 +529,7 @@ MAA MMM MRWITEK Magnus Kolstad +Mahmoud Abdel-Fattah Malte Kiefer Mamay Alexander Mantas Mikulėnas @@ -499,6 +539,7 @@ Marcin Cieślak Marco Fantauzzo Marco Ferragina Marco Schuster +Marek Hudik Marek Rusinowski Marenga Marian Sigler @@ -513,6 +554,7 @@ Martin Trigaux Martin Weinelt Marvin Ewald Marwen Dallel +Master Matej Dujava Mathias Rav Mats @@ -524,6 +566,7 @@ Matthew Rayfield Matthieu Muffato Mattias Harrysson Mattias Wadman +Matumo Matěj Cepl Max Max Mehl @@ -553,12 +596,15 @@ Misael Aguayo Mister Hat Mitsukarenai MobiDotS +Mohamed Al Mehairbi Mohamedh Fazal Mohammad Khaled AbouElSherbini Mohammad Teimori Pabandi Mohammed Yaseen Mowzer +Mohit Tokas Moises Lima Moritz Patelscheck +Mozi MrDoritos MrOctopus MrRawes @@ -567,12 +613,17 @@ Muratcan Simsek N1k145 NRTICN Naglis Jonaitis +Nam Vu Namnamseo Nathan Rossi +Nathan Touzé Nehal Patel NeroBurner +Neurognostic Nevar Angelo +Nicholas Defranco Nick Daniels +Nicolai Dagestad Nicolas Kaiser Nicolas SAPA Nicolas Évrard @@ -585,6 +636,8 @@ Nitish Kumar Noah NotFound OHaiiBuzzle +OIRNOIR +OMEGA_RAZER Odd Stråbø OhMyBahGosh Ole Ernst @@ -592,6 +645,8 @@ Oleg Prutz Oli Allen Oliver Freyermuth Olivier Bilodeau +Omar Atef +OndrejBakan Ondřej Bárta Ondřej Caletka Ori Avtalion @@ -599,6 +654,7 @@ Orn Osama Khalid Oskar Cieslik Oskar Jauch +OverlordQ P-reducible PB PC @@ -652,22 +708,26 @@ Quan Hua Quentin Rameau RPing Rafal Borczuch +Rajeshwaran Ralf Haring Random User Raphael Michel Rasmus Rendal Rastislav Barlik Ray Douglass +RedDeffender Remita Amine Reto Kromer Reventl0v RexYuan +RfadnjdExt RiCON Ricardo Ricardo Constantino Ricardo Garcia Richard Clamp Richard Gibson +RjY Rob Rob van Bekkum Robert Geislinger @@ -718,8 +778,10 @@ Shaun Walbridge Shaya G Shreyas Minocha Shrimadhav U K +Siddhartha Sahu Sidney de Koning Silvan Mosberger +Simon Simon Morgan Simon Sawicki Simon W. 
Jackson @@ -739,8 +801,10 @@ Stefan Lobbenmeier Stefan Pöschel Stefan-Gabriel Muscalu Steffan Donal +Stel Abrego Stephan Stephen Stair +Steve Steven Gosseling Steven Maude Sukhbir Singh @@ -791,24 +855,29 @@ Toni Viemerö TotalCaesar659 Trevor Nelson Tristan Waddington +TxI5 Tyler Szabo Unit 193 Unknown Urgau Varun Vasyl' Vavrychuk +Venkata Krishna S Vid VietTPham Vignesh Venkat Vijay Singh Viktor Szakats Viren Rajput +Vita Vitaliy Syrchikov Vitaly Khabarov +Vladislav Vobe Vrihub Vukkk Vítor Galvão +Văn Anh Wandang Wang Jun Tham WassimAttar @@ -821,6 +890,7 @@ Witold Baryluk WolfganP Xaver Hellauer Xiao Di Guan +Xiao Han Xie Yanbo Xu Cheng Xuan Hu (Sean) @@ -839,8 +909,10 @@ Zach Bruggeman Zack Fernandes Zenon Mousmoulas Zhong Jianxin +Zhong Lufan Zhymabek Roman Zirro +Zprokkel aarubui adamanldo aegamesi @@ -864,10 +936,12 @@ aviperes axelerometer aystroganov@gmail.com azeem +barsnick bashonly bastik bato3 beefchop +bepvte bitraid biwubo blissland @@ -877,6 +951,7 @@ bpfoley bsun0000 bubbleguuum bzc6p +c-basalt ca-za cant-think-of-a-name cantandwont @@ -950,11 +1025,13 @@ fluks fnord foghawk forDream +foreignBlade frenchy1983 ftk funniray gam2046 gamer191 +garret gcmalloc gdzx geauxlo @@ -970,6 +1047,7 @@ h-collector ha shao hakatashi haobinliang +hasezoey hassaanaliw hcwhan hdclark @@ -977,6 +1055,7 @@ hedii helb hh0rva1h hmlinaric +hoaluvn hojel hrimfaxi hseg @@ -986,6 +1065,7 @@ huohuarong hurda i6t ian +ifan-t igv inondle insaneracist @@ -1005,12 +1085,14 @@ jfogelman jhwgh1968 jjatria jnozsc +jo-nike joehillen jomo josanabr julien jxu k3ns1n +kangalio kaspi kayb94 kaz-us @@ -1028,12 +1110,14 @@ knapior kr4ssi krichbanana kurumigi -lauren +lauren n. liberda lazypete365 light94 lightmare linhua55 +linsui lkho +lkw123 llamasblade llyyr logon84 @@ -1060,6 +1144,7 @@ mehq mexican porn commits midas02 migbac +milkknife minusf miseran mjdubell @@ -1070,10 +1155,13 @@ motophil mpeter50 mrBliss mrkrossxdx +mrscrapy mrtnmtth mtilbury +mushbite mutantmonkey mzbaulhaque +mzhou nawl nemunaire net @@ -1086,6 +1174,7 @@ nikhil nixxo nmeum nmrugg +nnoboa nomevi nosoop nto @@ -1101,10 +1190,12 @@ opusforlife2 oteng ouwou ovitei +oxamun ozburo pachacamac panatexxa patrickslin +permunkle peugeot pgaig phaer @@ -1117,11 +1208,15 @@ pingtux piplongrun pishposhmcgee plroman +pmitchell86 +puc9 pukkandan pulpe pyed pypy +qbnu quinlander +qulaz quyleanh raleeper rand-net @@ -1131,10 +1226,12 @@ reddraggone9 reiv remis renalid +rexlambert22 rhhayward rhsmachine rigstot riking +ringus1 rmanola robbie robin @@ -1154,6 +1251,7 @@ sceext schn0sch schnusch scil +sepro sh!zeeg shirt shirt-dev @@ -1162,6 +1260,7 @@ sichuan-pepper siddharth siikamiika skacurt +skbeh slangangular slocum smed79 @@ -1205,16 +1304,24 @@ tlsssl tobi1805 tom toniz4 +toomyzoom +trainman261 trasssh troywith77 +truedread tsantala tsia u-spec-png +unbeatable-101 +urectanc user utlasidyo v-delta +vampirefrog venth +vidiot720 vijayanand nandam +viktor-enzell vkorablin vobe vordep @@ -1240,6 +1347,8 @@ zackmark29 zcanfly zejn zenerdi0de +zhgwn +zhong-yiyu zootedb0t zouhair zraktvor diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0ed1eb4..372587b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,7 +42,9 @@ Before reporting any issue, type `doas pacman -Sy hypervideo`. This should repor ### Is the issue already documented? -Make sure that someone has not already opened the issue you're trying to open. 
Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, subscribe to it to be notified when there is any progress. Unless you have something useful to add to the conversation, please refrain from commenting. + +Additionally, it is also helpful to see if the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here. ### Why are existing options not enough? @@ -66,6 +68,28 @@ Only post features that you (or an incapacitated friend you can personally talk ### Is your question about hypervideo? +Some bug reports are completely unrelated to hypervideo and relate to a different, or even the reporter's own, application. Please make sure that you are actually using hypervideo. If you are using a UI for hypervideo, report the bug to the maintainer of the actual application providing the UI. In general, if you are unable to provide the verbose log, you should not be opening the issue here. + +If the issue is with `youtube-dl` (the project hypervideo is ultimately forked from) and not with hypervideo, the issue should be raised in the youtube-dl project. + +### Are you willing to share account details if needed? + +The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is at your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it. + +By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials. + +While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow. + +- Look for people with the `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages. +- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator). +- Change the password after receiving the account back. + +### Is the website primarily used for piracy? + +We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) of not supporting services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in fakes.
We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management). + + + It may sound strange, but some bug reports we receive are completely unrelated to hypervideo and relate to a different, or even the reporter's own, application. Please make sure that you are actually using hypervideo. If you are using a UI for hypervideo, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for hypervideo fails in some way you believe is related to hypervideo, by all means, go ahead and report the bug. # DEVELOPER INSTRUCTIONS @@ -74,7 +98,7 @@ Most users do not need to build hypervideo and can [download the builds](https:/ To run hypervideo as a developer, you don't need to build anything either. Simply execute - python -m youtube_dl + python -m hypervideo_dl To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: @@ -108,7 +132,7 @@ After you have ensured this site is distributing its content legally, you can fo cd hypervideo git checkout -b yourextractor -4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: +4. Start with this simple template and save it to `hypervideo_dl/extractor/yourextractor.py`: ```python # coding: utf-8 @@ -147,21 +171,21 @@ After you have ensured this site is distributing its content legally, you can fo 'title': title, 'description': self._og_search_description(webpage), 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), - # TODO more properties (see youtube_dl/extractor/common.py) + # TODO more properties (see hypervideo_dl/extractor/common.py) } ``` -5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). +5. Add an import in [`hypervideo_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). 6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted. -7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. 8.
Make sure your code follows [hypervideo coding conventions](#hypervideo-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): - $ flake8 youtube_dl/extractor/yourextractor.py + $ flake8 hypervideo_dl/extractor/yourextractor.py 9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by hypervideo, namely 2.6, 2.7, and 3.2+. 10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: - $ git add youtube_dl/extractor/extractors.py - $ git add youtube_dl/extractor/yourextractor.py + $ git add hypervideo_dl/extractor/extractors.py + $ git add hypervideo_dl/extractor/yourextractor.py $ git commit -m '[yourextractor] Add new extractor' $ git push origin yourextractor @@ -173,7 +197,8 @@ In any case, thank you very much for your contributions! This section introduces guidelines for writing idiomatic, robust and future-proof extractor code. -Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old hypervideo versions working. Even though this breakage issue is easily fixed by emitting a new version of hypervideo with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all. +Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters, which is out of your control, and this layout tends to change. As an extractor implementer, your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes, thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken. + ### Mandatory and optional metafields @@ -239,6 +264,46 @@ description = self._search_regex( On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
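To make the distinction concrete, here is a minimal sketch of the mandatory-vs-optional pattern inside an extractor's `_real_extract`; the regexes and tag/attribute names are hypothetical, not taken from any real site:

```python
# Mandatory: _html_search_regex is fatal by default, so extraction
# stops with a clear error if the title cannot be found.
title = self._html_search_regex(
    r'<h1[^>]*>([^<]+)</h1>', webpage, 'title')

# Optional: fatal=False makes this return None (with a warning)
# instead of aborting when the description is absent.
description = self._search_regex(
    r'<meta name="description" content="([^"]+)"', webpage,
    'description', fatal=False)
```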
+ +Another thing to remember is not to try to iterate over `None`. + +Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate over them: + +```python +thumbnail_data = data.get('thumbnails') or [] +thumbnails = [{ + 'url': item['url'], + 'height': item.get('h'), +} for item in thumbnail_data if item.get('url')] # correct +``` + +and not like: + +```python +thumbnail_data = data.get('thumbnails') +thumbnails = [{ + 'url': item['url'], + 'height': item.get('h'), +} for item in thumbnail_data] # incorrect +``` + +In this case, `thumbnail_data` will be `None` if the field was not found, and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `or []` avoids this error and instead sets `thumbnails` to an empty list. + +Alternatively, this can be further simplified by using `traverse_obj`: + +```python +thumbnails = [{ + 'url': item['url'], + 'height': item.get('h'), +} for item in traverse_obj(data, ('thumbnails', lambda _, v: v['url']))] +``` + +or, even better, + +```python +thumbnails = traverse_obj(data, ('thumbnails', ..., {'url': 'url', 'height': 'h'})) +``` + ### Provide fallbacks When extracting metadata, try to do so from multiple sources. For example, if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. @@ -407,7 +472,7 @@ Incorrect: ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well. Use `url_or_none` for safe URL processing. Use `try_get` for safe metadata extraction from parsed JSON. Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution` for `resolution` extraction, `parse_duration` for `duration` extraction, and `parse_age_limit` for `age_limit` extraction. -Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. +Explore [`hypervideo_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
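To illustrate how these helpers compose, here is a minimal runnable sketch; it assumes hypervideo is installed so the functions can be imported from `hypervideo_dl.utils`, and the `data` dict with its `views`/`length`/`published`/`thumb` keys is a hypothetical stand-in for whatever JSON a page actually exposes:

```python
from hypervideo_dl.utils import (
    int_or_none,
    parse_duration,
    unified_strdate,
    url_or_none,
)

# Hypothetical metadata, e.g. as returned by _parse_json(); each helper
# below returns None instead of raising when its input is missing or bad.
data = {
    'views': '1234',
    'length': '1:02:03',
    'published': '2023-07-06',
    'thumb': 'https://example.com/thumb.jpg',
}

info = {
    'view_count': int_or_none(data.get('views')),           # '1234' -> 1234
    'duration': parse_duration(data.get('length')),         # '1:02:03' -> 3723 seconds
    'upload_date': unified_strdate(data.get('published')),  # -> '20230706'
    'thumbnail': url_or_none(data.get('thumb')),            # non-URL values -> None
}
```

Because every helper degrades to `None` rather than raising, a site dropping or garbling one of these fields breaks only that metafield instead of the whole extraction.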
#### More examples diff --git a/CONTRIBUTORS b/CONTRIBUTORS index f2a1368..6b9b9f4 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -2,7 +2,8 @@ pukkandan (owner) shirt-dev (collaborator) coletdjnz/colethedj (collaborator) Ashish0804 (collaborator) -nao20010128nao/Lesmiscore (collaborator) +bashonly (collaborator) +Grub4K (collaborator) h-h-h-h pauldubois98 nixxo @@ -295,7 +296,6 @@ Mehavoid winterbird-code yashkc2025 aldoridhoni -bashonly jacobtruman masta79 palewire @@ -319,7 +319,6 @@ columndeeply DoubleCouponDay Fabi019 GautamMKGarg -Grub4K itachi-19 jeroenj josanabr @@ -357,3 +356,114 @@ SG5 the-marenga tkgmomosheep vitkhab +glensc +synthpop123 +tntmod54321 +milkknife +Bnyro +CapacitorSet +stelcodes +skbeh +muddi900 +digitall +chengzhicn +mexus +JChris246 +redraskal +Spicadox +barsnick +docbender +KurtBestor +Chrissi2812 +FrederikNS +gschizas +JC-Chung +mzhou +OndrejBakan +ab4cbef +aionescu +amra +ByteDream +carusocr +chexxor +felixonmars +FrankZ85 +FriedrichRehren +gregsadetsky +LeoniePhiline +LowSuggestion912 +Matumo +OIRNOIR +OMEGARAZER +oxamun +pmitchell86 +qbnu +qulaz +rebane2001 +road-master +rohieb +sdht0 +seproDev +Hill-98 +LXYan2333 +mushbite +venkata-krishnas +7vlad7 +alexklapheke +arobase-che +bepvte +bergoid +blmarket +brandon-dacrib +c-basalt +CoryTibbettsDev +Cyberes +D0LLYNH0 +danog +DataGhost +falbrechtskirchinger +foreignBlade +garret1317 +hasezoey +hoaluvn +ItzMaxTV +ivanskodje +jo-nike +kangalio +linsui +makew0rld +menschel +mikf +mrscrapy +NDagestad +Neurognostic +NextFire +nick-cd +permunkle +pzhlkj6612 +ringus1 +rjy +Schmoaaaaah +sjthespian +theperfectpunk +toomyzoom +truedread +TxI5 +unbeatable-101 +vampirefrog +vidiot720 +viktor-enzell +zhgwn +barthelmannk +berkanteber +OverlordQ +rexlambert22 +Ti4eeT4e +AmanSal1 +bbilly1 +meliber +nnoboa +rdamas +RfadnjdExt +urectanc +nao20010128nao/Lesmiscore diff --git a/Changelog.md b/Changelog.md index 1a39d29..9073814 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,15 +1,712 @@ # Changelog +### 2023.07.06 + +#### Important changes +- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj) + - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains + - Cookies are scoped when passed to external downloaders + - Add `cookies` field to info.json and deprecate `http_headers.Cookie` + +#### Core changes +- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan) +- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan) +- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K) +- [Prevent `Cookie` leaks on HTTP redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz) +- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan) +- **outtmpl**: [Pad `playlist_index` etc even when with internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) 
by [pukkandan](https://github.com/pukkandan) +- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan) + +#### Extractor changes +- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber) +- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa) +- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas) +- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly) +- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt) +- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly) +- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly) +- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan) +- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc) +- **twitter** + - [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly) + - spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly) +- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan) +- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt) +- **youtube** + - [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) by [pukkandan](https://github.com/pukkandan) + - [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan) + - [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1) + 
- [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz) + - [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan) + - stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan) + - tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz) + +#### Downloader changes +- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan) +- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) +- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan) + +#### Misc. changes +- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf) +- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan) +- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan) +- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan) + +### 2023.06.22 + +#### Core changes +- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan) +- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan) + - Support negative time-ranges + - Add `*from-url` to obey time-ranges in URL +- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan) + +#### Extractor changes +- [Support multiple `_VALID_URL`s](https://github.com/yt-dlp/yt-dlp/commit/5fd8367496b42c7b900b896a0d5460561a2859de) ([#5812](https://github.com/yt-dlp/yt-dlp/issues/5812)) by [nixxo](https://github.com/nixxo) +- **dplay**: GlobalCyclingNetworkPlus: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/774aa09dd6aa61ced9ec818d1f67e53414d22762) ([#7360](https://github.com/yt-dlp/yt-dlp/issues/7360)) by [bashonly](https://github.com/bashonly) +- **dropout**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/db22142f6f817ff673d417b4b78e8db497bf8ab3) ([#7304](https://github.com/yt-dlp/yt-dlp/issues/7304)) by [OverlordQ](https://github.com/OverlordQ) +- **motherless**: [Add gallery support, fix 
groups](https://github.com/yt-dlp/yt-dlp/commit/f2ff0f6f1914b82d4a51681a72cc0828115dcb4a) ([#7211](https://github.com/yt-dlp/yt-dlp/issues/7211)) by [rexlambert22](https://github.com/rexlambert22), [Ti4eeT4e](https://github.com/Ti4eeT4e) +- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb) +- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk) +- **youtube** + - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan) + - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively + - IOS also has higher bit-rate 'premium' formats though they are not labeled as such + - [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan) + - [Improve nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/cd810afe2ac5567c822b7424800fc470ef2d0045) by [pukkandan](https://github.com/pukkandan) + - [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan) + +#### Misc. changes +- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan) +- **cleanup** + - Miscellaneous + - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly) + - [812cdfa](https://github.com/yt-dlp/yt-dlp/commit/812cdfa06c33a40e73a8e04b3e6f42c084666a43) by [pukkandan](https://github.com/pukkandan) + +### 2023.06.21 + +#### Important changes +- YouTube: Improved throttling and signature fixes + +#### Core changes +- [Add `--compat-option playlist-match-filter`](https://github.com/yt-dlp/yt-dlp/commit/93b39cdbd9dcf351bfa0c4ee252805b4617fdca9) by [pukkandan](https://github.com/pukkandan) +- [Add `--no-quiet`](https://github.com/yt-dlp/yt-dlp/commit/d669772c65e8630162fd6555d0a578b246591921) by [pukkandan](https://github.com/pukkandan) +- [Add option `--color`](https://github.com/yt-dlp/yt-dlp/commit/8417f26b8a819cd7ffcd4e000ca3e45033e670fb) ([#6904](https://github.com/yt-dlp/yt-dlp/issues/6904)) by [Grub4K](https://github.com/Grub4K) +- [Add option `--netrc-cmd`](https://github.com/yt-dlp/yt-dlp/commit/db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb) ([#6682](https://github.com/yt-dlp/yt-dlp/issues/6682)) by [NDagestad](https://github.com/NDagestad), [pukkandan](https://github.com/pukkandan) +- [Add option `--xff`](https://github.com/yt-dlp/yt-dlp/commit/c16644642b08e2bf4130a6c5fa01395d8718c990) by [pukkandan](https://github.com/pukkandan) +- [Auto-select default format in `-f-`](https://github.com/yt-dlp/yt-dlp/commit/372a0f3b9dadd1e52234b498aa4c7040ef868c7d) ([#7101](https://github.com/yt-dlp/yt-dlp/issues/7101)) by [ivanskodje](https://github.com/ivanskodje), [pukkandan](https://github.com/pukkandan) +- [Deprecate internal 
`Youtubedl-no-compression` header](https://github.com/yt-dlp/yt-dlp/commit/955c89584b66fcd0fcfab3e611f1edeb1ca63886) ([#6876](https://github.com/yt-dlp/yt-dlp/issues/6876)) by [coletdjnz](https://github.com/coletdjnz) +- [Do not translate newlines in `--print-to-file`](https://github.com/yt-dlp/yt-dlp/commit/9874e82b5a61582169300bea561b3e8899ad1ef7) by [pukkandan](https://github.com/pukkandan) +- [Ensure pre-processor errors do not block `--print`](https://github.com/yt-dlp/yt-dlp/commit/f005a35aa7e4f67a0c603a946c0dd714c151b2d6) by [pukkandan](https://github.com/pukkandan) (With fixes in [17ba434](https://github.com/yt-dlp/yt-dlp/commit/17ba4343cf99701692a7f4798fd42b50f644faba)) +- [Fix `filepath` being copied to underlying format dict](https://github.com/yt-dlp/yt-dlp/commit/84078a8b38f403495d00b46654c8750774d821de) by [pukkandan](https://github.com/pukkandan) +- [Improve HTTP redirect handling](https://github.com/yt-dlp/yt-dlp/commit/08916a49c777cb6e000eec092881eb93ec22076c) ([#7094](https://github.com/yt-dlp/yt-dlp/issues/7094)) by [coletdjnz](https://github.com/coletdjnz) +- [Populate `filename` and `urls` fields at all stages of `--print`](https://github.com/yt-dlp/yt-dlp/commit/170605840ea9d5ad75da6576485ea7d125b428ee) by [pukkandan](https://github.com/pukkandan) (With fixes in [b5f61b6](https://github.com/yt-dlp/yt-dlp/commit/b5f61b69d4561b81fc98c226b176f0c15493e688)) +- [Relaxed validation for numeric format filters](https://github.com/yt-dlp/yt-dlp/commit/c3f624ef0a5d7a6ae1c5ffeb243087e9fc7d79dc) by [pukkandan](https://github.com/pukkandan) +- [Support decoding multiple content encodings](https://github.com/yt-dlp/yt-dlp/commit/daafbf49b3482edae4d70dd37070be99742a926e) ([#7142](https://github.com/yt-dlp/yt-dlp/issues/7142)) by [coletdjnz](https://github.com/coletdjnz) +- [Support loading info.json with a list at its root](https://github.com/yt-dlp/yt-dlp/commit/ab1de9cb1e39cf421c2b7dc6756c6ff1955bb313) by [pukkandan](https://github.com/pukkandan) +- [Workaround erroneous urllib Windows proxy parsing](https://github.com/yt-dlp/yt-dlp/commit/3f66b6fe50f8d5b545712f8b19d5ae62f5373980) ([#7092](https://github.com/yt-dlp/yt-dlp/issues/7092)) by [coletdjnz](https://github.com/coletdjnz) +- **cookies** + - [Defer extraction of v11 key from keyring](https://github.com/yt-dlp/yt-dlp/commit/9b7a48abd1b187eae1e3f6c9839c47d43ccec00b) by [Grub4K](https://github.com/Grub4K) + - [Move `YoutubeDLCookieJar` to cookies module](https://github.com/yt-dlp/yt-dlp/commit/b87e01c123fd560b6a674ce00f45a9459d82d98a) ([#7091](https://github.com/yt-dlp/yt-dlp/issues/7091)) by [coletdjnz](https://github.com/coletdjnz) + - [Support custom Safari cookies path](https://github.com/yt-dlp/yt-dlp/commit/a58182b75a05fe0a10c5e94a536711d3ade19c20) ([#6783](https://github.com/yt-dlp/yt-dlp/issues/6783)) by [NextFire](https://github.com/NextFire) + - [Update for chromium changes](https://github.com/yt-dlp/yt-dlp/commit/b38d4c941d1993ab27e4c0f8e024e23c2ec0f8f8) ([#6897](https://github.com/yt-dlp/yt-dlp/issues/6897)) by [mbway](https://github.com/mbway) +- **Cryptodome**: [Fix `__bool__`](https://github.com/yt-dlp/yt-dlp/commit/98ac902c4979e4529b166e873473bef42baa2e3e) by [pukkandan](https://github.com/pukkandan) +- **jsinterp** + - [Do not compile regex](https://github.com/yt-dlp/yt-dlp/commit/7aeda6cc9e73ada0b0a0b6a6748c66bef63a20a8) by [pukkandan](https://github.com/pukkandan) + - [Fix division](https://github.com/yt-dlp/yt-dlp/commit/b4a252fba81f53631c07ca40ce7583f5d19a8a36) 
([#7279](https://github.com/yt-dlp/yt-dlp/issues/7279)) by [bashonly](https://github.com/bashonly) + - [Fix global object extraction](https://github.com/yt-dlp/yt-dlp/commit/01aba2519a0884ef17d5f85608dbd2a455577147) by [pukkandan](https://github.com/pukkandan) + - [Handle `NaN` in bitwise operators](https://github.com/yt-dlp/yt-dlp/commit/1d7656184c6b8aa46b29149893894b3c24f1df00) by [pukkandan](https://github.com/pukkandan) + - [Handle negative numbers better](https://github.com/yt-dlp/yt-dlp/commit/7cf51f21916292cd80bdeceb37489f5322f166dd) by [pukkandan](https://github.com/pukkandan) +- **outtmpl** + - [Allow `\n` in replacements and default.](https://github.com/yt-dlp/yt-dlp/commit/78fde6e3398ff11e5d383a66b28664badeab5180) by [pukkandan](https://github.com/pukkandan) + - [Fix some minor bugs](https://github.com/yt-dlp/yt-dlp/commit/ebe1b4e34f43c3acad30e4bcb8484681a030c114) by [pukkandan](https://github.com/pukkandan) (With fixes in [1619ab3](https://github.com/yt-dlp/yt-dlp/commit/1619ab3e67d8dc4f86fc7ed292c79345bc0d91a0)) + - [Support `str.format` syntax inside replacements](https://github.com/yt-dlp/yt-dlp/commit/ec9311c41b111110bc52cfbd6ea682c6fb23f77a) by [pukkandan](https://github.com/pukkandan) +- **update** + - [Better error handling](https://github.com/yt-dlp/yt-dlp/commit/d2e84d5eb01c66fc5304e8566348d65a7be24ed7) by [pukkandan](https://github.com/pukkandan) + - [Do not restart into versions without `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/02948a17d903f544363bb20b51a6d8baed7bba08) by [pukkandan](https://github.com/pukkandan) + - [Implement `--update-to` repo](https://github.com/yt-dlp/yt-dlp/commit/665472a7de3880578c0b7b3f95c71570c056368e) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) +- **upstream** + - [Merged with youtube-dl 07af47](https://github.com/yt-dlp/yt-dlp/commit/42f2d40b475db66486a4b4fe5b56751a640db5db) by [pukkandan](https://github.com/pukkandan) + - [Merged with youtube-dl d1c6c5](https://github.com/yt-dlp/yt-dlp/commit/4823ec9f461512daa1b8ab362893bb86a6320b26) by [pukkandan](https://github.com/pukkandan) (With fixes in [edbe5b5](https://github.com/yt-dlp/yt-dlp/commit/edbe5b589dd0860a67b4e03f58db3cd2539d91c2) by [bashonly](https://github.com/bashonly)) +- **utils** + - `FormatSorter`: [Improve `size` and `br`](https://github.com/yt-dlp/yt-dlp/commit/eedda5252c05327748dede204a8fccafa0288118) by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png) + - `js_to_json`: [Implement template strings](https://github.com/yt-dlp/yt-dlp/commit/0898c5c8ccadfc404472456a7a7751b72afebadd) ([#6623](https://github.com/yt-dlp/yt-dlp/issues/6623)) by [Grub4K](https://github.com/Grub4K) + - `locked_file`: [Fix for virtiofs](https://github.com/yt-dlp/yt-dlp/commit/45998b3e371b819ce0dbe50da703809a048cc2fe) ([#6840](https://github.com/yt-dlp/yt-dlp/issues/6840)) by [brandon-dacrib](https://github.com/brandon-dacrib) + - `strftime_or_none`: [Handle negative timestamps](https://github.com/yt-dlp/yt-dlp/commit/a35af4306d24c56c6358f89cdf204860d1cd62b4) by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) + - `traverse_obj` + - [Allow iterables in traversal](https://github.com/yt-dlp/yt-dlp/commit/21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e) ([#6902](https://github.com/yt-dlp/yt-dlp/issues/6902)) by [Grub4K](https://github.com/Grub4K) + - [More fixes](https://github.com/yt-dlp/yt-dlp/commit/b079c26f0af8085bccdadc72c61c8164ca5ab0f8) ([#6959](https://github.com/yt-dlp/yt-dlp/issues/6959)) 
by [Grub4K](https://github.com/Grub4K) + - `write_string`: [Fix noconsole behavior](https://github.com/yt-dlp/yt-dlp/commit/3b479100df02e20dd949e046003ae96ddbfced57) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Do not exit early for unsuitable `url_result`](https://github.com/yt-dlp/yt-dlp/commit/baa922b5c74b10e3b86ff5e6cf6529b3aae8efab) by [pukkandan](https://github.com/pukkandan) +- [Do not warn for invalid chapter data in description](https://github.com/yt-dlp/yt-dlp/commit/84ffeb7d5e72e3829319ba7720a8480fc4c7503b) by [pukkandan](https://github.com/pukkandan) +- [Extract more metadata from ISM](https://github.com/yt-dlp/yt-dlp/commit/f68434cc74cfd3db01b266476a2eac8329fbb267) by [pukkandan](https://github.com/pukkandan) +- **abematv**: [Add fallback for title and description extraction and extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/c449c0655d7c8549e6e1389c26b628053b253d39) ([#6994](https://github.com/yt-dlp/yt-dlp/issues/6994)) by [Lesmiscore](https://github.com/Lesmiscore) +- **acast**: [Support embeds](https://github.com/yt-dlp/yt-dlp/commit/c91ac833ea99b00506e470a44cf930e4e23378c9) ([#7212](https://github.com/yt-dlp/yt-dlp/issues/7212)) by [pabs3](https://github.com/pabs3) +- **adobepass**: [Handle `Charter_Direct` MSO as `Spectrum`](https://github.com/yt-dlp/yt-dlp/commit/ea0570820336a0fe9c3b530d1b0d1e59313274f4) ([#6824](https://github.com/yt-dlp/yt-dlp/issues/6824)) by [bashonly](https://github.com/bashonly) +- **aeonco**: [Support Youtube embeds](https://github.com/yt-dlp/yt-dlp/commit/ed81b74802b4247ee8d9dc0ef87eb52baefede1c) ([#6591](https://github.com/yt-dlp/yt-dlp/issues/6591)) by [alexklapheke](https://github.com/alexklapheke) +- **afreecatv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fdd69db38924c38194ef236b26325d66ac815c88) ([#6283](https://github.com/yt-dlp/yt-dlp/issues/6283)) by [blmarket](https://github.com/blmarket) +- **ARDBetaMediathek**: [Add thumbnail](https://github.com/yt-dlp/yt-dlp/commit/f78eb41e1c0f1dcdb10317358a26bf541dc7ee15) ([#6890](https://github.com/yt-dlp/yt-dlp/issues/6890)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **bibeltv**: [Fix extraction, support live streams and series](https://github.com/yt-dlp/yt-dlp/commit/4ad58667c102bd82a7c4cca8aa395ec1682e3b4c) ([#6505](https://github.com/yt-dlp/yt-dlp/issues/6505)) by [flashdagger](https://github.com/flashdagger) +- **bilibili** + - [Support festival videos](https://github.com/yt-dlp/yt-dlp/commit/ab29e47029e2f5b48abbbab78e82faf7cf6e9506) ([#6547](https://github.com/yt-dlp/yt-dlp/issues/6547)) by [qbnu](https://github.com/qbnu) + - SpaceVideo: [Extract signature](https://github.com/yt-dlp/yt-dlp/commit/6f10cdcf7eeaeae5b75e0a4428cd649c156a2d83) ([#7149](https://github.com/yt-dlp/yt-dlp/issues/7149)) by [elyse0](https://github.com/elyse0) +- **biliIntl**: [Add comment extraction](https://github.com/yt-dlp/yt-dlp/commit/b093c38cc9f26b59a8504211d792f053142c847d) ([#6079](https://github.com/yt-dlp/yt-dlp/issues/6079)) by [HobbyistDev](https://github.com/HobbyistDev) +- **bitchute**: [Add more fallback subdomains](https://github.com/yt-dlp/yt-dlp/commit/0c4e0fbcade0fc92d14c2a6d63e360fe067f6192) ([#6907](https://github.com/yt-dlp/yt-dlp/issues/6907)) by [Neurognostic](https://github.com/Neurognostic) +- **booyah**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f7f7a877bf8e87fd4eb0ad2494ad948ca7691114) by [pukkandan](https://github.com/pukkandan) +- **BrainPOP**: [Add 
extractors](https://github.com/yt-dlp/yt-dlp/commit/979568f26ece80bca72b48f0dd57d676e431059a) ([#6106](https://github.com/yt-dlp/yt-dlp/issues/6106)) by [MinePlayersPE](https://github.com/MinePlayersPE) +- **bravotv** + - [Detect DRM](https://github.com/yt-dlp/yt-dlp/commit/1fe5bf240e6ade487d18079a62aa36bcc440a27a) ([#7171](https://github.com/yt-dlp/yt-dlp/issues/7171)) by [bashonly](https://github.com/bashonly) + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/06966cb8966b9aa4f60ab9c44c182a057d4ca3a3) ([#6568](https://github.com/yt-dlp/yt-dlp/issues/6568)) by [bashonly](https://github.com/bashonly) +- **camfm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4cbfa570a1b9bd65b0f48770693377e8d842dcb0) ([#7083](https://github.com/yt-dlp/yt-dlp/issues/7083)) by [garret1317](https://github.com/garret1317) +- **cbc** + - [Fix live extractor, playlist `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/7a7b1376fbce0067cf37566bb47131bc0022638d) ([#6625](https://github.com/yt-dlp/yt-dlp/issues/6625)) by [makew0rld](https://github.com/makew0rld) + - [Ignore 426 from API](https://github.com/yt-dlp/yt-dlp/commit/4afb208cf07b59291ae3b0c4efc83945ee5b8812) ([#6781](https://github.com/yt-dlp/yt-dlp/issues/6781)) by [jo-nike](https://github.com/jo-nike) + - gem: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/871c907454693940cb56906ed9ea49fcb7154829) ([#6499](https://github.com/yt-dlp/yt-dlp/issues/6499)) by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +- **cbs**: [Add `ParamountPressExpress` extractor](https://github.com/yt-dlp/yt-dlp/commit/44369c9afa996e14e9f466754481d878811b5b4a) ([#6604](https://github.com/yt-dlp/yt-dlp/issues/6604)) by [bashonly](https://github.com/bashonly) +- **cbsnews**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/f6e43d6fa9804c24525e1fed0a87782754dab7ed) ([#6681](https://github.com/yt-dlp/yt-dlp/issues/6681)) by [bashonly](https://github.com/bashonly) +- **chilloutzone**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6f4fc5660f40f3458882a8f51601eae4af7be609) ([#6445](https://github.com/yt-dlp/yt-dlp/issues/6445)) by [bashonly](https://github.com/bashonly) +- **clipchamp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f07c4c1da4361af213e5791279b9d152d2e4ce3) ([#6978](https://github.com/yt-dlp/yt-dlp/issues/6978)) by [bashonly](https://github.com/bashonly) +- **comedycentral**: [Add support for movies](https://github.com/yt-dlp/yt-dlp/commit/66468bbf49562ff82670cbbd456c5e8448a6df34) ([#7108](https://github.com/yt-dlp/yt-dlp/issues/7108)) by [sqrtNOT](https://github.com/sqrtNOT) +- **crtvg**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/26c517b29c8727e47948d6fff749d5297f0efb60) ([#7168](https://github.com/yt-dlp/yt-dlp/issues/7168)) by [ItzMaxTV](https://github.com/ItzMaxTV) +- **crunchyroll**: [Rework with support for movies, music and artists](https://github.com/yt-dlp/yt-dlp/commit/032de83ea9ff2f4977d9c71a93bbc1775597b762) ([#6237](https://github.com/yt-dlp/yt-dlp/issues/6237)) by [Grub4K](https://github.com/Grub4K) +- **dacast**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/c25cac2f8e5fbac2737a426d7778fd2f0efc5381) ([#6896](https://github.com/yt-dlp/yt-dlp/issues/6896)) by [bashonly](https://github.com/bashonly) +- **daftsex**: [Update domain and embed player url](https://github.com/yt-dlp/yt-dlp/commit/fc5a7f9b27d2a89b1f3ca7d33a95301c21d832cd) ([#5966](https://github.com/yt-dlp/yt-dlp/issues/5966)) by [JChris246](https://github.com/JChris246) +- 
**DigitalConcertHall**: [Support films](https://github.com/yt-dlp/yt-dlp/commit/55ed4ff73487feb3177b037dfc2ea527e777da3e) ([#7202](https://github.com/yt-dlp/yt-dlp/issues/7202)) by [ItzMaxTV](https://github.com/ItzMaxTV) +- **discogs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6daaf21092888beff11b807cd46f832f1f9c46a0) ([#6624](https://github.com/yt-dlp/yt-dlp/issues/6624)) by [rjy](https://github.com/rjy) +- **dlf**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b423b6a48e0b19260bc95ab7d72d2138d7f124dc) ([#6697](https://github.com/yt-dlp/yt-dlp/issues/6697)) by [nick-cd](https://github.com/nick-cd) +- **drtv**: [Fix radio page extraction](https://github.com/yt-dlp/yt-dlp/commit/9a06b7b1891b48cebbe275652ae8025a36d97d97) ([#6552](https://github.com/yt-dlp/yt-dlp/issues/6552)) by [viktor-enzell](https://github.com/viktor-enzell) +- **Dumpert**: [Fix m3u8 and support new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/f8ae441501596733e2b967430471643a1d7cacb8) ([#6091](https://github.com/yt-dlp/yt-dlp/issues/6091)) by [DataGhost](https://github.com/DataGhost), [pukkandan](https://github.com/pukkandan) +- **elevensports**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ecfe47973f6603b5367fe2cc3c65274627d94516) ([#7172](https://github.com/yt-dlp/yt-dlp/issues/7172)) by [ItzMaxTV](https://github.com/ItzMaxTV) +- **ettutv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/83465fc4100a2fb2c188898fbc2f3021f6a9b4dd) ([#6579](https://github.com/yt-dlp/yt-dlp/issues/6579)) by [elyse0](https://github.com/elyse0) +- **europarl**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/03789976d301eaed3e957dbc041573098f6af059) ([#7114](https://github.com/yt-dlp/yt-dlp/issues/7114)) by [HobbyistDev](https://github.com/HobbyistDev) +- **eurosport**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/45e87ea106ad37b2a002663fa30ee41ce97b16cd) ([#7076](https://github.com/yt-dlp/yt-dlp/issues/7076)) by [HobbyistDev](https://github.com/HobbyistDev) +- **facebook**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/3b52a606881e6adadc33444abdeacce562b79330) ([#6856](https://github.com/yt-dlp/yt-dlp/issues/6856)) by [ringus1](https://github.com/ringus1) +- **foxnews**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/97d60ad8cd6c99f01e463a9acfce8693aff2a609) ([#7222](https://github.com/yt-dlp/yt-dlp/issues/7222)) by [bashonly](https://github.com/bashonly) +- **funker530**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cab94a0cd8b6d3fffed5a6faff030274adbed182) ([#7291](https://github.com/yt-dlp/yt-dlp/issues/7291)) by [Cyberes](https://github.com/Cyberes) +- **generic** + - [Accept values for `fragment_query`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/5cc0a8fd2e9fec50026fb92170b57993af939e4a) ([#6600](https://github.com/yt-dlp/yt-dlp/issues/6600)) by [bashonly](https://github.com/bashonly) (With fixes in [9bfe0d1](https://github.com/yt-dlp/yt-dlp/commit/9bfe0d15bd7dbdc6b0e6378fa9f5e2e289b2373b)) + - [Add extractor-args `hls_key`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/c2e0fc40a73dd85ab3920f977f579d475e66ef59) ([#6567](https://github.com/yt-dlp/yt-dlp/issues/6567)) by [bashonly](https://github.com/bashonly) + - [Attempt to detect live HLS](https://github.com/yt-dlp/yt-dlp/commit/93e7c6995e07dafb9dcc06c0d06acf6c5bdfecc5) ([#6775](https://github.com/yt-dlp/yt-dlp/issues/6775)) by [bashonly](https://github.com/bashonly) +- **genius**: [Add support for 
articles](https://github.com/yt-dlp/yt-dlp/commit/460da07439718d9af1e3661da2a23e05a913a2e6) ([#6474](https://github.com/yt-dlp/yt-dlp/issues/6474)) by [bashonly](https://github.com/bashonly) +- **globalplayer**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/30647668a92a0ca5cd108776804baac0996bd9f7) ([#6903](https://github.com/yt-dlp/yt-dlp/issues/6903)) by [garret1317](https://github.com/garret1317) +- **gmanetwork**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2d97d154fe4fb84fe2ed3a4e1ed5819e89b71e88) ([#5945](https://github.com/yt-dlp/yt-dlp/issues/5945)) by [HobbyistDev](https://github.com/HobbyistDev) +- **gronkh**: [Extract duration and chapters](https://github.com/yt-dlp/yt-dlp/commit/9c92b803fa24e48543ce969468d5404376e315b7) ([#6817](https://github.com/yt-dlp/yt-dlp/issues/6817)) by [satan1st](https://github.com/satan1st) +- **hentaistigma**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/04f8018a0544736a18494bc3899d06b05b78fae6) by [pukkandan](https://github.com/pukkandan) +- **hidive**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/e6ab678e36c40ded0aae305bbb866cdab554d417) by [pukkandan](https://github.com/pukkandan) +- **hollywoodreporter**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6bdb64e2a2a6d504d8ce1dc830fbfb8a7f199c63) ([#6614](https://github.com/yt-dlp/yt-dlp/issues/6614)) by [bashonly](https://github.com/bashonly) +- **hotstar**: [Support `/shows/` URLs](https://github.com/yt-dlp/yt-dlp/commit/7f8ddebbb51c9fd4a347306332a718ba41b371b8) ([#7225](https://github.com/yt-dlp/yt-dlp/issues/7225)) by [bashonly](https://github.com/bashonly) +- **hrefli**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7e35526d5b970a034b9d76215ee3e4bd7631edcd) ([#6762](https://github.com/yt-dlp/yt-dlp/issues/6762)) by [selfisekai](https://github.com/selfisekai) +- **idolplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c14b213679ed4401288bdc86ae696932e219222) ([#6732](https://github.com/yt-dlp/yt-dlp/issues/6732)) by [ping](https://github.com/ping) +- **iq**: [Set more language codes](https://github.com/yt-dlp/yt-dlp/commit/2d5cae9636714ff922d28c548c349d5f2b48f317) ([#6476](https://github.com/yt-dlp/yt-dlp/issues/6476)) by [D0LLYNH0](https://github.com/D0LLYNH0) +- **iwara** + - [Accept old URLs](https://github.com/yt-dlp/yt-dlp/commit/ab92d8651c48d247dfb7d3f0a824cc986e47c7ed) by [Lesmiscore](https://github.com/Lesmiscore) + - [Fix authentication](https://github.com/yt-dlp/yt-dlp/commit/0a5d7c39e17bb9bd50c9db42bcad40eb82d7f784) ([#7137](https://github.com/yt-dlp/yt-dlp/issues/7137)) by [toomyzoom](https://github.com/toomyzoom) + - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/56793f74c36899742d7abd52afb0deca97d469e1) ([#6651](https://github.com/yt-dlp/yt-dlp/issues/6651)) by [hasezoey](https://github.com/hasezoey) + - [Fix typo](https://github.com/yt-dlp/yt-dlp/commit/d1483ec693c79f0b4ddf493870bcb840aca4da08) by [Lesmiscore](https://github.com/Lesmiscore) + - [Implement login](https://github.com/yt-dlp/yt-dlp/commit/21b9413cf7dd4830b2ece57af21589dd4538fc52) ([#6721](https://github.com/yt-dlp/yt-dlp/issues/6721)) by [toomyzoom](https://github.com/toomyzoom) + - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/c14af7a741931b364bab3d9546c0f4359f318f8c) ([#6557](https://github.com/yt-dlp/yt-dlp/issues/6557)) by [Lesmiscore](https://github.com/Lesmiscore) + - [Report private videos](https://github.com/yt-dlp/yt-dlp/commit/95a383be1b6fb00c92ee3fb091732c4f6009acb6) 
([#6641](https://github.com/yt-dlp/yt-dlp/issues/6641)) by [Lesmiscore](https://github.com/Lesmiscore) +- **JStream**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3459d3c5af3b2572ed51e8ecfda6c11022a838c6) ([#6252](https://github.com/yt-dlp/yt-dlp/issues/6252)) by [Lesmiscore](https://github.com/Lesmiscore) +- **jwplatform**: [Update `_extract_embed_urls`](https://github.com/yt-dlp/yt-dlp/commit/cf9fd52fabe71d6e7c30d3ea525029ffa561fc9c) ([#6383](https://github.com/yt-dlp/yt-dlp/issues/6383)) by [carusocr](https://github.com/carusocr) +- **kick**: [Make initial request non-fatal](https://github.com/yt-dlp/yt-dlp/commit/0a6918a4a1431960181d8c50e0bbbcb0afbaff9a) by [bashonly](https://github.com/bashonly) +- **LastFM**: [Rewrite playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/026435714cb7c39613a0d7d2acd15d3823b78d94) ([#6379](https://github.com/yt-dlp/yt-dlp/issues/6379)) by [hatienl0i261299](https://github.com/hatienl0i261299), [pukkandan](https://github.com/pukkandan) +- **lbry**: [Extract original quality formats](https://github.com/yt-dlp/yt-dlp/commit/44c0d66442b568d9e1359e669d8b029b08a77fa7) ([#7257](https://github.com/yt-dlp/yt-dlp/issues/7257)) by [bashonly](https://github.com/bashonly) +- **line**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/faa0332ed69e070cf3bd31390589a596e962f392) ([#6734](https://github.com/yt-dlp/yt-dlp/issues/6734)) by [sian1468](https://github.com/sian1468) +- **livestream**: [Support videos with account id](https://github.com/yt-dlp/yt-dlp/commit/bfdf144c7e5d7a93fbfa9d8e65598c72bf2b542a) ([#6324](https://github.com/yt-dlp/yt-dlp/issues/6324)) by [theperfectpunk](https://github.com/theperfectpunk) +- **medaltv**: [Fix clips](https://github.com/yt-dlp/yt-dlp/commit/1e3c2b6ec28d7ab5e31341fa93c47b65be4fbff4) ([#6502](https://github.com/yt-dlp/yt-dlp/issues/6502)) by [xenova](https://github.com/xenova) +- **mediastream**: [Improve `WinSports` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/03025b6e105139d01cd415ddc51fd692957fd2ba) ([#6426](https://github.com/yt-dlp/yt-dlp/issues/6426)) by [bashonly](https://github.com/bashonly) +- **mgtv**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/59d9fe08312bbb76ee26238d207a8ca35410a48d) ([#7234](https://github.com/yt-dlp/yt-dlp/issues/7234)) by [bashonly](https://github.com/bashonly) +- **Mzaalo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dc3c44f349ba85af320e706e2a27ad81a78b1c6e) ([#7163](https://github.com/yt-dlp/yt-dlp/issues/7163)) by [ItzMaxTV](https://github.com/ItzMaxTV) +- **nbc**: [Fix `NBCStations` direct mp4 formats](https://github.com/yt-dlp/yt-dlp/commit/9be0fe1fd967f62cbf3c60bd14e1021a70abc147) ([#6637](https://github.com/yt-dlp/yt-dlp/issues/6637)) by [bashonly](https://github.com/bashonly) +- **nebula**: [Add `beta.nebula.tv`](https://github.com/yt-dlp/yt-dlp/commit/cbfe2e5cbe0f4649a91e323a82b8f5f774f36662) ([#6516](https://github.com/yt-dlp/yt-dlp/issues/6516)) by [unbeatable-101](https://github.com/unbeatable-101) +- **nekohacker**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/489f51279d00318018478fd7461eddbe3b45297e) ([#7003](https://github.com/yt-dlp/yt-dlp/issues/7003)) by [hasezoey](https://github.com/hasezoey) +- **nhk** + - [Add `NhkRadiru` extractor](https://github.com/yt-dlp/yt-dlp/commit/8f0be90ecb3b8d862397177bb226f17b245ef933) ([#6819](https://github.com/yt-dlp/yt-dlp/issues/6819)) by [garret1317](https://github.com/garret1317) + - [Fix API 
extraction](https://github.com/yt-dlp/yt-dlp/commit/f41b949a2ef646fbc36375febbe3f0c19d742c0f) ([#7180](https://github.com/yt-dlp/yt-dlp/issues/7180)) by [menschel](https://github.com/menschel), [sjthespian](https://github.com/sjthespian) + - `NhkRadiruLive`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/81c8b9bdd9841b72cbfc1bbff9dab5fb4aa038b0) ([#7332](https://github.com/yt-dlp/yt-dlp/issues/7332)) by [garret1317](https://github.com/garret1317) +- **niconico** + - [Download comments from the new endpoint](https://github.com/yt-dlp/yt-dlp/commit/52ecc33e221f7de7eb6fed6c22489f0c5fdd2c6d) ([#6773](https://github.com/yt-dlp/yt-dlp/issues/6773)) by [Lesmiscore](https://github.com/Lesmiscore) + - live: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8f9250fe280d37f0988646cd5cc0072f4d33a6d) ([#5764](https://github.com/yt-dlp/yt-dlp/issues/5764)) by [Lesmiscore](https://github.com/Lesmiscore) + - series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/c86e433c35fe5da6cb29f3539eef97497f84ed38) ([#6898](https://github.com/yt-dlp/yt-dlp/issues/6898)) by [sqrtNOT](https://github.com/sqrtNOT) +- **nubilesporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d4e6ef40772e0560a8ed33b844ef7549e86837be) ([#6231](https://github.com/yt-dlp/yt-dlp/issues/6231)) by [permunkle](https://github.com/permunkle) +- **odnoklassniki**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/1a2eb5bda51d8b7a78a65acebf72a0dcf9da196b) ([#7217](https://github.com/yt-dlp/yt-dlp/issues/7217)) by [bashonly](https://github.com/bashonly) +- **opencast** + - [Add ltitools to `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3588be59cee429a0ab5c4ceb2f162298bb44147d) ([#6371](https://github.com/yt-dlp/yt-dlp/issues/6371)) by [C0D3D3V](https://github.com/C0D3D3V) + - [Fix format bug](https://github.com/yt-dlp/yt-dlp/commit/89dbf0848370deaa55af88c3593a2a264124caf5) ([#6512](https://github.com/yt-dlp/yt-dlp/issues/6512)) by [C0D3D3V](https://github.com/C0D3D3V) +- **owncloud**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c6d4b82a8b8bce59b1c9ce5e6d349ea428dac0a7) ([#6533](https://github.com/yt-dlp/yt-dlp/issues/6533)) by [C0D3D3V](https://github.com/C0D3D3V) +- **Parler**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/80ea6d3dea8483cddd39fc89b5ee1fc06670c33c) ([#6446](https://github.com/yt-dlp/yt-dlp/issues/6446)) by [JChris246](https://github.com/JChris246) +- **pgatour**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae182ad89e1427ff7b1684d6a44ff93fa857a0c) ([#6613](https://github.com/yt-dlp/yt-dlp/issues/6613)) by [bashonly](https://github.com/bashonly) +- **playsuisse**: [Support new url format](https://github.com/yt-dlp/yt-dlp/commit/94627c5dde12a72766bdba36e056916c29c40ed1) ([#6528](https://github.com/yt-dlp/yt-dlp/issues/6528)) by [sbor23](https://github.com/sbor23) +- **polskieradio**: [Improve extractors](https://github.com/yt-dlp/yt-dlp/commit/738c90a463257634455ada3e5c18b714c531dede) ([#5948](https://github.com/yt-dlp/yt-dlp/issues/5948)) by [selfisekai](https://github.com/selfisekai) +- **pornez**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f) ([#6792](https://github.com/yt-dlp/yt-dlp/issues/6792)) by [zhgwn](https://github.com/zhgwn) +- **pornhub**: [Set access cookies to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/62beefa818c75c20b6941389bb197051554a5d41) ([#6685](https://github.com/yt-dlp/yt-dlp/issues/6685)) by [arobase-che](https://github.com/arobase-che), 
[Schmoaaaaah](https://github.com/Schmoaaaaah) +- **rai**: [Rewrite extractors](https://github.com/yt-dlp/yt-dlp/commit/c6d3f81a4077aaf9cffc6aa2d0dec92f38e74bb0) ([#5940](https://github.com/yt-dlp/yt-dlp/issues/5940)) by [danog](https://github.com/danog), [nixxo](https://github.com/nixxo) +- **recurbate**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2502cfed91415c7ccfff925fd3404d230046484) ([#6297](https://github.com/yt-dlp/yt-dlp/issues/6297)) by [mrscrapy](https://github.com/mrscrapy) +- **reddit** + - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/4d9280c9c853733534dda60486fa949bcca36c9e) ([#6950](https://github.com/yt-dlp/yt-dlp/issues/6950)) by [bashonly](https://github.com/bashonly) + - [Support cookies and short URLs](https://github.com/yt-dlp/yt-dlp/commit/7a6f6f24592a8065376f11a58e44878807732cf6) ([#6825](https://github.com/yt-dlp/yt-dlp/issues/6825)) by [bashonly](https://github.com/bashonly) +- **rokfin**: [Re-construct manifest url](https://github.com/yt-dlp/yt-dlp/commit/7a6c8a0807941dd24fbf0d6172e811884f98e027) ([#6507](https://github.com/yt-dlp/yt-dlp/issues/6507)) by [vampirefrog](https://github.com/vampirefrog) +- **rottentomatoes**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2d306c03d6f2697fcbabb7da35aa62cc078359d3) ([#6844](https://github.com/yt-dlp/yt-dlp/issues/6844)) by [JChris246](https://github.com/JChris246) +- **rozhlas** + - [Extract manifest formats](https://github.com/yt-dlp/yt-dlp/commit/e4cf7741f9302b3faa092962f2895b55cb3d89bb) ([#6590](https://github.com/yt-dlp/yt-dlp/issues/6590)) by [bashonly](https://github.com/bashonly) + - `MujRozhlas`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2b801fea59628d5c873e06a0727fbf2051bbd1f) ([#7129](https://github.com/yt-dlp/yt-dlp/issues/7129)) by [stanoarn](https://github.com/stanoarn) +- **rtvc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/9b30cd3dfce83c2f0201b28a7a3ef44ab9722664) ([#6578](https://github.com/yt-dlp/yt-dlp/issues/6578)) by [elyse0](https://github.com/elyse0) +- **rumble** + - [Detect timeline format](https://github.com/yt-dlp/yt-dlp/commit/78bc1868ff3352108ab2911033d1ac67a55f151e) by [pukkandan](https://github.com/pukkandan) + - [Fix videos without quality selection](https://github.com/yt-dlp/yt-dlp/commit/6994afc030d2a786d8032075ed71a14d7eac5a4f) by [pukkandan](https://github.com/pukkandan) +- **sbs**: [Overhaul extractor for new API](https://github.com/yt-dlp/yt-dlp/commit/6a765f135ccb654861336ea27a2c1c24ea8e286f) ([#6839](https://github.com/yt-dlp/yt-dlp/issues/6839)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [vidiot720](https://github.com/vidiot720) +- **shemaroome**: [Pass `stream_key` header to downloader](https://github.com/yt-dlp/yt-dlp/commit/7bc92517463f5766e9d9b92c3823b5cf403c0e3d) ([#7224](https://github.com/yt-dlp/yt-dlp/issues/7224)) by [bashonly](https://github.com/bashonly) +- **sonyliv**: [Fix login with token](https://github.com/yt-dlp/yt-dlp/commit/4815d35c191e7d375b94492a6486dd2ba43a8954) ([#7223](https://github.com/yt-dlp/yt-dlp/issues/7223)) by [bashonly](https://github.com/bashonly) +- **stageplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e5265dc6517478e589ee3c1ff0cb19bdf4e35ce1) ([#6838](https://github.com/yt-dlp/yt-dlp/issues/6838)) by [bashonly](https://github.com/bashonly) +- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f9213f8a2d7ba46b912afe1dd3ce6bb700a33d72) ([#7306](https://github.com/yt-dlp/yt-dlp/issues/7306)) by 
[foreignBlade](https://github.com/foreignBlade) +- **substack**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/12037d8b0a578fcc78a5c8f98964e48ee6060e25) ([#7218](https://github.com/yt-dlp/yt-dlp/issues/7218)) by [bashonly](https://github.com/bashonly) +- **sverigesradio**: [Support slug URLs](https://github.com/yt-dlp/yt-dlp/commit/5ee9a7d6e18ceea956e831994cf11c423979354f) ([#7220](https://github.com/yt-dlp/yt-dlp/issues/7220)) by [bashonly](https://github.com/bashonly) +- **tagesschau**: [Fix single audio urls](https://github.com/yt-dlp/yt-dlp/commit/af7585c824a1e405bd8afa46d87b4be322edc93c) ([#6626](https://github.com/yt-dlp/yt-dlp/issues/6626)) by [flashdagger](https://github.com/flashdagger) +- **teamcoco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c459d45dd4d417fb80a52e1a04e607776a44baa4) ([#6437](https://github.com/yt-dlp/yt-dlp/issues/6437)) by [bashonly](https://github.com/bashonly) +- **telecaribe**: [Expand livestream support](https://github.com/yt-dlp/yt-dlp/commit/69b2f838d3d3e37dc17367ef64d978db1bea45cf) ([#6601](https://github.com/yt-dlp/yt-dlp/issues/6601)) by [bashonly](https://github.com/bashonly) +- **tencent**: [Fix fatal metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/971d901d129403e875a04dd92109507a03fbc070) ([#7219](https://github.com/yt-dlp/yt-dlp/issues/7219)) by [bashonly](https://github.com/bashonly) +- **thesun**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0181b9a1b31db3fde943f7cd3fe9662f23bff292) ([#6522](https://github.com/yt-dlp/yt-dlp/issues/6522)) by [hatienl0i261299](https://github.com/hatienl0i261299) +- **tiktok** + - [Extract 1080p adaptive formats](https://github.com/yt-dlp/yt-dlp/commit/c2a1bdb00931969193f2a31ea27b9c66a07aaec2) ([#7228](https://github.com/yt-dlp/yt-dlp/issues/7228)) by [bashonly](https://github.com/bashonly) + - [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/925936908a3c3ee0e508621db14696b9f6a8b563) ([#6777](https://github.com/yt-dlp/yt-dlp/issues/6777)) by [bashonly](https://github.com/bashonly) + - [Fix mp3 formats](https://github.com/yt-dlp/yt-dlp/commit/8ceb07e870424c219dced8f4348729553f05c5cc) ([#6615](https://github.com/yt-dlp/yt-dlp/issues/6615)) by [bashonly](https://github.com/bashonly) + - [Fix resolution extraction](https://github.com/yt-dlp/yt-dlp/commit/ab6057ec80aa75db6303b8206916d00c376c622c) ([#7237](https://github.com/yt-dlp/yt-dlp/issues/7237)) by [puc9](https://github.com/puc9) + - [Improve `TikTokLive` extractor](https://github.com/yt-dlp/yt-dlp/commit/216bcb66d7dce0762767d751dad10650cb57da9d) ([#6520](https://github.com/yt-dlp/yt-dlp/issues/6520)) by [bashonly](https://github.com/bashonly) +- **triller**: [Support short URLs, detect removed videos](https://github.com/yt-dlp/yt-dlp/commit/33b737bedf8383c0d00d4e1d06a5273dcdfdb756) ([#6636](https://github.com/yt-dlp/yt-dlp/issues/6636)) by [bashonly](https://github.com/bashonly) +- **tv4**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/125ffaa1737dd04716f2f6fbb0595ad3eb7a4b1c) ([#5649](https://github.com/yt-dlp/yt-dlp/issues/5649)) by [dirkf](https://github.com/dirkf), [TxI5](https://github.com/TxI5) +- **tvp**: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/0c7ce146e4d2a84e656d78f6857952bfd25ab389) ([#6989](https://github.com/yt-dlp/yt-dlp/issues/6989)) by [selfisekai](https://github.com/selfisekai) +- **tvplay**: [Remove outdated domains](https://github.com/yt-dlp/yt-dlp/commit/937264419f9bf375d5656785ae6e53282587c15d) 
([#7106](https://github.com/yt-dlp/yt-dlp/issues/7106)) by [ivanskodje](https://github.com/ivanskodje) +- **twitch** + - [Extract original size thumbnail](https://github.com/yt-dlp/yt-dlp/commit/80b732b7a9585b2a61e456dc0d2d014a439cbaee) ([#6629](https://github.com/yt-dlp/yt-dlp/issues/6629)) by [JC-Chung](https://github.com/JC-Chung) + - [Fix `is_live`](https://github.com/yt-dlp/yt-dlp/commit/0551511b45f7847f40e4314aa9e624e80d086539) ([#6500](https://github.com/yt-dlp/yt-dlp/issues/6500)) by [elyse0](https://github.com/elyse0) + - [Support mobile clips](https://github.com/yt-dlp/yt-dlp/commit/02312c03cf53eb1da24c9ad022ee79af26060733) ([#6699](https://github.com/yt-dlp/yt-dlp/issues/6699)) by [bepvte](https://github.com/bepvte) + - [Update `_CLIENT_ID` and add extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/01231feb142e80828985aabdec04ac608e3d43e2) ([#7200](https://github.com/yt-dlp/yt-dlp/issues/7200)) by [bashonly](https://github.com/bashonly) + - vod: [Support links from schedule tab](https://github.com/yt-dlp/yt-dlp/commit/dbce5afa6bb61f6272ade613f2e9a3d66b88c7ea) ([#7071](https://github.com/yt-dlp/yt-dlp/issues/7071)) by [falbrechtskirchinger](https://github.com/falbrechtskirchinger) +- **twitter** + - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/d1795f4a6af99c976c9d3ea2dabe5cf4f8965d3c) ([#7258](https://github.com/yt-dlp/yt-dlp/issues/7258)) by [bashonly](https://github.com/bashonly) + - [Default to GraphQL, handle auth errors](https://github.com/yt-dlp/yt-dlp/commit/147e62fc584c3ea6fdb09bb7a47905df68553a22) ([#6957](https://github.com/yt-dlp/yt-dlp/issues/6957)) by [bashonly](https://github.com/bashonly) + - spaces: [Add `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/1c16d9df5330819cc79ad588b24aa5b72765c168) ([#7186](https://github.com/yt-dlp/yt-dlp/issues/7186)) by [CeruleanSky](https://github.com/CeruleanSky) +- **urplay**: [Extract all subtitles](https://github.com/yt-dlp/yt-dlp/commit/7bcd4813215ac98daa4949af2ffc677c78307a38) ([#7309](https://github.com/yt-dlp/yt-dlp/issues/7309)) by [hoaluvn](https://github.com/hoaluvn) +- **voot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4f7b11cc1c1cebf598107e00cd7295588ed484da) ([#7227](https://github.com/yt-dlp/yt-dlp/issues/7227)) by [bashonly](https://github.com/bashonly) +- **vrt**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/1a7dcca378e80a387923ee05c250d8ba122441c6) ([#6244](https://github.com/yt-dlp/yt-dlp/issues/6244)) by [bashonly](https://github.com/bashonly), [bergoid](https://github.com/bergoid), [jeroenj](https://github.com/jeroenj) +- **weverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b844a3f8b16500663e7ab6c6ec061cc9b30f71ac) ([#6711](https://github.com/yt-dlp/yt-dlp/issues/6711)) by [bashonly](https://github.com/bashonly) (With fixes in [fd5d93f](https://github.com/yt-dlp/yt-dlp/commit/fd5d93f7040f9776fd541f4e4079dad7d3b3fb4f)) +- **wevidi**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1ea15603d852971ed7d92f4de12808b27b3d9370) ([#6868](https://github.com/yt-dlp/yt-dlp/issues/6868)) by [truedread](https://github.com/truedread) +- **weyyak**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6dc00acf0f1f1107a626c21befd1691403e6aeeb) ([#7124](https://github.com/yt-dlp/yt-dlp/issues/7124)) by [ItzMaxTV](https://github.com/ItzMaxTV) +- **whyp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2c566ed14101673c651c08c306c30fa5b4010b85) ([#6803](https://github.com/yt-dlp/yt-dlp/issues/6803)) by 
[CoryTibbettsDev](https://github.com/CoryTibbettsDev) +- **wrestleuniverse** + - [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/c8561c6d03f025268d6d3972abeb47987c8d7cbb) by [bashonly](https://github.com/bashonly) + - [Fix extraction, add login](https://github.com/yt-dlp/yt-dlp/commit/ef8fb7f029b816dfc95600727d84400591a3b5c5) ([#6982](https://github.com/yt-dlp/yt-dlp/issues/6982)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **wykop**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/aed945e1b9b7d3af2a907e1a12e6508cc81d6a20) ([#6140](https://github.com/yt-dlp/yt-dlp/issues/6140)) by [selfisekai](https://github.com/selfisekai) +- **ximalaya**: [Sort playlist entries](https://github.com/yt-dlp/yt-dlp/commit/8790ea7b2536332777bce68590386b1aa935fac7) ([#7292](https://github.com/yt-dlp/yt-dlp/issues/7292)) by [linsui](https://github.com/linsui) +- **YahooGyaOIE, YahooGyaOPlayerIE**: [Delete extractors due to website closure](https://github.com/yt-dlp/yt-dlp/commit/68be95bd0ca3f76aa63c9812935bd826b3a42e53) ([#6218](https://github.com/yt-dlp/yt-dlp/issues/6218)) by [Lesmiscore](https://github.com/Lesmiscore) +- **yappy**: YappyProfile: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6f69101dc912690338d32e2aab085c32e44eba3f) ([#7346](https://github.com/yt-dlp/yt-dlp/issues/7346)) by [7vlad7](https://github.com/7vlad7) +- **youku**: [Improve error message](https://github.com/yt-dlp/yt-dlp/commit/ef0848abd425dfda6db62baa8d72897eefb0007f) ([#6690](https://github.com/yt-dlp/yt-dlp/issues/6690)) by [carusocr](https://github.com/carusocr) +- **youporn**: [Extract m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/ddae33754ae1f32dd9c64cf895c47d20f6b5f336) by [pukkandan](https://github.com/pukkandan) +- **youtube** + - [Add client name to `format_note` when `-v`](https://github.com/yt-dlp/yt-dlp/commit/c795c39f27244cbce846067891827e4847036441) ([#6254](https://github.com/yt-dlp/yt-dlp/issues/6254)) by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) + - [Add extractor-arg `include_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/86cb922118b236306310a72657f70426c20e28bb) by [pukkandan](https://github.com/pukkandan) + - [Bypass throttling for `-f17`](https://github.com/yt-dlp/yt-dlp/commit/c9abebb851e6188cb34b9eb744c1863dd46af919) by [pukkandan](https://github.com/pukkandan) + - [Construct fragment list lazily](https://github.com/yt-dlp/yt-dlp/commit/2a23d92d9ec44a0168079e38bcf3d383e5c4c7bb) by [pukkandan](https://github.com/pukkandan) (With fixes in [e389d17](https://github.com/yt-dlp/yt-dlp/commit/e389d172b6f42e4f332ae679dc48543fb7b9b61d)) + - [Define strict uploader metadata mapping](https://github.com/yt-dlp/yt-dlp/commit/7666b93604b97e9ada981c6b04ccf5605dd1bd44) ([#6384](https://github.com/yt-dlp/yt-dlp/issues/6384)) by [coletdjnz](https://github.com/coletdjnz) + - [Determine audio language using automatic captions](https://github.com/yt-dlp/yt-dlp/commit/ff9b0e071ffae5543cc309e6f9e647ac51e5846e) by [pukkandan](https://github.com/pukkandan) + - [Extract `channel_is_verified`](https://github.com/yt-dlp/yt-dlp/commit/8213ce28a485e200f6a7e1af1434a987c8e702bd) ([#7213](https://github.com/yt-dlp/yt-dlp/issues/7213)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract `heatmap` data](https://github.com/yt-dlp/yt-dlp/commit/5caf30dbc34f10b0be60676fece635b5c59f0d72) ([#7100](https://github.com/yt-dlp/yt-dlp/issues/7100)) by [tntmod54321](https://github.com/tntmod54321) + - [Extract more 
metadata for comments](https://github.com/yt-dlp/yt-dlp/commit/c35448b7b14113b35c4415dbfbf488c4731f006f) ([#7179](https://github.com/yt-dlp/yt-dlp/issues/7179)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract uploader metadata for feed/playlist items](https://github.com/yt-dlp/yt-dlp/commit/93e12ed76ef49252dc6869b59d21d0777e5e11af) by [coletdjnz](https://github.com/coletdjnz) + - [Fix comment loop detection for pinned comments](https://github.com/yt-dlp/yt-dlp/commit/141a8dff98874a426d7fbe772e0a8421bb42656f) ([#6714](https://github.com/yt-dlp/yt-dlp/issues/6714)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix continuation loop with no comments](https://github.com/yt-dlp/yt-dlp/commit/18f8fba7c89a87f99cc3313a1795848867e84fff) ([#7148](https://github.com/yt-dlp/yt-dlp/issues/7148)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix parsing `comment_count`](https://github.com/yt-dlp/yt-dlp/commit/071670cbeaa01ddf2cc20a95ae6da25f8f086431) ([#6523](https://github.com/yt-dlp/yt-dlp/issues/6523)) by [nick-cd](https://github.com/nick-cd) + - [Handle incomplete initial data from watch page](https://github.com/yt-dlp/yt-dlp/commit/607510b9f2f67bfe7d33d74031a5c1fe22a24862) ([#6510](https://github.com/yt-dlp/yt-dlp/issues/6510)) by [coletdjnz](https://github.com/coletdjnz) + - [Ignore wrong fps of some formats](https://github.com/yt-dlp/yt-dlp/commit/97afb093d4cbe5df889145afa5f9ede4535e93e4) by [pukkandan](https://github.com/pukkandan) + - [Misc cleanup](https://github.com/yt-dlp/yt-dlp/commit/14a14335b280766fbf5a469ae26836d6c1fe450a) by [coletdjnz](https://github.com/coletdjnz) + - [Prioritize premium formats](https://github.com/yt-dlp/yt-dlp/commit/51a07b0dca4c079d58311c19b6d1c097c24bb021) by [pukkandan](https://github.com/pukkandan) + - [Revert default formats to `https`](https://github.com/yt-dlp/yt-dlp/commit/c6786ff3baaf72a5baa4d56d34058e54cbcf8ceb) by [pukkandan](https://github.com/pukkandan) + - [Support podcasts and releases tabs](https://github.com/yt-dlp/yt-dlp/commit/447afb9eaa65bc677e3245c83e53a8e69c174a3c) by [coletdjnz](https://github.com/coletdjnz) + - [Support shorter relative time format](https://github.com/yt-dlp/yt-dlp/commit/2fb35f6004c7625f0dd493da4a5abf0690f7777c) ([#7191](https://github.com/yt-dlp/yt-dlp/issues/7191)) by [coletdjnz](https://github.com/coletdjnz) + - music_search_url: [Extract title](https://github.com/yt-dlp/yt-dlp/commit/69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2) ([#7102](https://github.com/yt-dlp/yt-dlp/issues/7102)) by [kangalio](https://github.com/kangalio) +- **zaiko** + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/345b4c0aedd9d19898ce00d5cef35fe0d277a052) ([#7254](https://github.com/yt-dlp/yt-dlp/issues/7254)) by [c-basalt](https://github.com/c-basalt) + - ZaikoETicket: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5cc09c004bd5edbbada9b041c08a720cadc4f4df) ([#7347](https://github.com/yt-dlp/yt-dlp/issues/7347)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **zdf**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ee0ed0338df328cd986f97315c8162b5a151476d) by [bashonly](https://github.com/bashonly) +- **zee5**: [Fix extraction of new content](https://github.com/yt-dlp/yt-dlp/commit/9d7fde89a40360396f0baa2ee8bf507f92108b32) ([#7280](https://github.com/yt-dlp/yt-dlp/issues/7280)) by [bashonly](https://github.com/bashonly) +- **zingmp3**: [Fix and improve extractors](https://github.com/yt-dlp/yt-dlp/commit/17d7ca84ea723c20668bd9bfa938be7ea0e64f6b) 
([#6367](https://github.com/yt-dlp/yt-dlp/issues/6367)) by [hatienl0i261299](https://github.com/hatienl0i261299) +- **zoom** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/79c77e85b70ae3b9942d5a88c14d021a9bd24222) ([#6741](https://github.com/yt-dlp/yt-dlp/issues/6741)) by [shreyasminocha](https://github.com/shreyasminocha) + - [Fix share URL extraction](https://github.com/yt-dlp/yt-dlp/commit/90c1f5120694105496a6ad9e3ecfc6c25de6cae1) ([#6789](https://github.com/yt-dlp/yt-dlp/issues/6789)) by [bashonly](https://github.com/bashonly) + +#### Downloader changes +- **curl**: [Fix progress reporting](https://github.com/yt-dlp/yt-dlp/commit/66aeaac9aa30b5959069ba84e53a5508232deb38) by [pukkandan](https://github.com/pukkandan) +- **fragment**: [Do not sleep between fragments](https://github.com/yt-dlp/yt-dlp/commit/424f3bf03305088df6e01d62f7311be8601ad3f4) by [pukkandan](https://github.com/pukkandan) + +#### Postprocessor changes +- [Fix chapters if duration is not extracted](https://github.com/yt-dlp/yt-dlp/commit/01ddec7e661bf90dc4c34e6924eb9d7629886cef) ([#6037](https://github.com/yt-dlp/yt-dlp/issues/6037)) by [bashonly](https://github.com/bashonly) +- [Print newline for `--progress-template`](https://github.com/yt-dlp/yt-dlp/commit/13ff78095372fd98900a32572cf817994c07ccb5) by [pukkandan](https://github.com/pukkandan) +- **EmbedThumbnail, FFmpegMetadata**: [Fix error on attaching thumbnails and info json for mkv/mka](https://github.com/yt-dlp/yt-dlp/commit/0f0875ed555514f32522a0f30554fb08825d5124) ([#6647](https://github.com/yt-dlp/yt-dlp/issues/6647)) by [Lesmiscore](https://github.com/Lesmiscore) +- **FFmpegFixupM3u8PP**: [Check audio codec before fixup](https://github.com/yt-dlp/yt-dlp/commit/3f7e2bd80e3c5d8a1682f20a1b245fcd974f295d) ([#6778](https://github.com/yt-dlp/yt-dlp/issues/6778)) by [bashonly](https://github.com/bashonly) +- **FixupDuplicateMoov**: [Fix bug in triggering](https://github.com/yt-dlp/yt-dlp/commit/26010b5cec50193b98ad7845d1d77450f9f14c2b) by [pukkandan](https://github.com/pukkandan) + +#### Misc. changes +- [Add automatic duplicate issue detection](https://github.com/yt-dlp/yt-dlp/commit/15b2d3db1d40b0437fca79d8874d392aa54b3cdd) by [pukkandan](https://github.com/pukkandan) +- **build** + - [Fix macOS target](https://github.com/yt-dlp/yt-dlp/commit/44a79958f0b596ee71e1eb25f158610aada29d1b) by [Grub4K](https://github.com/Grub4K) + - [Implement build verification using `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/b73193c99aa23b135732408a5fcf655c68d731c6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Pin `pyinstaller` version for macOS](https://github.com/yt-dlp/yt-dlp/commit/427a8fafbb0e18c28d0ed7960be838d7b26b88d3) by [pukkandan](https://github.com/pukkandan) + - [Various build workflow improvements](https://github.com/yt-dlp/yt-dlp/commit/c4efa0aefec8daef1de62fd1693f13edf3c8b03c) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- **cleanup** + - Miscellaneous + - [6f2287c](https://github.com/yt-dlp/yt-dlp/commit/6f2287cb18cbfb27518f068d868fa9390fee78ad) by [pukkandan](https://github.com/pukkandan) + - [ad54c91](https://github.com/yt-dlp/yt-dlp/commit/ad54c9130e793ce433bf9da334fa80df9f3aee58) by [freezboltz](https://github.com/freezboltz), [mikf](https://github.com/mikf), [pukkandan](https://github.com/pukkandan) +- **cleanup, utils**: [Split into submodules](https://github.com/yt-dlp/yt-dlp/commit/69bec6730ec9d724bcedeab199d9d684d61423ba) ([#7090](https://github.com/yt-dlp/yt-dlp/issues/7090)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +- **cli_to_api**: [Add script](https://github.com/yt-dlp/yt-dlp/commit/46f1370e9af6f8af8762f67e27e5acb8f0c48a47) by [pukkandan](https://github.com/pukkandan) (see the sketch after this list) +- **devscripts**: `make_changelog`: [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/23c39a4beadee382060bb47fdaa21316ca707d38) by [Grub4K](https://github.com/Grub4K) +- **docs**: [Misc improvements](https://github.com/yt-dlp/yt-dlp/commit/c8bc203fbf3bb09914e53f0833eed622ab7edbb9) by [pukkandan](https://github.com/pukkandan)
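The new `cli_to_api` script above maps a command-line invocation to the equivalent embedding-API options. As a rough sketch of the idea (not the script's exact interface; the module is named `yt_dlp` upstream and `hypervideo_dl` in this package, and the URL is only a placeholder):

```python
import yt_dlp  # import hypervideo_dl in this package

# parse_options() turns CLI-style argv into the options dict that
# YoutubeDL accepts; devscripts/cli_to_api.py builds on this mapping
ydl_opts = yt_dlp.parse_options(['-f', 'bestaudio', '--no-playlist']).ydl_opts

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```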
+ +### 2023.03.04 + +#### Extractor changes +- bilibili + - [Fix for downloading wrong subtitles](https://github.com/yt-dlp/yt-dlp/commit/8a83baaf218ab89e6e7faa76b7c7be3a2ec19e3a) ([#6358](https://github.com/yt-dlp/yt-dlp/issues/6358)) by [LXYan2333](https://github.com/LXYan2333) +- ESPNcricinfo + - [Handle new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/640c934823fc2d1ec77ec932566078014058635f) ([#6321](https://github.com/yt-dlp/yt-dlp/issues/6321)) by [venkata-krishnas](https://github.com/venkata-krishnas) +- lefigaro + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/eb8fd6d044e8926532772b72be0645c6b8ecb3aa) ([#6309](https://github.com/yt-dlp/yt-dlp/issues/6309)) by [elyse0](https://github.com/elyse0) +- lumni + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f8489cccbdc6e96027ef527b88717458f0900e8) ([#6302](https://github.com/yt-dlp/yt-dlp/issues/6302)) by [carusocr](https://github.com/carusocr) +- Prankcast + - [Fix tags](https://github.com/yt-dlp/yt-dlp/commit/ed4cc4ea793314c50ae3f82e98248c1de1c25694) ([#6316](https://github.com/yt-dlp/yt-dlp/issues/6316)) by [columndeeply](https://github.com/columndeeply) +- rutube + - [Extract chapters from description](https://github.com/yt-dlp/yt-dlp/commit/22ccd5420b3eb0782776071f12cccd1fedaa1fd0) ([#6345](https://github.com/yt-dlp/yt-dlp/issues/6345)) by [mushbite](https://github.com/mushbite) +- SportDeutschland + - [Rewrite 
extractor](https://github.com/yt-dlp/yt-dlp/commit/45db357289b4e1eec09093c8bc5446520378f426) by [pukkandan](https://github.com/pukkandan) +- telecaribe + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b40471282286bd2b09c485bf79afd271d229272c) ([#6311](https://github.com/yt-dlp/yt-dlp/issues/6311)) by [elyse0](https://github.com/elyse0) +- tubetugraz + - [Support `--twofactor` (#6424)](https://github.com/yt-dlp/yt-dlp/commit/f44cb4e77bb9be8be291d02ab6f79dc0b4c0d4a1) ([#6427](https://github.com/yt-dlp/yt-dlp/issues/6427)) by [Ferdi265](https://github.com/Ferdi265) +- tunein + - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/46580ced56c90b559885aded6aa8f46f20a9cdce) ([#6310](https://github.com/yt-dlp/yt-dlp/issues/6310)) by [elyse0](https://github.com/elyse0) +- twitch + - [Update for GraphQL API changes](https://github.com/yt-dlp/yt-dlp/commit/4a6272c6d1bff89969b67cd22b26ebe6d7e72279) ([#6318](https://github.com/yt-dlp/yt-dlp/issues/6318)) by [elyse0](https://github.com/elyse0) +- twitter + - [Fix retweet extraction](https://github.com/yt-dlp/yt-dlp/commit/cf605226521e99c89fc8dff26a319025810e63a0) ([#6422](https://github.com/yt-dlp/yt-dlp/issues/6422)) by [selfisekai](https://github.com/selfisekai) +- xvideos + - quickies: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/283a0b5bc511f3b350eead4488158f50c20ec526) ([#6414](https://github.com/yt-dlp/yt-dlp/issues/6414)) by [Yakabuff](https://github.com/Yakabuff) + +#### Misc. changes +- build + - [Fix publishing to PyPI and homebrew](https://github.com/yt-dlp/yt-dlp/commit/55676fe498345a389a2539d8baaba958d6d61c3e) by [bashonly](https://github.com/bashonly) + - [Only archive if `vars.ARCHIVE_REPO` is set](https://github.com/yt-dlp/yt-dlp/commit/08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52) by [Grub4K](https://github.com/Grub4K) +- cleanup + - Miscellaneous: [392389b](https://github.com/yt-dlp/yt-dlp/commit/392389b7df7b818f794b231f14dc396d4875fbad) by [pukkandan](https://github.com/pukkandan) +- devscripts + - `make_changelog`: [Stop at `Release ...` commit](https://github.com/yt-dlp/yt-dlp/commit/7accdd9845fe7ce9d0aa5a9d16faaa489c1294eb) by [pukkandan](https://github.com/pukkandan) + +### 2023.03.03 + +#### Important changes +- **A new release type has been added!** + * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs). + * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`). + * The `--update-to` option has been added, allowing the user more control over program upgrades (or downgrades). + * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags. + * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG` (see the example below) +- **YouTube throttling fixes!**
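For instance (an illustrative sketch based only on the forms documented above, using the upstream `yt-dlp` binary name): `yt-dlp --update-to nightly` switches a self-updating install to the nightly channel, `yt-dlp --update-to 2023.03.03` moves to a specific tag within the current channel, and `yt-dlp --update-to stable@2023.03.03` selects both a channel and a tag at once.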
+ +#### Core changes +- [Add option `--break-match-filters`](https://github.com/yt-dlp/yt-dlp/commit/fe2ce85aff0aa03735fc0152bb8cb9c3d4ef0753) by [pukkandan](https://github.com/pukkandan) +- [Fix `--break-on-existing` with `--lazy-playlist`](https://github.com/yt-dlp/yt-dlp/commit/d21056f4cf0a1623daa107f9181074f5725ac436) by [pukkandan](https://github.com/pukkandan) +- dependencies + - [Simplify `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/65f6e807804d2af5e00f2aecd72bfc43af19324a) by [pukkandan](https://github.com/pukkandan) +- jsinterp + - [Handle `Date` at epoch 0](https://github.com/yt-dlp/yt-dlp/commit/9acf1ee25f7ad3920ede574a9de95b8c18626af4) by [pukkandan](https://github.com/pukkandan) +- plugins + - [Don't look in `.egg` directories](https://github.com/yt-dlp/yt-dlp/commit/b059188383eee4fa336ef728dda3ff4bb7335625) by [pukkandan](https://github.com/pukkandan) +- update + - [Add option `--update-to`, including to nightly](https://github.com/yt-dlp/yt-dlp/commit/77df20f14cc9ed41dfe3a1fe2d77fd27f5365a94) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) +- utils + - `LenientJSONDecoder`: [Parse unclosed objects](https://github.com/yt-dlp/yt-dlp/commit/cc09083636ce21e58ff74f45eac2dbda507462b0) by [pukkandan](https://github.com/pukkandan) + - `Popen`: [Shim undocumented `text_mode` property](https://github.com/yt-dlp/yt-dlp/commit/da8e2912b165005f76779a115a071cd6132ceedf) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Fix DRM detection in m3u8](https://github.com/yt-dlp/yt-dlp/commit/43a3eaf96393b712d60cbcf5c6cb1e90ed7f42f5) by [pukkandan](https://github.com/pukkandan) +- generic + - [Detect manifest links via extension](https://github.com/yt-dlp/yt-dlp/commit/b38cae49e6f4849c8ee2a774bdc3c1c647ae5f0e) by [bashonly](https://github.com/bashonly) + - [Handle basic-auth when checking redirects](https://github.com/yt-dlp/yt-dlp/commit/8e9fe43cd393e69fa49b3d842aa3180c1d105b8f) by [pukkandan](https://github.com/pukkandan) +- GoogleDrive + - [Fix some audio](https://github.com/yt-dlp/yt-dlp/commit/4d248e29d20d983ededab0b03d4fe69dff9eb4ed) by [pukkandan](https://github.com/pukkandan) +- iprima + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9fddc12ab022a31754e0eaa358fc4e1dfa974587) ([#6291](https://github.com/yt-dlp/yt-dlp/issues/6291)) by [std-move](https://github.com/std-move) +- mediastream + - [Improve WinSports support](https://github.com/yt-dlp/yt-dlp/commit/2d5a8c5db2bd4ff1c2e45e00cd890a10f8ffca9e) ([#6401](https://github.com/yt-dlp/yt-dlp/issues/6401)) by [bashonly](https://github.com/bashonly) +- ntvru + - [Extract HLS and DASH formats](https://github.com/yt-dlp/yt-dlp/commit/77d6d136468d0c23c8e79bc937898747804f585a) ([#6403](https://github.com/yt-dlp/yt-dlp/issues/6403)) by [bashonly](https://github.com/bashonly) +- tencent + - [Add more formats and info](https://github.com/yt-dlp/yt-dlp/commit/18d295c9e0f95adc179eef345b7af64d6372db78) ([#5950](https://github.com/yt-dlp/yt-dlp/issues/5950)) by [Hill-98](https://github.com/Hill-98) +- yle_areena + - [Extract non-Kaltura videos](https://github.com/yt-dlp/yt-dlp/commit/40d77d89027cd0e0ce31d22aec81db3e1d433900) ([#6402](https://github.com/yt-dlp/yt-dlp/issues/6402)) by [bashonly](https://github.com/bashonly) +- youtube + - [Construct dash formats with `range` 
query](https://github.com/yt-dlp/yt-dlp/commit/5038f6d713303e0967d002216e7a88652401c22a) by [pukkandan](https://github.com/pukkandan) (With fixes in [f34804b](https://github.com/yt-dlp/yt-dlp/commit/f34804b2f920f62a6e893a14a9e2a2144b14dd23) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)) + - [Detect and break on looping comments](https://github.com/yt-dlp/yt-dlp/commit/7f51861b1820c37b157a239b1fe30628d907c034) ([#6301](https://github.com/yt-dlp/yt-dlp/issues/6301)) by [coletdjnz](https://github.com/coletdjnz) + - [Extract channel `view_count` when `/about` tab is passed](https://github.com/yt-dlp/yt-dlp/commit/31e183557fcd1b937582f9429f29207c1261f501) by [pukkandan](https://github.com/pukkandan) + +#### Misc. changes +- build + - [Add `cffi` as a dependency for `hypervideo_dl_linux`](https://github.com/yt-dlp/yt-dlp/commit/776d1c3f0c9b00399896dd2e40e78e9a43218109) by [bashonly](https://github.com/bashonly) + - [Automated builds and nightly releases](https://github.com/yt-dlp/yt-dlp/commit/29cb20bd563c02671b31dd840139e93dd37150a1) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [bfc861a](https://github.com/yt-dlp/yt-dlp/commit/bfc861a91ee65c9b0ac169754f512e052c6827cf) by [pukkandan](https://github.com/pukkandan)) + - [Sign SHA files and release public key](https://github.com/yt-dlp/yt-dlp/commit/12647e03d417feaa9ea6a458bea5ebd747494a53) by [Grub4K](https://github.com/Grub4K) +- cleanup + - [Fix `Changelog`](https://github.com/yt-dlp/yt-dlp/commit/17ca19ab60a6a13eb8a629c51442b5248b0d8394) by [pukkandan](https://github.com/pukkandan) + - jsinterp: [Give functions names to help debugging](https://github.com/yt-dlp/yt-dlp/commit/b2e0343ba0fc5d8702e90f6ba2b71358e2677e0b) by [pukkandan](https://github.com/pukkandan) + - Miscellaneous: [4815bbf](https://github.com/yt-dlp/yt-dlp/commit/4815bbfc41cf641e4a0650289dbff968cb3bde76), [5b28cef](https://github.com/yt-dlp/yt-dlp/commit/5b28cef72db3b531680d89c121631c73ae05354f) by [pukkandan](https://github.com/pukkandan) +- devscripts + - [Script to generate changelog](https://github.com/yt-dlp/yt-dlp/commit/d400e261cf029a3f20d364113b14de973be75404) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [Grub4K](https://github.com/Grub4K) (With fixes in [9344964](https://github.com/yt-dlp/yt-dlp/commit/93449642815a6973a4b09b289982ca7e1f961b5f)) + +### 2023.02.17 + +* Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) +* Fix `--concat-playlist` +* Imply `--no-progress` when `--print` +* Improve default subtitle language selection by [sdht0](https://github.com/sdht0) +* Make `title` completely non-fatal +* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan) +* Support module level `__bool__` and `property` +* [dependencies] Standardize `Cryptodome` imports +* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr) +* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912) +* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly) +* [jsinterp] Support `if` statements +* [plugins] Fix zip search paths +* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K) +* [utils] 
`traverse_obj`: Fix more bugs +* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K) +* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars) +* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) +* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu) +* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K) +* [compat_utils] Improve `passthrough_module` +* [compat_utils] Simplify `EnhancedModule` +* [build] Update pyinstaller +* [pyinst] Fix for pyinstaller 5.8 +* [devscripts] Provide `pyinstaller` hooks +* [devscripts/pyinstaller] Analyze sub-modules of `Cryptodome` +* [cleanup] Misc fixes and cleanup +* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) +* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra) +* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai) +* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu) +* [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf) +* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) +* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator) +* [extractor/youtube] Handle `consent.youtube` +* [extractor/youtube] Support `/live/` URL +* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb) +* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86) +* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr) +* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz) +* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream) +* [extractor/crunchyroll] Better message for premium videos +* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K) +* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299) +* [extractor/embedly] Embedded links may be for other extractors +* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001) +* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj) +* [extractor/Hidive] Fix subtitles and age-restriction by 
[chexxor](https://github.com/chexxor) +* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars) +* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa) +* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly) +* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly) +* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger) +* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly) +* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan) +* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR) +* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev) +* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) +* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246) +* [extractor/radiko] Fix format sorting for Time Free by [road-master](https://github.com/road-master) +* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER) +* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan) +* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly) +* [extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren) +* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly) +* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) +* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai) +* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K) +* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai) +* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly) +* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga) +* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev) +* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr) +* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev) +* [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly) + + +### 2023.01.06 + +* Fix config locations by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) +* [downloader/aria2c] Disable native progress +* [utils] `mimetype2ext`: `weba` is not standard +* [utils] 
`windows_enable_vt_mode`: Better error handling +* [build] Add minimal `pyproject.toml` +* [update] Fix updater file removal on windows by [Grub4K](https://github.com/Grub4K) +* [cleanup] Misc fixes and cleanup +* [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS) +* [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou) +* [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812) +* [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan) +* [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman) +* [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas) +* [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung) + +### 2023.01.02 + +* **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) + * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information +* Add `--compat-options 2021,2022` + * This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put `--compat-options 2022` in your config to guard against future compat changes.
+* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) +* Merge youtube-dl: Upto [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) +* Add pre-processor stage `video` +* Let `--parse/replace-in-metadata` run at any post-processing stage +* Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz) +* Add new field `aspect_ratio` +* Add `ac4` to known codecs +* Add `weba` to known extensions +* [FFmpegVideoConvertor] Add `gif` to `--recode-video` +* Add message when there are no subtitles/thumbnails +* Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore) +* Make early reject of `--match-filter` stricter +* Fix `--cookies-from-browser` CLI parsing +* Fix `original_url` in playlists +* Fix bug in writing playlist info-json +* Fix bugs in `PlaylistEntries` +* [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor] Add a way to distinguish IEs that return only videos +* [extractor] Implement universal format sorting and deprecate `_sort_formats` +* [extractor] Let `_extract_format` functions obey `--ignore-no-formats` +* [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly) +* [extractor/generic] Don't report redirect to https +* [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz) +* [FormatSort] Add `mov` to `vext` +* [jsinterp] Escape regex that looks like nested set +* [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger) +* [utils] `classproperty`: Add cache support +* [utils] `get_exe_version`: Detect broken executables by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) +* [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan) +* [utils] Make `ExtractorError` mutable +* [utils] Move `FileDownloader.parse_bytes` into utils +* [utils] Move format sorting code into `utils` +* [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K) +* [update] Workaround [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632) +* [docs] Improvements +* [cleanup] Misc fixes and cleanup +* [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz) +* [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h) +* [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan) +* [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc),
[pukkandan](https://github.com/pukkandan) +* [extractor/kankanews] Add extractor by [synthpop123](https://github.com/synthpop123) +* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0) +* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger) +* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly) +* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321) +* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly) +* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife) +* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly) +* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123) +* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro) +* [extractor/youtube] Consider language in format de-duplication +* [extractor/youtube] Extract DRC formats +* [extractor/youtube] Fix `ytuser:` +* [extractor/youtube] Fix bug in handling of music URLs +* [extractor/youtube] Subtitles cannot be translated to `und` +* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz) +* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet) +* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow) +* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes) +* [extractor/bilibili] Fix `--no-playlist` for anthology +* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh) +* [extractor/biliintl:series] Make partial download of series faster +* [extractor/BiliLive] Fix extractor +* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction +* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai) +* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda) +* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn) +* [extractor/embedly] Handle vimeo embeds +* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf) +* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900) +* [extractor/hotstar] Improve format metadata +* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly) +* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo) +* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo) +* [extractor/mixch] Support `--wait-for-video` +* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by 
[bashonly](https://github.com/bashonly) +* [extractor/naver] Treat fan subtitles as separate language +* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly) +* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly) +* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall) +* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai) +* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn) +* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly) +* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly) +* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus) +* [extractor/sibnet] Separate from VKIE +* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop) +* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246) +* [extractor/stv] Detect DRM +* [extractor/swearnet] Fix description bug +* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0) +* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly) +* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly) +* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal) +* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly) +* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo) +* [extractor/udemy] Fix lectures that have no URL and detect DRM +* [extractor/unsupported] Add more URLs +* [extractor/urplay] Support for audio-only formats by [barsnick](https://github.com/barsnick) +* [extractor/wistia] Improve extension detection by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) +* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender) +* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor) +* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell) +* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd) + ### 2022.11.11 diff --git a/Makefile b/Makefile index a395a0e..3e0f3d8 100644 --- a/Makefile +++ b/Makefile @@ -13,9 +13,10 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md completions devscripts/* test .PHONY: all clean install test tar pypi-files completions ot offlinetest 
codetest clean-test: - rm -rf *.3gp *.annotations.xml *.ape *.avi *.description *.dump *.flac *.flv *.frag *.frag.aria2 *.frag.urls \ - *.info.json *.jpeg *.jpg *.live_chat.json *.m4a *.m4v *.mkv *.mp3 *.mp4 *.ogg *.opus *.part* *.png *.sbv *.srt \ - *.swf *.swp *.ttml *.vtt *.wav *.webm *.webp *.ytdl test/testdata/player-*.js + rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \ + *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \ + *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 \ + *.mp4 *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.swf *.swp *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp *.ytdl clean-dist: rm -rf MANIFEST build/ dist/ .coverage cover/ hypervideo.tar.gz completions/ hypervideo_dl/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp hypervideo hypervideo.exe hypervideo_dl.egg-info/ AUTHORS .mailmap clean-cache: diff --git a/README.md b/README.md index 3548389..ffa2289 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ hypervideo - A fork of yt-dlp without nonfree parts * [Extractor Options](#extractor-options) * [CONFIGURATION](#configuration) * [Configuration file encoding](#configuration-file-encoding) - * [Authentication with .netrc file](#authentication-with-netrc-file) + * [Authentication with netrc](#authentication-with-netrc) * [Notes about environment variables](#notes-about-environment-variables) * [OUTPUT TEMPLATE](#output-template) * [Output template examples](#output-template-examples) @@ -41,7 +41,9 @@ hypervideo - A fork of yt-dlp without nonfree parts * [Modifying metadata examples](#modifying-metadata-examples) * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) -* [EMBEDDING HYPERVIDEO](#embedding-hypervideo) + * [Installing Plugins](#installing-plugins) + * [Developing Plugins](#developing-plugins) +* [EMBEDDING HYPERVIDEO](#embedding-hypervideo) * [Embedding examples](#embedding-examples) * [DEPRECATED OPTIONS](#deprecated-options) * [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp) @@ -54,16 +56,16 @@ hypervideo - A fork of yt-dlp without nonfree parts # NEW FEATURES -* Merged with **youtube-dl v2021.12.17+ [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)** and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will now be preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`.
This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) -* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. +* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details. * **YouTube improvements**: - * Supports Clips, Stories (`ytstories:`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, YouTube Music Albums/Channels ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)), and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) + * Supports Clips, Stories (`ytstories:`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\*** * Supports some (but not all) age-gated content without cookies * Download livestreams from the start using `--live-from-start` (*experimental*) @@ -92,12 +94,16 @@ hypervideo - A fork of yt-dlp without nonfree parts * **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` -* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc +* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc * **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc * **Plugins**: Extractors and PostProcessors can be loaded from an external file.
See [plugins](#plugins) for details +* **Self updater**: The releases can be updated using `yt-dlp -U`, and downgraded using `--update-to` if required + +* **Nightly builds**: [Automated nightly builds](#update-channels) can be used with `--update-to nightly` + See [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes Features marked with a **\*** have been back-ported to youtube-dl @@ -106,6 +112,7 @@ Features marked with a **\*** have been back-ported to youtube-dl Some of hypervideo's default options are different from that of youtube-dl and youtube-dlc: +* yt-dlp supports only [Python 3.7+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * hypervideo stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations @@ -125,16 +132,20 @@ Some of hypervideo's default options are different from that of youtube-dl and y * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/hypervideo_dl/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead -* Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this +* Some internal metadata such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` -* hypervideo's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior +* yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior +* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [~~aria2c~~](https://github.com/yt-dlp/yt-dlp/issues/5931)). 
You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is +* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options (Do NOT use) -* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` -* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` +* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter` +* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter` +* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` +* `--compat-options 2022`: Same as `--compat-options playlist-match-filter,no-external-downloader-progress`. Use this to enable all future compat options # INSTALLATION @@ -200,7 +211,7 @@ On some systems, you may need to use `py` or `python` instead of `python3`. `pyinst.py` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which is further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate). -Note that pyinstaller with versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. +**Note**: Pyinstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment. **Important**: Running `pyinstaller` directly **without** using `pyinst.py` is **not** officially supported. This may or may not work correctly. @@ -226,7 +237,10 @@ If you wish to build it anyway, install Python and py2exe, and then simply run ` * **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. -You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release +Note: See their `--help` for more info. + +### Forking the project +If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases.
# USAGE AND OPTIONS @@ -285,7 +299,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi configuration files --flat-playlist Do not extract the videos of a playlist, only list them - --no-flat-playlist Extract the videos of a playlist + --no-flat-playlist Fully extract the videos of a playlist + (default) --live-from-start Download livestreams from the start. Currently only supported for YouTube (Experimental) @@ -297,8 +312,12 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --no-wait-for-video Do not wait for scheduled streams (default) --mark-watched Mark videos watched (even with --simulate) --no-mark-watched Do not mark videos watched (default) - --no-colors Do not emit color codes in output (Alias: - --no-colours) + --color [STREAM:]POLICY Whether to emit color codes in output, + optionally prefixed by the STREAM (stdout or + stderr) to apply the setting to. Can be one + of "always", "auto" (default), "never", or + "no_color" (use non color terminal + sequences). Can be used multiple times --compat-options OPTS Options that can help keep compatibility with youtube-dl or youtube-dlc configurations by reverting some of the @@ -330,6 +349,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --source-address IP Client-side IP address to bind to -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 + --enable-file-urls Enable file:// URLs. This is disabled by + default for security reasons. ## Geo-restriction: --geo-verification-proxy URL Use this proxy to verify the IP address for @@ -337,34 +358,31 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi specified by --proxy (or none, if the option is not present) is used for the actual downloading - --geo-bypass Bypass geographic restriction via faking - X-Forwarded-For HTTP header (default) - --no-geo-bypass Do not bypass geographic restriction via - faking X-Forwarded-For HTTP header - --geo-bypass-country CODE Force bypass geographic restriction with - explicitly provided two-letter ISO 3166-2 - country code - --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with - explicitly provided IP block in CIDR notation + --xff VALUE How to fake X-Forwarded-For HTTP header to + try bypassing geographic restriction. One of + "default" (only when known to be useful), + "never", an IP block in CIDR notation, or a + two-letter ISO 3166-2 country code ## Video Selection: - -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the videos + -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items to download. You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. Use negative indices to count from the right and negative STEP to download in reverse order. E.g. "-I 1:3,7,-5::2" used on a - playlist of size 15 will download the videos + playlist of size 15 will download the items at index 1,2,3,7,11,13,15 --min-filesize SIZE Abort download if filesize is smaller than SIZE, e.g. 50k or 44.6M - --max-filesize SIZE Abort download if filesize if larger than + --max-filesize SIZE Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format - [now|today|yesterday][-N[day|week|month|year - ]]. E.g. --date today-2weeks + [now|today|yesterday][-N[day|week|month|year]]. + E.g. 
"--date today-2weeks" downloads only + videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before this date. The date formats accepted is the same as --date @@ -391,7 +409,10 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi dogs" (caseless). Use "--match-filter -" to interactively ask whether to download each video - --no-match-filter Do not use generic video filter (default) + --no-match-filters Do not use any --match-filter (default) + --break-match-filters FILTER Same as "--match-filters" but stops the + download process when a video is rejected + --no-break-match-filters Do not use any --break-match-filters (default) --no-playlist Download only the video, if the URL refers to a video and a playlist --yes-playlist Download the playlist, if the URL refers to @@ -405,11 +426,9 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --max-downloads NUMBER Abort after downloading NUMBER files --break-on-existing Stop the download process when encountering a file that is in the archive - --break-on-reject Stop the download process when encountering - a file that has been filtered out - --break-per-input --break-on-existing, --break-on-reject, - --max-downloads, and autonumber resets per - input URL + --break-per-input Alters --max-downloads, --break-on-existing, + --break-match-filter, and autonumber to + reset per input URL --no-break-per-input --break-on-existing and similar options terminates the entire download queue --skip-playlist-after-errors N Number of allowed failures until the rest of @@ -441,8 +460,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi linear=1::2 --retry-sleep fragment:exp=1:20 --skip-unavailable-fragments Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) - (Alias: --no-abort-on-unavailable-fragment) - --abort-on-unavailable-fragment + (Alias: --no-abort-on-unavailable-fragments) + --abort-on-unavailable-fragments Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments) --keep-fragments Keep downloaded fragments on disk after @@ -477,12 +496,14 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --no-hls-use-mpegts Do not use the mpegts container for HLS videos. This is default when not downloading live streams - --download-sections REGEX Download only chapters whose title matches - the given regular expression. Time ranges - prefixed by a "*" can also be used in place - of chapters to download the specified range. - Needs ffmpeg. This option can be used - multiple times to download multiple + --download-sections REGEX Download only chapters that match the + regular expression. A "*" prefix denotes + time-range instead of chapter. Negative + timestamps are calculated from the end. + "*from-url" can be used to download between + the "start_time" and "end_time" extracted + from the URL. Needs ffmpeg. This option can + be used multiple times to download multiple sections, e.g. --download-sections "*10:15-inf" --download-sections "intro" --downloader [PROTO:]NAME Name or path of the external downloader to @@ -566,9 +587,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --write-description etc. (default) --no-write-playlist-metafiles Do not write playlist metadata when using --write-info-json, --write-description etc. - --clean-info-json Remove some private fields such as filenames - from the infojson. 
Note that it could still - contain some personal information (default) + --clean-info-json Remove some internal metadata such as + filenames from the infojson (default) --no-clean-info-json Write all fields to the infojson --write-comments Retrieve video comments to be placed in the infojson. The comments are fetched even @@ -596,7 +616,7 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi By default, all containers of the most recently accessed profile are used. Currently supported keyrings are: basictext, - gnomekeyring, kwallet + gnomekeyring, kwallet, kwallet5, kwallet6 --no-cookies-from-browser Do not load cookies from browser (default) --cache-dir DIR Location in the filesystem where hypervideo can store some downloaded information (such @@ -624,6 +644,7 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi ## Verbosity and Simulation Options: -q, --quiet Activate quiet mode. If used with --verbose, print the log to stderr + --no-quiet Deactivate quiet mode. (Default) --no-warnings Ignore warnings -s, --simulate Do not download the video and do not write anything to disk @@ -641,7 +662,7 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of - --use-postprocessor, and "video" (default). + --use-postprocessor (default: video). Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. This option can be used multiple times @@ -694,7 +715,7 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --prefer-insecure Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube) - --add-header FIELD:VALUE Specify a custom HTTP header and its value, + --add-headers FIELD:VALUE Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times --bidi-workaround Work around terminals that lack @@ -776,6 +797,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --netrc-location PATH Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc + --netrc-cmd NETRC_CMD Command to execute to get the credentials + for an extractor. --video-password PASSWORD Video password (vimeo, youku) --ap-mso MSO Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for @@ -810,11 +833,11 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if necessary (currently supported: avi, flv, - mkv, mov, mp4, webm, aac, aiff, alac, flac, - m4a, mka, mp3, ogg, opus, vorbis, wav). If - target container does not support the - video/audio codec, remuxing will fail. You - can specify multiple rules; e.g. + gif, mkv, mov, mp4, webm, aac, aiff, alac, + flac, m4a, mka, mp3, ogg, opus, vorbis, + wav). If target container does not support + the video/audio codec, remuxing will fail. + You can specify multiple rules; e.g. 
"aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if @@ -869,13 +892,18 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi mkv/mka video files --no-embed-info-json Do not embed the infojson as an attachment to the video file - --parse-metadata FROM:TO Parse additional metadata like title/artist + --parse-metadata [WHEN:]FROM:TO + Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" - for details - --replace-in-metadata FIELDS REGEX REPLACE + for details. Supported values of "WHEN" are + the same as that of --use-postprocessor + (default: pre_process) + --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE Replace text in a metadata field using the given regex. This option can be used - multiple times + multiple times. Supported values of "WHEN" + are the same as that of --use-postprocessor + (default: pre_process) --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --concat-playlist POLICY Concatenate videos in a playlist. One of @@ -896,16 +924,13 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with - when to execute it (after_move if - unspecified), separated by a ":". Supported - values of "WHEN" are the same as that of - --use-postprocessor. Same syntax as the - output template can be used to pass any - field as arguments to the command. After - download, an additional field "filepath" - that contains the final path of the - downloaded file is also available, and if no - fields are passed, %(filepath)q is appended + when to execute it, separated by a ":". + Supported values of "WHEN" are the same as + that of --use-postprocessor (default: + after_move). Same syntax as the output + template can be used to pass any field as + arguments to the command. If no fields are + passed, %(filepath,_filename|)q is appended to the end of the command. This option can be used multiple times --no-exec Remove any previously defined --exec @@ -945,19 +970,21 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi postprocessor is invoked. It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), - "before_dl" (before each video download), - "post_process" (after each video download; - default), "after_move" (after moving video - file to it's final locations), "after_video" - (after downloading and processing all - formats of a video), or "playlist" (at end - of playlist). This option can be used - multiple times to add different postprocessors + "video" (after --format; before + --print/--output), "before_dl" (before each + video download), "post_process" (after each + video download; default), "after_move" + (after moving video file to it's final + locations), "after_video" (after downloading + and processing all formats of a video), or + "playlist" (at end of playlist). This option + can be used multiple times to add different + postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, introductions, etc.) 
from downloaded YouTube videos using the - SponsorBlock API (https://sponsor.ajay.app) + [SponsorBlock API](https://sponsor.ajay.app) --sponsorblock-mark CATS SponsorBlock categories to create chapters for, separated by commas. Available @@ -1047,7 +1074,7 @@ E.g. with the following configuration file hypervideo will always extract the au -o ~/YouTube/%(title)s.%(ext)s ``` -Note that options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. +**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. You can use `--ignore-config` if you want to disable all configuration files for a particular hypervideo run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. @@ -1057,7 +1084,7 @@ The configuration files are decoded according to the UTF BOM if present, and in If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM. -### Authentication with `.netrc` file +### Authentication with netrc You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every hypervideo execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you: ``` @@ -1077,6 +1104,14 @@ To activate authentication with the `.netrc` file you should pass `--netrc` to h The default location of the .netrc file is `~` (see below). +As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter; it shall output the credentials in the netrc format and return `0` on success; any other value will be treated as an error. `{}` in the command will be replaced by the name of the extractor to make it possible to select the credentials for the right extractor. + +E.g. to use an encrypted `.netrc` file stored as `.authinfo.gpg` +``` +hypervideo --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc +``` + + ### Notes about environment variables * Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but are always shown as `${VARIABLE}` in this documentation * hypervideo also allows using UNIX-style variables on Windows for path-like options; e.g.
`--output`, `--config-location` @@ -1106,7 +1141,7 @@ The field names themselves (the part inside the parenthesis) can also have some 1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s` -1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. +1. **Replacement**: A replacement value can be specified using a `&` separator according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language). If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s` 1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s` @@ -1121,9 +1156,9 @@ To summarize, the general syntax for a field is: Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. - + -Note: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. +**Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete. 
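+As a combined sketch (an editorial illustration, not part of the original docs; `URL` stands for any supported link), the alternative fields, `&` replacement and `|` default described above can all appear in a single template:
+```
+# Year from release_date, falling back to upload_date, then to the literal "0000";
+# the title is truncated to 50 characters via the str.format mini-language
+$ hypervideo -o "%(release_date>%Y,upload_date>%Y|0000)s - %(title&{:.50}|untitled)s.%(ext)s" URL
+```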
The available fields are: @@ -1147,6 +1182,7 @@ The available fields are: - `channel` (string): Full name of the channel the video is uploaded on - `channel_id` (string): Id of the channel - `channel_follower_count` (numeric): Number of followers of the channel + - `channel_is_verified` (boolean): Whether the channel is verified on the platform - `location` (string): Physical location where the video was filmed - `duration` (numeric): Length of the video in seconds - `duration_string` (string): Length of the video (HH:mm:ss) @@ -1168,7 +1204,7 @@ The available fields are: - `extractor` (string): Name of the extractor - `extractor_key` (string): Key name of the extractor - `epoch` (numeric): Unix epoch of when the information extraction was completed - - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start` + - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits - `video_autonumber` (numeric): Number that will be increased with each video - `n_entries` (numeric): Total number of extracted items in the playlist - `playlist_id` (string): Identifier of the playlist that contains the video @@ -1231,7 +1267,6 @@ Available only when used in `--print`: - `subtitles_table` (table): The subtitle format table as printed by `--list-subs` - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs` - Available only in `--sponsorblock-chapter-title`: - `start_time` (numeric): Start time of the chapter in seconds @@ -1244,7 +1279,7 @@ Available only in `--sponsorblock-chapter-title`: Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `hypervideo test video` and id `BaW_jenozKc`, this will result in a `hypervideo test video-BaW_jenozKc.mp4` file created in the current directory. -Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). +**Note**: Some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). **Tip**: Look at the `-j` output to identify which fields are available for the particular URL @@ -1351,7 +1386,7 @@ Unless `--video-multistreams` is used, all formats with a video stream except th ## Filtering Formats -You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). +You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"` since filters without a selector are interpreted as `best`). The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): @@ -1385,9 +1420,9 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). 
The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`. -Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. +**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. -Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. +Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480. @@ -1405,13 +1440,13 @@ The available fields are: - `source`: The preference of the source - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`) - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other) - - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `eac3` > `ac3` > `dts` > other) + - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac4` > `eac3` > `ac3` > `dts` > other) - `codec`: Equivalent to `vcodec,acodec` - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac` - `ext`: Equivalent to `vext,aext` - `filesize`: Exact filesize, if known in advance - - `fs_approx`: Approximate filesize calculated from the manifests + - `fs_approx`: Approximate filesize - `size`: Exact filesize if available, otherwise approximate filesize - `height`: Height of video - `width`: Width of video @@ -1422,7 +1457,7 @@ The available fields are: - `tbr`: Total average bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s - `abr`: Average audio bitrate in KBit/s - - `br`: Equivalent to using `tbr,vbr,abr` + - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr` - `asr`: Audio sample rate in Hz **Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names. 
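Since these sort fields map directly onto `-S`, the API equivalent is a one-line option; the sketch below assumes the upstream option names (`devscripts/cli_to_api.py`, added later in this patch, prints exactly such CLI-to-API translations):

```python
import hypervideo_dl

# API equivalent of `-S "res:1080,fps,codec"`: prefer resolutions up to 1080p,
# then higher fps, then the codec ranking documented above.
ydl_opts = {'format_sort': ['res:1080', 'fps', 'codec']}

with hypervideo_dl.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```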
@@ -1572,7 +1607,7 @@ Note that these options preserve their relative order, allowing replacements to This option also has a few special uses: -* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description +* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values. @@ -1635,17 +1670,20 @@ $ hypervideo --replace-in-metadata "title,uploader" "[ _]" "-" Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client` + The following extractors use this feature: #### youtube -* `lang`: Language code to prefer translated metadata of this language (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/hypervideo_dl/extractor/youtube.py#L381-L390) for list of supported content language codes +* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/hypervideo_dl/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. 
`web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details +* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total -* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8) +* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests @@ -1654,7 +1692,10 @@ The following extractors use this feature: * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off #### generic -* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg +* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg +* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE` +* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist +* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` #### funimation * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` @@ -1682,6 +1723,7 @@ The following extractors use this feature: * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` #### tiktok +* `api_hostname`: Hostname to use for mobile API requests, e.g. 
`api-h2.tiktokv.com` * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` * `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221` @@ -1689,9 +1731,18 @@ The following extractors use this feature: * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` #### twitter -* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided +* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed + +#### stacommu, wrestleuniverse +* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage + +#### twitch +* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitch:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko` -NOTE: These options may be changed/removed in the future without concern for backward compatibility +#### nhkradirulive (NHK らじる★らじる LIVE) +* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo` + +**Note**: These options may be changed/removed in the future without concern for backward compatibility @@ -1700,17 +1751,69 @@ NOTE: These options may be changed/removed in the future without concern for bac Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`; where `<root-dir>` is the directory of the binary (`<root-dir>/hypervideo`), or the root directory of the module if you are running directly from source-code (`<root-dir>/hypervideo_dl/__main__.py`). Plugins are currently not supported for the `pip` version -Plugins can be of `<type>`s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`. +Plugins can be of `<type>`s `extractor` or `postprocessor`. +- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. +- Extractor plugins take priority over builtin extractors. +- Postprocessor plugins can be invoked using `--use-postprocessor NAME`. + -See [ytdlp_plugins](ytdlp_plugins) for example plugins. +Plugins are loaded from the namespace packages `hypervideo_dl_plugins.extractor` and `hypervideo_dl_plugins.postprocessor`. -Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code +In other words, the file structure on the disk looks something like: -If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability + hypervideo_dl_plugins/ + extractor/ + myplugin.py + postprocessor/ + myplugin.py + +yt-dlp looks for these `hypervideo_dl_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) +## Installing Plugins + +Plugins can be installed using various methods and locations. + +1. 
**Configuration directories**: + Plugin packages (containing a `hypervideo_dl_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration): + * **User Plugins** + * `${XDG_CONFIG_HOME}/yt-dlp/plugins/<package name>/hypervideo_dl_plugins/` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp-plugins/<package name>/hypervideo_dl_plugins/` + * `${APPDATA}/yt-dlp/plugins/<package name>/hypervideo_dl_plugins/` (recommended on Windows) + * `${APPDATA}/yt-dlp-plugins/<package name>/hypervideo_dl_plugins/` + * `~/.yt-dlp/plugins/<package name>/hypervideo_dl_plugins/` + * `~/yt-dlp-plugins/<package name>/hypervideo_dl_plugins/` + * **System Plugins** + * `/etc/yt-dlp/plugins/<package name>/hypervideo_dl_plugins/` + * `/etc/yt-dlp-plugins/<package name>/hypervideo_dl_plugins/` +2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location (recommended for portable installations): + * Binary: where `<root-dir>/yt-dlp.exe`, `<root-dir>/yt-dlp-plugins/<package name>/hypervideo_dl_plugins/` + * Source: where `<root-dir>/hypervideo_dl/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/hypervideo_dl_plugins/` + +3. **pip and other locations in `PYTHONPATH`** + * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Note: plugin files between plugin packages installed with pip must have unique filenames. + * Any path in `PYTHONPATH` is searched for the `hypervideo_dl_plugins` namespace folder. + * Note: This does not apply for Pyinstaller/py2exe builds. + + +`.zip`, `.egg` and `.whl` archives containing a `hypervideo_dl_plugins` namespace folder in their root are also supported as plugin packages. +* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `hypervideo_dl_plugins/<type>/myplugin.py` +Run yt-dlp with `--verbose` to check if the plugin has been loaded. + +## Developing Plugins + +See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide. + +All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`). + +To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. + +If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability. + +See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor. # EMBEDDING HYPERVIDEO @@ -1872,7 +1975,7 @@ with hypervideo_dl.YoutubeDL() as ydl: ```python import hypervideo_dl -URL = ['https://www.youtube.com/watch?v=BaW_jenozKc'] +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] def format_selector(ctx): """ Select the best video and the best audio that won't result in an mkv. 
@@ -1938,12 +2041,14 @@ While these options are redundant, they are still expected to be used due to the --reject-title REGEX --match-filter "title !~= (?i)REGEX" --min-views COUNT --match-filter "view_count >=? COUNT" --max-views COUNT --match-filter "view_count <=? COUNT" + --break-on-reject Use --break-match-filter --user-agent UA --add-header "User-Agent:UA" --referer URL --add-header "Referer:URL" --playlist-start NUMBER -I NUMBER: --playlist-end NUMBER -I :NUMBER --playlist-reverse -I ::-1 --no-playlist-reverse Default + --no-colors --color no_color #### Not recommended @@ -1967,6 +2072,10 @@ While these options still work, their use is not recommended since there are oth --youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest) --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest) --youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest) + --geo-bypass --xff "default" + --no-geo-bypass --xff "never" + --geo-bypass-country CODE --xff CODE + --geo-bypass-ip-block IP_BLOCK --xff IP_BLOCK #### Developer options diff --git a/completions/zsh/_hypervideo b/completions/zsh/_hypervideo index f31f234..b0068a9 100644 --- a/completions/zsh/_hypervideo +++ b/completions/zsh/_hypervideo @@ -21,7 +21,7 @@ __hypervideo_dl() { elif [[ ${prev} == "--recode-video" ]]; then _arguments '*: :(mp4 flv ogg webm mkv)' else - _arguments '*: :(--help --version --ignore-errors --no-abort-on-error --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --use-extractors --force-generic-extractor --default-search --ignore-config --no-config-locations --config-locations --flat-playlist --no-flat-playlist --live-from-start --no-live-from-start --wait-for-video --no-wait-for-video --mark-watched --no-mark-watched --no-colors --compat-options --alias --proxy --socket-timeout --source-address --force-ipv4 --force-ipv6 --geo-verification-proxy --cn-verification-proxy --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filters --no-match-filter --no-playlist --yes-playlist --age-limit --download-archive --no-download-archive --max-downloads --break-on-existing --break-on-reject --break-per-input --no-break-per-input --skip-playlist-after-errors --include-ads --no-include-ads --concurrent-fragments --limit-rate --throttled-rate --retries --file-access-retries --fragment-retries --retry-sleep --skip-unavailable-fragments --abort-on-unavailable-fragment --keep-fragments --no-keep-fragments --buffer-size --resize-buffer --no-resize-buffer --http-chunk-size --test --playlist-reverse --no-playlist-reverse --playlist-random --lazy-playlist --no-lazy-playlist --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --no-hls-use-mpegts --download-sections --downloader --downloader-args --batch-file --no-batch-file --id --paths --output --output-na-placeholder --autonumber-size --autonumber-start --restrict-filenames --no-restrict-filenames --windows-filenames --no-windows-filenames --trim-filenames --no-overwrites --force-overwrites --no-force-overwrites --continue --no-continue --part --no-part --mtime --no-mtime --write-description --no-write-description --write-info-json --no-write-info-json --write-annotations --no-write-annotations --write-playlist-metafiles --no-write-playlist-metafiles 
--clean-info-json --no-clean-info-json --write-comments --no-write-comments --load-info-json --cookies --no-cookies --cookies-from-browser --no-cookies-from-browser --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --no-write-thumbnail --write-all-thumbnails --list-thumbnails --write-link --write-url-link --write-webloc-link --write-desktop-link --quiet --no-warnings --simulate --no-simulate --ignore-no-formats-error --no-ignore-no-formats-error --skip-download --print --print-to-file --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --force-write-archive --newline --no-progress --progress --console-title --progress-template --verbose --dump-pages --write-pages --load-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --legacy-server-connect --no-check-certificates --prefer-insecure --user-agent --referer --add-header --bidi-workaround --sleep-requests --sleep-interval --max-sleep-interval --sleep-subtitles --format --format-sort --format-sort-force --no-format-sort-force --video-multistreams --no-video-multistreams --audio-multistreams --no-audio-multistreams --all-formats --prefer-free-formats --no-prefer-free-formats --check-formats --check-all-formats --no-check-formats --list-formats --list-formats-as-table --list-formats-old --merge-output-format --allow-unplayable-formats --no-allow-unplayable-formats --write-subs --no-write-subs --write-auto-subs --no-write-auto-subs --all-subs --list-subs --sub-format --sub-langs --username --password --twofactor --netrc --netrc-location --video-password --ap-mso --ap-username --ap-password --ap-list-mso --client-certificate --client-certificate-key --client-certificate-password --extract-audio --audio-format --audio-quality --remux-video --recode-video --postprocessor-args --keep-video --no-keep-video --post-overwrites --no-post-overwrites --embed-subs --no-embed-subs --embed-thumbnail --no-embed-thumbnail --embed-metadata --no-embed-metadata --embed-chapters --no-embed-chapters --embed-info-json --no-embed-info-json --metadata-from-title --parse-metadata --replace-in-metadata --xattrs --concat-playlist --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --no-exec --exec-before-download --no-exec-before-download --convert-subs --convert-thumbnails --split-chapters --no-split-chapters --remove-chapters --no-remove-chapters --force-keyframes-at-cuts --no-force-keyframes-at-cuts --use-postprocessor --sponsorblock-mark --sponsorblock-remove --sponsorblock-chapter-title --no-sponsorblock --sponsorblock-api --sponskrub --no-sponskrub --sponskrub-cut --no-sponskrub-cut --sponskrub-force --no-sponskrub-force --sponskrub-location --sponskrub-args --extractor-retries --allow-dynamic-mpd --ignore-dynamic-mpd --hls-split-discontinuity --no-hls-split-discontinuity --extractor-args --youtube-include-dash-manifest --youtube-skip-dash-manifest --youtube-include-hls-manifest --youtube-skip-hls-manifest)' + _arguments '*: :(--help --version --ignore-errors --no-abort-on-error --abort-on-error --dump-user-agent --list-extractors --extractor-descriptions --use-extractors --force-generic-extractor --default-search --ignore-config --no-config-locations --config-locations --flat-playlist --no-flat-playlist --live-from-start --no-live-from-start --wait-for-video --no-wait-for-video --mark-watched --no-mark-watched --no-colors --color --compat-options --alias --proxy --socket-timeout --source-address --force-ipv4 
--force-ipv6 --enable-file-urls --geo-verification-proxy --cn-verification-proxy --xff --geo-bypass --no-geo-bypass --geo-bypass-country --geo-bypass-ip-block --playlist-start --playlist-end --playlist-items --match-title --reject-title --min-filesize --max-filesize --date --datebefore --dateafter --min-views --max-views --match-filters --no-match-filters --break-match-filters --no-break-match-filters --no-playlist --yes-playlist --age-limit --download-archive --no-download-archive --max-downloads --break-on-existing --break-on-reject --break-per-input --no-break-per-input --skip-playlist-after-errors --include-ads --no-include-ads --concurrent-fragments --limit-rate --throttled-rate --retries --file-access-retries --fragment-retries --retry-sleep --skip-unavailable-fragments --abort-on-unavailable-fragments --keep-fragments --no-keep-fragments --buffer-size --resize-buffer --no-resize-buffer --http-chunk-size --test --playlist-reverse --no-playlist-reverse --playlist-random --lazy-playlist --no-lazy-playlist --xattr-set-filesize --hls-prefer-native --hls-prefer-ffmpeg --hls-use-mpegts --no-hls-use-mpegts --download-sections --downloader --downloader-args --batch-file --no-batch-file --id --paths --output --output-na-placeholder --autonumber-size --autonumber-start --restrict-filenames --no-restrict-filenames --windows-filenames --no-windows-filenames --trim-filenames --no-overwrites --force-overwrites --no-force-overwrites --continue --no-continue --part --no-part --mtime --no-mtime --write-description --no-write-description --write-info-json --no-write-info-json --write-annotations --no-write-annotations --write-playlist-metafiles --no-write-playlist-metafiles --clean-info-json --no-clean-info-json --write-comments --no-write-comments --load-info-json --cookies --no-cookies --cookies-from-browser --no-cookies-from-browser --cache-dir --no-cache-dir --rm-cache-dir --write-thumbnail --no-write-thumbnail --write-all-thumbnails --list-thumbnails --write-link --write-url-link --write-webloc-link --write-desktop-link --quiet --no-quiet --no-warnings --simulate --no-simulate --ignore-no-formats-error --no-ignore-no-formats-error --skip-download --print --print-to-file --get-url --get-title --get-id --get-thumbnail --get-description --get-duration --get-filename --get-format --dump-json --dump-single-json --print-json --force-write-archive --newline --no-progress --progress --console-title --progress-template --verbose --dump-pages --write-pages --load-pages --youtube-print-sig-code --print-traffic --call-home --no-call-home --encoding --legacy-server-connect --no-check-certificates --prefer-insecure --user-agent --referer --add-headers --bidi-workaround --sleep-requests --sleep-interval --max-sleep-interval --sleep-subtitles --format --format-sort --format-sort-force --no-format-sort-force --video-multistreams --no-video-multistreams --audio-multistreams --no-audio-multistreams --all-formats --prefer-free-formats --no-prefer-free-formats --check-formats --check-all-formats --no-check-formats --list-formats --list-formats-as-table --list-formats-old --merge-output-format --allow-unplayable-formats --no-allow-unplayable-formats --write-subs --no-write-subs --write-auto-subs --no-write-auto-subs --all-subs --list-subs --sub-format --sub-langs --username --password --twofactor --netrc --netrc-location --netrc-cmd --video-password --ap-mso --ap-username --ap-password --ap-list-mso --client-certificate --client-certificate-key --client-certificate-password --extract-audio --audio-format 
--audio-quality --remux-video --recode-video --postprocessor-args --keep-video --no-keep-video --post-overwrites --no-post-overwrites --embed-subs --no-embed-subs --embed-thumbnail --no-embed-thumbnail --embed-metadata --no-embed-metadata --embed-chapters --no-embed-chapters --embed-info-json --no-embed-info-json --metadata-from-title --parse-metadata --replace-in-metadata --xattrs --concat-playlist --fixup --prefer-avconv --prefer-ffmpeg --ffmpeg-location --exec --no-exec --exec-before-download --no-exec-before-download --convert-subs --convert-thumbnails --split-chapters --no-split-chapters --remove-chapters --no-remove-chapters --force-keyframes-at-cuts --no-force-keyframes-at-cuts --use-postprocessor --sponsorblock-mark --sponsorblock-remove --sponsorblock-chapter-title --no-sponsorblock --sponsorblock-api --sponskrub --no-sponskrub --sponskrub-cut --no-sponskrub-cut --sponskrub-force --no-sponskrub-force --sponskrub-location --sponskrub-args --extractor-retries --allow-dynamic-mpd --ignore-dynamic-mpd --hls-split-discontinuity --no-hls-split-discontinuity --extractor-args --youtube-include-dash-manifest --youtube-skip-dash-manifest --youtube-include-hls-manifest --youtube-skip-hls-manifest)' fi ;; esac diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 0000000..d03db3f --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1,73 @@ +[ + { + "action": "add", + "when": "29cb20bd563c02671b31dd840139e93dd37150a1", + "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`" + }, + { + "action": "add", + "when": "5038f6d713303e0967d002216e7a88652401c22a", + "short": "[priority] **YouTube throttling fixes!**" + }, + { + "action": "remove", + "when": "2e023649ea4e11151545a34dc1360c114981a236" + }, + { + "action": "add", + "when": "01aba2519a0884ef17d5f85608dbd2a455577147", + "short": "[priority] YouTube: Improved throttling and signature fixes" + }, + { + "action": "change", + "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38", + "short": "[extractor/niconico:series] Fix extraction (#6898)", + "authors": ["sqrtNOT"] + }, + { + "action": "change", + "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2", + "short": "[extractor/youtube:music_search_url] Extract title (#7102)", + "authors": ["kangalio"] + }, + { + "action": "change", + "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb", + "short": "Add option `--color` (#6904)", + "authors": ["Grub4K"] + }, + { + "action": "change", + "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56", + "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "1e75d97db21152acc764b30a688e516f04b8a142", + "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher 
bit-rate 'premium' formats though they are not labeled as such", + "authors": ["pukkandan"] + }, + { + "action": "change", + "when": "f2ff0f6f1914b82d4a51681a72cc0828115dcb4a", + "short": "[extractor/motherless] Add gallery support, fix groups (#7211)", + "authors": ["rexlambert22", "Ti4eeT4e"] + }, + { + "action": "change", + "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700", + "short": "[misc] Revert \"Add automatic duplicate issue detection\"", + "authors": ["pukkandan"] + }, + { + "action": "add", + "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729", + "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`" + }, + { + "action": "change", + "when": "b03fa7834579a01cc5fba48c0e73488a16683d48", + "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b" + } +] diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 0000000..9bd747b --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/cli_to_api.py b/devscripts/cli_to_api.py new file mode 100644 index 0000000..563fa9e --- /dev/null +++ b/devscripts/cli_to_api.py @@ -0,0 +1,48 @@ +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import hypervideo_dl +import hypervideo_dl.options + +create_parser = hypervideo_dl.options.create_parser + + +def parse_patched_options(opts): + patched_parser = create_parser() + patched_parser.defaults.update({ + 'ignoreerrors': False, + 'retries': 0, + 'fragment_retries': 0, + 'extract_flat': False, + 'concat_playlist': 'never', + }) + hypervideo_dl.options.create_parser = lambda: patched_parser + try: + return hypervideo_dl.parse_options(opts) + finally: + hypervideo_dl.options.create_parser = create_parser + + +default_opts = parse_patched_options([]).ydl_opts + + 
+def cli_to_api(opts, cli_defaults=False): + opts = (hypervideo_dl.parse_options if cli_defaults else parse_patched_options)(opts).ydl_opts + + diff = {k: v for k, v in opts.items() if default_opts[k] != v} + if 'postprocessors' in diff: + diff['postprocessors'] = [pp for pp in diff['postprocessors'] + if pp not in default_opts['postprocessors']] + return diff + + +if __name__ == '__main__': + from pprint import pprint + + print('\nThe arguments passed translate to:\n') + pprint(cli_to_api(sys.argv[1:])) + print('\nCombining these with the CLI defaults gives:\n') + pprint(cli_to_api(sys.argv[1:], True)) diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py index c8815e0..6f52165 100644 --- a/devscripts/lazy_load_template.py +++ b/devscripts/lazy_load_template.py @@ -6,6 +6,7 @@ from ..utils import ( age_restricted, bug_reports_message, classproperty, + variadic, write_string, ) diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 index 0000000..1206fd9 --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,510 @@ +from __future__ import annotations + +# Allow direct execution +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import enum +import itertools +import json +import logging +import re +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +from devscripts.utils import read_file, run_process, write_file + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent +HASH_LENGTH = 7 + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + MISC = 'Misc.' + + @classmethod + @property + def ignorable_prefixes(cls): + return ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream') + + @classmethod + @lru_cache + def commit_lookup(cls): + return { + name: group + for group, names in { + cls.PRIORITY: {'priority'}, + cls.CORE: { + 'aes', + 'cache', + 'compat_utils', + 'compat', + 'cookies', + 'core', + 'dependencies', + 'formats', + 'jsinterp', + 'networking', + 'outtmpl', + 'plugins', + 'update', + 'upstream', + 'utils', + }, + cls.MISC: { + 'build', + 'cleanup', + 'devscripts', + 'docs', + 'misc', + 'test', + }, + cls.EXTRACTOR: {'extractor', 'ie'}, + cls.DOWNLOADER: {'downloader', 'fd'}, + cls.POSTPROCESSOR: {'postprocessor', 'pp'}, + }.items() + for name in names + } + + @classmethod + def get(cls, value): + result = cls.commit_lookup().get(value) + if result: + logger.debug(f'Mapped {value!r} => {result.name}') + return result + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:HASH_LENGTH]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] 
+ message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +def unique(items): + return sorted({item.strip().lower(): item for item in items if item}.values()) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + ALWAYS_SHOWN = (CommitGroup.PRIORITY,) + + def __init__(self, groups, repo, collapsible=False): + self._groups = groups + self._repo = repo + self._collapsible = collapsible + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + first = True + for item in CommitGroup: + if self._collapsible and item not in self.ALWAYS_SHOWN and first: + first = False + yield '\n
<details><summary><h3>Changelog</h3></summary>\n' + + group = groups[item] + if group: + yield self.format_module(item.value, group) + + if self._collapsible: + yield '\n</details>
' + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for _, items in detail_groups: + items = list(items) + details = items[0].details + + if details == 'cleanup': + items = self._prepare_cleanup_misc_items(items) + + prefix = '-' + if details: + if len(items) == 1: + prefix = f'- **{details}**:' + else: + yield f'- **{details}**' + prefix = '\t-' + + sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details))) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{prefix} {self.format_single_change(entry)}' + continue + + entries = list(entries) + sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}' + if len(entries) == 1: + yield f'{sub_prefix}: {self.format_single_change(entries[0])}' + continue + + yield sub_prefix + for entry in entries: + yield f'\t{prefix} {self.format_single_change(entry)}' + + def _prepare_cleanup_misc_items(self, items): + cleanup_misc_items = defaultdict(list) + sorted_items = [] + for item in items: + if self.MISC_RE.search(item.message): + cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + sorted_items.append(item) + + for commit_infos in cleanup_misc_items.values(): + sorted_items.append(CommitInfo( + 'cleanup', ('Miscellaneous',), ', '.join( + self._format_message_link(None, info.commit.hash).strip() + for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')), + [], Commit(None, '', commit_infos[0].commit.authors), [])) + + return sorted_items + + def format_single_change(self, info): + message = self._format_message_link(info.message, info.commit.hash) + if info.issues: + message = message.replace('\n', f' ({self._format_issues(info.issues)})\n', 1) + + if info.commit.authors: + message = message.replace('\n', f' by {self._format_authors(info.commit.authors)}\n', 1) + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = message.replace('\n', f' (With fixes in {fix_message})\n', 1) + + return message[:-1] + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined commit message or override' + message = message if message else hash[:HASH_LENGTH] + if not hash: + return f'{message}\n' + return f'[{message}\n'.replace('\n', f']({self.repo_url}/commit/{hash})\n', 1) + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[(?P<prefix>[^\]]+)\]\ )? + (?:(?P<sub_details>`?[^:`]+`?): )? + (?P<message>.+?) + (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))? 
+ ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None): + self._start, self._end = start, end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _get_commits_and_fixes(self, default_author): + result = run_process( + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}' if self._start else self._end).stdout + + commits, reverts = {}, {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for i, commit_hash in enumerate(lines): + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip and (self._start or not i): + logger.debug(f'Skipped commit: {commit}') + continue + elif skip: + logger.debug(f'Reached Release commit, breaking: {commit}') + break + + revert_match = self.REVERT_RE.fullmatch(commit.short) + if revert_match: + reverts[revert_match.group(1)] = commit + continue + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, revert_commit in reverts.items(): + reverted = commits.pop(commitish, None) + if reverted: + logger.debug(f'{commit} fully reverted {reverted}') + else: + commits[revert_commit.hash] = revert_commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r}, not in commits {self._start!r}') + continue + + override_hash = override.get('hash') or when + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], 
override.get('authors') or []) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + group_dict = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short) + if upstream_re: + commit.short = f'[core/upstream] Merged with youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, sub_details_alt, message, issues = match.groups() + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if prefix: + groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(','))) + group = next(iter(filter(None, groups)), None) + details = ', '.join(unique(details)) + sub_details = list(itertools.chain.from_iterable(sub_details)) + else: + group = CommitGroup.CORE + details = None + sub_details = [] + + if sub_details_alt: + sub_details.append(sub_details_alt) + sub_details = tuple(unique(sub_details)) + + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + else: + group = CommitGroup.POSTPROCESSOR + logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}') + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + + logger.debug(f'Resolved {commit.short!r} to {commit_info!r}') + group_dict[group].append(commit_info) + + return group_dict + + @staticmethod + def details_from_prefix(prefix): + if not prefix: + return CommitGroup.CORE, None, () + + prefix, _, details = prefix.partition('/') + prefix = prefix.strip() + details = details.strip() + + group = CommitGroup.get(prefix.lower()) + if group is CommitGroup.PRIORITY: + prefix, _, details = details.partition('/') + + if not details and prefix and prefix not in CommitGroup.ignorable_prefixes: + logger.debug(f'Replaced details with {prefix!r}') + details = prefix or None + + if details == 'common': + details = None + + if details: + details, *sub_details = details.split(':') + else: + sub_details = [] + + return group, details, sub_details + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + for line in read_file(contributors_path).splitlines(): + author, _, _ = line.strip().partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + 
help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without a author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='hypervideo/hypervideo', + help='the github repository to use for the operations (default: %(default)s)') + parser.add_argument( + '--collapsible', action='store_true', + help='make changelog collapsible (default: %(default)s)') + args = parser.parse_args() + + logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange(None, args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + overrides = json.loads(read_file(args.override_path)) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo, args.collapsible)) diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 69e1758..bc4b5ac 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -40,8 +40,12 @@ def main(): _ALL_CLASSES = get_all_ies() # Must be before import + import hypervideo_dl.plugins from hypervideo_dl.extractor.common import InfoExtractor, SearchInfoExtractor + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{hypervideo_dl.plugins.PACKAGE_NAME}.')] + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( MODULE_TEMPLATE, diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 6adfca0..525349f 100644 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -45,33 +45,43 @@ switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) delim = f'\n{" " * switch_col_width}' PATCHES = ( - ( # Standardize update message + ( # Standardize `--update` message r'(?m)^( -U, --update\s+).+(\n \s.+)*$', r'\1Update this program to the latest version', ), - ( # Headings + ( # Headings r'(?m)^ (\w.+\n)( (?=\w))?', r'## \1' ), - ( # Do not split URLs + ( # Fixup `--date` formatting + rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', + (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}' + f'E.g. "--date today-2weeks" downloads only{delim}' + 'videos uploaded on the same day two weeks ago'), + ), + ( # Do not split URLs rf'({delim[:-1]})? (?P

[\S\s]*?by\s*<span>\s*<a href="[^>]+">\s*([^>]+?)\s*</a>', + webpage, 'album artist', fatal=False) timestamp = unified_timestamp( current.get('publish_date') or tralbum.get('album_publish_date')) @@ -205,6 +246,7 @@ class BandcampIE(InfoExtractor): 'track_id': track_id, 'artist': artist, 'album': embed.get('album_title'), + 'album_artist': album_artist, 'formats': formats, } diff --git a/hypervideo_dl/extractor/bbc.py b/hypervideo_dl/extractor/bbc.py index 9d28e70..a55cdef 100644 --- a/hypervideo_dl/extractor/bbc.py +++ b/hypervideo_dl/extractor/bbc.py @@ -2,11 +2,11 @@ import functools import itertools import json import re -import urllib.error import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_HTTPError, compat_str, compat_urlparse +from ..compat import compat_str, compat_urlparse +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, OnDemandPagedList, @@ -277,7 +277,7 @@ class BBCCoUkIE(InfoExtractor): post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={'Referer': self._LOGIN_URL}) - if self._LOGIN_URL in urlh.geturl(): + if self._LOGIN_URL in urlh.url: error = clean_html(get_element_by_class('form-message', response)) if error: raise ExtractorError( @@ -388,8 +388,8 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) except ExtractorError as e: - if not (isinstance(e.exc_info[1], urllib.error.HTTPError) - and e.exc_info[1].code in (403, 404)): + if not (isinstance(e.exc_info[1], HTTPError) + and e.exc_info[1].status in (403, 404)): raise fmts = [] formats.extend(fmts) @@ -472,7 +472,7 @@ class BBCCoUkIE(InfoExtractor): return programme_id, title, description, duration, formats, subtitles except ExtractorError as ee: - if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): + if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404): raise # fallback to legacy playlist @@ -983,7 +983,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # Some playlist URL may fail with 500, at the same time # the other one may work fine (e.g. 
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + if isinstance(e.cause, HTTPError) and e.cause.status == 500: continue raise if entry: diff --git a/hypervideo_dl/extractor/beatbump.py b/hypervideo_dl/extractor/beatbump.py new file mode 100644 index 0000000..0f40ebe --- /dev/null +++ b/hypervideo_dl/extractor/beatbump.py @@ -0,0 +1,101 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE, YoutubeTabIE + + +class BeatBumpVideoIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/listen\?id=(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs', + 'md5': '5ff3fff41d3935b9810a9731e485fe66', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'artist': 'Stephen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', + 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA', + 'upload_date': '20190312', + 'categories': ['Music'], + 'playable_in_embed': True, + 'duration': 169, + 'like_count': int, + 'alt_title': 'Voyeur Girl', + 'view_count': int, + 'track': 'Voyeur Girl', + 'uploader': 'Stephen - Topic', + 'title': 'Voyeur Girl', + 'channel_follower_count': int, + 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA', + 'age_limit': 0, + 'availability': 'public', + 'live_status': 'not_live', + 'album': 'it\'s too much love to know my dear', + 'channel': 'Stephen', + 'comment_count': int, + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'tags': 'count:11', + 'creator': 'Stephen', + 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/watch?v={id_}', YoutubeIE, id_) + + +class BeatBumpPlaylistIE(InfoExtractor): + _VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE', + 'playlist_count': 50, + 'info_dict': { + 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', + 'availability': 'unlisted', + 'view_count': int, + 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', + 'description': '', + 'tags': [], + 'modified_date': '20221223', + } + }, { + 'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'channel_follower_count': int, + 'title': 'NoCopyrightSounds - Videos', + 'uploader': 'NoCopyrightSounds', + 'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a', + 'channel': 'NoCopyrightSounds', + 'tags': 'count:12', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + }, + }, { + 'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'playlist_mincount': 1, + 'params': {'flatplaylist': True}, + 'info_dict': { + 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', + 'view_count': int, + 'channel_url': 'https://www.youtube.com/@NoCopyrightSounds', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'title': 'NCS : All Releases 💿', + 
'uploader': 'NoCopyrightSounds', + 'availability': 'public', + 'channel': 'NoCopyrightSounds', + 'tags': [], + 'modified_date': '20221225', + 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + } + }] + + def _real_extract(self, url): + id_ = self._match_id(url) + return self.url_result(f'https://music.youtube.com/browse/{id_}', YoutubeTabIE, id_) diff --git a/hypervideo_dl/extractor/bfmtv.py b/hypervideo_dl/extractor/bfmtv.py index d86d283..a7be0e6 100644 --- a/hypervideo_dl/extractor/bfmtv.py +++ b/hypervideo_dl/extractor/bfmtv.py @@ -5,7 +5,7 @@ from ..utils import extract_attributes class BFMTVBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/' + _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/' _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P\d{12})\.html' _VIDEO_BLOCK_REGEX = r'(]+class="video_block"[^>]*>)' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' @@ -31,6 +31,9 @@ class BFMTVIE(BFMTVBaseIE): 'uploader_id': '876450610001', 'upload_date': '20201002', 'timestamp': 1601629620, + 'duration': 44.757, + 'tags': ['bfmactu', 'politique'], + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876450610001/5041f4c1-bc48-4af8-a256-1b8300ad8ef0/cf2f9114-e8e2-4494-82b4-ab794ea4bc7d/1920x1080/match/image.jpg', }, }] @@ -81,6 +84,20 @@ class BFMTVArticleIE(BFMTVBaseIE): }, { 'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html', 'only_matching': True, + }, { + 'url': 'https://rmc.bfmtv.com/actualites/societe/transports/ce-n-est-plus-tout-rentable-le-bioethanol-e85-depasse-1eu-le-litre-des-automobilistes-regrettent_AV-202301100268.html', + 'info_dict': { + 'id': '6318445464112', + 'ext': 'mp4', + 'title': 'Le plein de bioéthanol fait de plus en plus mal à la pompe', + 'description': None, + 'uploader_id': '876630703001', + 'upload_date': '20230110', + 'timestamp': 1673341692, + 'duration': 109.269, + 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' + } }] def _real_extract(self, url): diff --git a/hypervideo_dl/extractor/bibeltv.py b/hypervideo_dl/extractor/bibeltv.py index fd20aad..34464da 100644 --- a/hypervideo_dl/extractor/bibeltv.py +++ b/hypervideo_dl/extractor/bibeltv.py @@ -1,27 +1,197 @@ +from functools import partial + from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + determine_ext, + format_field, + int_or_none, + js_to_json, + orderedSet, + parse_iso8601, + traverse_obj, + url_or_none, +) + + +class BibelTVBaseIE(InfoExtractor): + _GEO_COUNTRIES = ['AT', 'CH', 'DE'] + _GEO_BYPASS = False + + API_URL = 'https://www.bibeltv.de/mediathek/api' + AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm' + + def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False): + formats = [] + subtitles = {} + for media_url in traverse_obj(data, (..., 'src', {url_or_none})): + media_ext = determine_ext(media_url) + if media_ext == 'm3u8': + m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles( + media_url, crn_id, live=is_live) + formats.extend(m3u8_formats) + subtitles.update(m3u8_subs) + elif media_ext == 'mpd': + mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id) + 
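+                # Like HLS above, a DASH manifest contributes both formats and subtitle tracks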
formats.extend(mpd_formats) + subtitles.update(mpd_subs) + elif media_ext == 'mp4': + formats.append({'url': media_url}) + else: + self.report_warning(f'Unknown format {media_ext!r}') + + return formats, subtitles + + @staticmethod + def _extract_base_info(data): + return { + 'id': data['crn'], + **traverse_obj(data, { + 'title': 'title', + 'description': 'description', + 'duration': ('duration', {partial(int_or_none, scale=1000)}), + 'timestamp': ('schedulingStart', {parse_iso8601}), + 'season_number': 'seasonNumber', + 'episode_number': 'episodeNumber', + 'view_count': 'viewCount', + 'like_count': 'likeCount', + }), + 'thumbnails': orderedSet(traverse_obj(data, ('images', ..., { + 'url': ('url', {url_or_none}), + }))), + } + + def _extract_url_info(self, data): + return { + '_type': 'url', + 'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'), + **self._extract_base_info(data), + } + + def _extract_video_info(self, data): + crn_id = data['crn'] + if data.get('drm'): + self.report_drm(crn_id) + + json_data = self._download_json( + format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id, + headers={'Authorization': self.AUTH_TOKEN}, fatal=False, + errnote='No formats available') or {} + + formats, subtitles = self._extract_formats_and_subtitles( + traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id) + + return { + '_type': 'video', + **self._extract_base_info(data), + 'formats': formats, + 'subtitles': subtitles, + } + + +class BibelTVVideoIE(BibelTVBaseIE): + IE_DESC = 'BibelTV single video' + _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P\d+)[\w-]+' + IE_NAME = 'bibeltv:video' -class BibelTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P\d+)' _TESTS = [{ - 'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch', - 'md5': '252f908192d611de038b8504b08bf97f', + 'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege', + 'md5': 'ec1c07efe54353780512e8a4103b612e', 'info_dict': { - 'id': 'ref:329703', + 'id': '344436', 'ext': 'mp4', - 'title': 'Sprachkurs in Malaiisch', - 'description': 'md5:3e9f197d29ee164714e67351cf737dfe', - 'timestamp': 1608316701, - 'uploader_id': '5840105145001', - 'upload_date': '20201218', - } + 'title': 'Alte Wege', + 'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9', + 'timestamp': 1677877071, + 'duration': 150.0, + 'upload_date': '20230303', + 'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg', + 'episode': 'Episode 1', + 'episode_number': 1, + 'view_count': int, + 'like_count': int, + }, + 'params': { + 'format': '6', + }, + }] + + def _real_extract(self, url): + crn_id = self._match_id(url) + video_data = traverse_obj( + self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id), + ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict})) + if not video_data: + raise ExtractorError('Missing video data.') + + return self._extract_video_info(video_data) + + +class BibelTVSeriesIE(BibelTVBaseIE): + IE_DESC = 'BibelTV series playlist' + _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P\d+)[\w-]+' + IE_NAME = 'bibeltv:series' + + _TESTS = [{ + 'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag', + 'playlist_mincount': 400, + 'info_dict': { + 'id': '333485', + 'title': 'Ein Wunder für jeden Tag', + 'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.', + }, + }] + + def _real_extract(self, url): + crn_id = self._match_id(url) + webpage = 
self._download_webpage(url, crn_id) + nextjs_data = self._search_nextjs_data(webpage, crn_id) + series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict})) + if not series_data: + raise ExtractorError('Missing series data.') + + return self.playlist_result( + traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})), + crn_id, series_data.get('title'), clean_html(series_data.get('description'))) + + +class BibelTVLiveIE(BibelTVBaseIE): + IE_DESC = 'BibelTV live program' + _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P[\w-]+)' + IE_NAME = 'bibeltv:live' + + _TESTS = [{ + 'url': 'https://www.bibeltv.de/livestreams/bibeltv/', + 'info_dict': { + 'id': 'bibeltv', + 'ext': 'mp4', + 'title': 're:Bibel TV', + 'live_status': 'is_live', + 'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp', + }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374', + 'url': 'https://www.bibeltv.de/livestreams/impuls/', 'only_matching': True, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s' def _real_extract(self, url): - crn_id = self._match_id(url) - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew') + stream_id = self._match_id(url) + webpage = self._download_webpage(url, stream_id) + stream_data = self._search_json( + r'\\"video\\":', webpage, 'bibeltvData', stream_id, + transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"'))) + + formats, subtitles = self._extract_formats_and_subtitles( + traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True) + + return { + 'id': stream_id, + 'title': stream_data.get('title'), + 'thumbnail': stream_data.get('poster'), + 'is_live': True, + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/hypervideo_dl/extractor/bilibili.py b/hypervideo_dl/extractor/bilibili.py index bc04241..cb7ab2a 100644 --- a/hypervideo_dl/extractor/bilibili.py +++ b/hypervideo_dl/extractor/bilibili.py @@ -1,11 +1,14 @@ import base64 import functools +import hashlib import itertools import math -import urllib.error +import time import urllib.parse from .common import InfoExtractor, SearchInfoExtractor +from ..dependencies import Cryptodome +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, GeoRestrictedError, @@ -15,14 +18,20 @@ from ..utils import ( float_or_none, format_field, int_or_none, + join_nonempty, make_archive_id, + merge_dicts, mimetype2ext, parse_count, parse_qs, qualities, + smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, + try_call, + unified_timestamp, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -77,7 +86,7 @@ class BilibiliBaseIE(InfoExtractor): f'{line["content"]}\n\n') return srt_data - def _get_subtitles(self, video_id, initial_state, cid): + def _get_subtitles(self, video_id, aid, cid): subtitles = { 'danmaku': [{ 'ext': 'xml', @@ -85,7 +94,8 @@ class BilibiliBaseIE(InfoExtractor): }] } - for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []: + video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id) + for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)): subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) @@ -126,9 +136,20 @@ class BilibiliBaseIE(InfoExtractor): for children in 
map(self._get_all_children, traverse_obj(reply, ('replies', ...))): yield from children + def _get_episodes_from_season(self, ss_id, url): + season_info = self._download_json( + 'https://api.bilibili.com/pgc/web/season/section', ss_id, + note='Downloading season info', query={'season_id': ss_id}, + headers={'Referer': url, **self.geo_verification_headers()}) + + for entry in traverse_obj(season_info, ( + 'result', 'main_section', 'episodes', + lambda _, v: url_or_none(v['share_url']) and v['id'])): + yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}') + class BiliBiliIE(BilibiliBaseIE): - _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P[^/?#&]+)' + _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.bilibili.com/video/BV13x41117TL', @@ -276,19 +297,60 @@ class BiliBiliIE(BilibiliBaseIE): 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'params': {'skip_download': True}, + }, { + 'note': 'video redirects to festival page', + 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h', + 'info_dict': { + 'id': 'BV1wP4y1P72h', + 'ext': 'mp4', + 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】', + 'timestamp': 1643947497, + 'upload_date': '20220204', + 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6', + 'uploader': '叨叨冯聊音乐', + 'duration': 246.719, + 'uploader_id': '528182630', + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + }, + 'params': {'skip_download': True}, + }, { + 'note': 'newer festival video', + 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f', + 'info_dict': { + 'id': 'BV1ay4y1d77f', + 'ext': 'mp4', + 'title': '【崩坏3新春剧场】为特别的你送上祝福!', + 'timestamp': 1674273600, + 'upload_date': '20230121', + 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8', + 'uploader': '果蝇轰', + 'duration': 1111.722, + 'uploader_id': '8469526', + 'view_count': int, + 'like_count': int, + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + }, + 'params': {'skip_download': True}, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) - play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] - video_data = initial_state['videoData'] + is_festival = 'videoData' not in initial_state + if is_festival: + video_data = initial_state['videoInfo'] + else: + play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] + video_data = initial_state['videoData'] + video_id, title = video_data['bvid'], video_data.get('title') # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself. 
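The anthology handling below gives each part its own playlist entry: the canonical URL gains a ?p=N query and the archive id gains a _pN suffix, so parts stay distinguishable despite the shared BV id. A condensed sketch of the idea (the helper name _anthology_entries is invented for illustration; the real code routes through playlist_from_matches with a getter):

    def _anthology_entries(self, video_id, page_list):
        # One entry per part; all parts share the BV id but get distinct ?p= URLs
        for page in page_list or []:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video_id}?p={page["page"]}',
                video_id=f'{video_id}_p{page["page"]}')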
- page_list_json = traverse_obj( + page_list_json = not is_festival and traverse_obj( self._download_json( 'https://api.bilibili.com/x/player/pagelist', video_id, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, @@ -303,106 +365,143 @@ class BiliBiliIE(BilibiliBaseIE): getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') if is_anthology: - title += f' p{part_id:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}' + part_id = part_id or 1 + title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}' aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') + festival_info = {} + if is_festival: + play_info = self._download_json( + 'https://api.bilibili.com/x/player/playurl', video_id, + query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, + note='Extracting festival video formats')['data'] + + festival_info = traverse_obj(initial_state, { + 'uploader': ('videoInfo', 'upName'), + 'uploader_id': ('videoInfo', 'upMid', {str_or_none}), + 'like_count': ('videoStatus', 'like', {int_or_none}), + 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'), + }, get_all=False) + return { + **traverse_obj(initial_state, { + 'uploader': ('upData', 'name'), + 'uploader_id': ('upData', 'mid', {str_or_none}), + 'like_count': ('videoData', 'stat', 'like', {int_or_none}), + 'tags': ('tags', ..., 'tag_name'), + 'thumbnail': ('videoData', 'pic', {url_or_none}), + }), + **festival_info, + **traverse_obj(video_data, { + 'description': 'desc', + 'timestamp': ('pubdate', {int_or_none}), + 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}), + 'comment_count': ('stat', 'reply', {int_or_none}), + }, get_all=False), 'id': f'{video_id}{format_field(part_id, None, "_p%d")}', 'formats': self.extract_formats(play_info), '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None, 'title': title, - 'description': traverse_obj(initial_state, ('videoData', 'desc')), - 'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')), - 'uploader': traverse_obj(initial_state, ('upData', 'name')), - 'uploader_id': traverse_obj(initial_state, ('upData', 'mid')), - 'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')), - 'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')), - 'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')), - 'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')), - 'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')), 'duration': float_or_none(play_info.get('timelength'), scale=1000), 'chapters': self._get_chapters(aid, cid), - 'subtitles': self.extract_subtitles(video_id, initial_state, cid), + 'subtitles': self.extract_subtitles(video_id, aid, cid), '__post_extractor': self.extract_comments(aid), 'http_headers': {'Referer': url}, } class BiliBiliBangumiIE(BilibiliBaseIE): - _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P(?:ss|ep)\d+)' + _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?Pep\d+)' _TESTS = [{ - 'url': 'https://www.bilibili.com/bangumi/play/ss897', + 'url': 'https://www.bilibili.com/bangumi/play/ep267851', 'info_dict': { - 'id': 'ss897', + 'id': '267851', 'ext': 'mp4', - 'series': '神的记事本', - 'season': '神的记事本', - 'season_id': 897, + 'series': '鬼灭之刃', + 'series_id': '4358', + 'season': '鬼灭之刃', + 'season_id': 
'26801', 'season_number': 1, - 'episode': '你与旅行包', - 'episode_number': 2, - 'title': '神的记事本:第2话 你与旅行包', - 'duration': 1428.487, - 'timestamp': 1310809380, - 'upload_date': '20110716', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', + 'episode': '残酷', + 'episode_id': '267851', + 'episode_number': 1, + 'title': '1 残酷', + 'duration': 1425.256, + 'timestamp': 1554566400, + 'upload_date': '20190406', + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' }, - }, { - 'url': 'https://www.bilibili.com/bangumi/play/ep508406', - 'only_matching': True, + 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.' }] def _real_extract(self, url): video_id = self._match_id(url) + episode_id = video_id[2:] webpage = self._download_webpage(url, video_id) if '您所在的地区无法观看本片' in webpage: raise GeoRestrictedError('This video is restricted') - elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage - or '正在观看预览,大会员免费看全片' in webpage): + elif '正在观看预览,大会员免费看全片' in webpage: self.raise_login_required('This video is for premium members only') - play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] + headers = {'Referer': url, **self.geo_verification_headers()} + play_info = self._download_json( + 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id, + 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, + headers=headers) + premium_only = play_info.get('code') == -10403 + play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {} + formats = self.extract_formats(play_info) - if (not formats and '成为大会员抢先看' in webpage - and play_info.get('durl') and not play_info.get('dash')): + if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage): self.raise_login_required('This video is for premium members only') - initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) + bangumi_info = self._download_json( + 'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details', + query={'ep_id': episode_id}, headers=headers)['result'] - season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id')) + episode_number, episode_info = next(( + (idx, ep) for idx, ep in enumerate(traverse_obj( + bangumi_info, ('episodes', ..., {dict})), 1) + if str_or_none(ep.get('id')) == episode_id), (1, {})) + + season_id = bangumi_info.get('season_id') season_number = season_id and next(( idx + 1 for idx, e in enumerate( - traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))) + traverse_obj(bangumi_info, ('seasons', ...))) if e.get('season_id') == season_id ), None) + aid = episode_info.get('aid') + return { 'id': video_id, 'formats': formats, - 'title': traverse_obj(initial_state, 'h1Title'), - 'episode': traverse_obj(initial_state, ('epInfo', 'long_title')), - 'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))), - 'series': traverse_obj(initial_state, ('mediaInfo', 'series')), - 'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')), - 'season_id': season_id, + **traverse_obj(bangumi_info, { + 'series': ('series', 'series_title', {str}), + 'series_id': ('series', 'series_id', {str_or_none}), + 'thumbnail': ('square_cover', {url_or_none}), + }), + 'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info), + 'episode': episode_info.get('long_title'), + 'episode_id': episode_id, + 'episode_number': int_or_none(episode_info.get('title')) or 
episode_number, + 'season_id': str_or_none(season_id), 'season_number': season_number, - 'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')), - 'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')), + 'timestamp': int_or_none(episode_info.get('pub_time')), 'duration': float_or_none(play_info.get('timelength'), scale=1000), - 'subtitles': self.extract_subtitles( - video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))), - '__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))), - 'http_headers': {'Referer': url, **self.geo_verification_headers()}, + 'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')), + '__post_extractor': self.extract_comments(aid), + 'http_headers': headers, } -class BiliBiliBangumiMediaIE(InfoExtractor): +class BiliBiliBangumiMediaIE(BilibiliBaseIE): _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P\d+)' _TESTS = [{ 'url': 'https://www.bilibili.com/bangumi/media/md24097891', @@ -415,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor): def _real_extract(self, url): media_id = self._match_id(url) webpage = self._download_webpage(url, media_id) + ss_id = self._search_json( + r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id'] + + return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id) + + +class BiliBiliBangumiSeasonIE(BilibiliBaseIE): + _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P\d+)' + _TESTS = [{ + 'url': 'https://www.bilibili.com/bangumi/play/ss26801', + 'info_dict': { + 'id': '26801' + }, + 'playlist_mincount': 26 + }] - initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) - episode_list = self._download_json( - 'https://api.bilibili.com/pgc/web/season/section', media_id, - query={'season_id': initial_state['mediaInfo']['season_id']}, - note='Downloading season info')['result']['main_section']['episodes'] + def _real_extract(self, url): + ss_id = self._match_id(url) - return self.playlist_result(( - self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid']) - for entry in episode_list), media_id) + return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id) class BilibiliSpaceBaseIE(InfoExtractor): @@ -447,21 +556,65 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): 'id': '3985676', }, 'playlist_mincount': 178, + }, { + 'url': 'https://space.bilibili.com/313580179/video', + 'info_dict': { + 'id': '313580179', + }, + 'playlist_mincount': 92, }] + def _extract_signature(self, playlist_id): + session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False) + + key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0] + img_key = traverse_obj( + session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100' + sub_key = traverse_obj( + session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6' + + session_key = img_key + sub_key + + signature_values = [] + for position in ( + 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, + 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, + 57, 62, 11, 36, 20, 34, 44, 52 + ): + char_at_position = try_call(lambda: session_key[position]) + if char_at_position: + signature_values.append(char_at_position) + + return 
''.join(signature_values)[:32] + def _real_extract(self, url): playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video') if not is_video_url: self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. ' 'To download audios, add a "/audio" to the URL') + signature = self._extract_signature(playlist_id) + def fetch_page(page_idx): + query = { + 'keyword': '', + 'mid': playlist_id, + 'order': 'pubdate', + 'order_avoided': 'true', + 'platform': 'web', + 'pn': page_idx + 1, + 'ps': 30, + 'tid': 0, + 'web_location': 1550101, + 'wts': int(time.time()), + } + query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest() + try: - response = self._download_json('https://api.bilibili.com/x/space/arc/search', - playlist_id, note=f'Downloading page {page_idx}', - query={'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'}) + response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search', + playlist_id, note=f'Downloading page {page_idx}', query=query) except ExtractorError as e: - if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412: + if isinstance(e.cause, HTTPError) and e.cause.status == 412: raise ExtractorError( 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) raise @@ -489,9 +642,9 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE): _VALID_URL = r'https?://space\.bilibili\.com/(?P\d+)/audio' _TESTS = [{ - 'url': 'https://space.bilibili.com/3985676/audio', + 'url': 'https://space.bilibili.com/313580179/audio', 'info_dict': { - 'id': '3985676', + 'id': '313580179', }, 'playlist_mincount': 1, }] @@ -880,35 +1033,24 @@ class BiliIntlBaseIE(InfoExtractor): return formats - def _extract_video_info(self, video_data, *, ep_id=None, aid=None): + def _parse_video_metadata(self, video_data): return { - 'id': ep_id or aid, 'title': video_data.get('title_display') or video_data.get('title'), 'thumbnail': video_data.get('cover'), 'episode_number': int_or_none(self._search_regex( r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)), - 'formats': self._get_formats(ep_id=ep_id, aid=aid), - 'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid), - 'extractor_key': BiliIntlIE.ie_key(), } def _perform_login(self, username, password): - try: - from Cryptodome.PublicKey import RSA - from Cryptodome.Cipher import PKCS1_v1_5 - except ImportError: - try: - from Crypto.PublicKey import RSA - from Crypto.Cipher import PKCS1_v1_5 - except ImportError: - raise ExtractorError('pycryptodomex not found. Please install', expected=True) + if not Cryptodome.RSA: + raise ExtractorError('pycryptodomex not found. 
Please install', expected=True) key_data = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None, note='Downloading login key', errnote='Unable to download login key')['data'] - public_key = RSA.importKey(key_data['key']) - password_hash = PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + public_key = Cryptodome.RSA.importKey(key_data['key']) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) login_post = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ 'username': username, @@ -935,6 +1077,23 @@ class BiliIntlIE(BiliIntlBaseIE): 'title': 'E2 - The First Night', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 2, + 'upload_date': '20201009', + 'episode': 'Episode 2', + 'timestamp': 1602259500, + 'description': 'md5:297b5a17155eb645e14a14b385ab547e', + 'chapters': [{ + 'start_time': 0, + 'end_time': 76.242, + 'title': '' + }, { + 'start_time': 76.242, + 'end_time': 161.161, + 'title': 'Intro' + }, { + 'start_time': 1325.742, + 'end_time': 1403.903, + 'title': 'Outro' + }], } }, { # Non-Bstation page @@ -945,6 +1104,23 @@ class BiliIntlIE(BiliIntlBaseIE): 'title': 'E3 - Who?', 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 3, + 'description': 'md5:e1a775e71a35c43f141484715470ad09', + 'episode': 'Episode 3', + 'upload_date': '20211219', + 'timestamp': 1639928700, + 'chapters': [{ + 'start_time': 0, + 'end_time': 88.0, + 'title': '' + }, { + 'start_time': 88.0, + 'end_time': 156.0, + 'title': 'Intro' + }, { + 'start_time': 1173.0, + 'end_time': 1259.535, + 'title': 'Outro' + }], } }, { # Subtitle with empty content @@ -957,6 +1133,78 @@ class BiliIntlIE(BiliIntlBaseIE): 'episode_number': 140, }, 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' 
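+        # The 'chapters' fields in these tests are synthesized from the OGV
+        # play/episode 'skip' (intro/outro) data in _real_extract below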
+ }, { + 'url': 'https://www.bilibili.tv/en/video/2041863208', + 'info_dict': { + 'id': '2041863208', + 'ext': 'mp4', + 'timestamp': 1670874843, + 'description': 'Scheduled for April 2023.\nStudio: ufotable', + 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$', + 'upload_date': '20221212', + 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation', + }, + }, { + # episode comment extraction + 'url': 'https://www.bilibili.tv/en/play/34580/340317', + 'info_dict': { + 'id': '340317', + 'ext': 'mp4', + 'timestamp': 1604057820, + 'upload_date': '20201030', + 'episode_number': 5, + 'title': 'E5 - My Own Steel', + 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2', + 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$', + 'episode': 'Episode 5', + 'comment_count': int, + 'chapters': [{ + 'start_time': 0, + 'end_time': 61.0, + 'title': '' + }, { + 'start_time': 61.0, + 'end_time': 134.0, + 'title': 'Intro' + }, { + 'start_time': 1290.0, + 'end_time': 1379.0, + 'title': 'Outro' + }], + }, + 'params': { + 'getcomments': True + } + }, { + # user generated content comment extraction + 'url': 'https://www.bilibili.tv/en/video/2045730385', + 'info_dict': { + 'id': '2045730385', + 'ext': 'mp4', + 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a', + 'timestamp': 1667891924, + 'upload_date': '20221108', + 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation', + 'comment_count': int, + 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg', + }, + 'params': { + 'getcomments': True + } + }, { + # episode id without intro and outro + 'url': 'https://www.bilibili.tv/en/play/1048837/11246489', + 'info_dict': { + 'id': '11246489', + 'ext': 'mp4', + 'title': 'E1 - Operation \'Strix\' ', + 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17', + 'timestamp': 1649516400, + 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png', + 'episode': 'Episode 1', + 'episode_number': 1, + 'upload_date': '20220409', + }, }, { 'url': 'https://www.biliintl.com/en/play/34613/341736', 'only_matching': True, @@ -974,42 +1222,156 @@ class BiliIntlIE(BiliIntlBaseIE): 'only_matching': True, }] - def _real_extract(self, url): - season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') - video_id = ep_id or aid + def _make_url(video_id, series_id=None): + if series_id: + return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}' + return f'https://www.bilibili.tv/en/video/{video_id}' + + def _extract_video_metadata(self, url, video_id, season_id): + url, smuggled_data = unsmuggle_url(url, {}) + if smuggled_data.get('title'): + return smuggled_data + webpage = self._download_webpage(url, video_id) # Bstation layout initial_data = ( self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={}) or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None)) video_data = traverse_obj( - initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) + initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {} if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - video_data = traverse_obj(season_json, - ('sections', ..., 'episodes', lambda _, v: 
str(v['episode_id']) == ep_id),
-                expected_type=dict, get_all=False)
-        return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid)
+            video_data = traverse_obj(season_json, (
+                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
+            ), expected_type=dict, get_all=False)
+
+        # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
+        return merge_dicts(
+            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
+                'title': self._html_search_meta('og:title', webpage),
+                'description': self._html_search_meta('og:description', webpage)
+            })
+
+    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
+        comment_api_raw_data = self._download_json(
+            'https://api.bilibili.tv/reply/web/detail', display_id,
+            note=f'Downloading reply comment of {root_id} - {next_id}',
+            query={
+                'platform': 'web',
+                'ps': 20,  # comment's reply per page (default: 3)
+                'root': root_id,
+                'next': next_id,
+            })
+
+        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+            yield {
+                'author': traverse_obj(replies, ('member', 'name')),
+                'author_id': traverse_obj(replies, ('member', 'mid')),
+                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+                'text': traverse_obj(replies, ('content', 'message')),
+                'id': replies.get('rpid'),
+                'like_count': int_or_none(replies.get('like_count')),
+                'parent': replies.get('parent'),
+                'timestamp': unified_timestamp(replies.get('ctime_text'))
+            }
+
+        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+            yield from self._get_comments_reply(
+                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
+
+    def _get_comments(self, video_id, ep_id):
+        for i in itertools.count(0):
+            comment_api_raw_data = self._download_json(
+                'https://api.bilibili.tv/reply/web/root', video_id,
+                note=f'Downloading comment page {i + 1}',
+                query={
+                    'platform': 'web',
+                    'pn': i,  # page number
+                    'ps': 20,  # comment per page (default: 20)
+                    'oid': video_id,
+                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
+                    'sort_type': 1,  # 1: best, 2: recent
+                })
+
+            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+                yield {
+                    'author': traverse_obj(replies, ('member', 'name')),
+                    'author_id': traverse_obj(replies, ('member', 'mid')),
+                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+                    'text': traverse_obj(replies, ('content', 'message')),
+                    'id': replies.get('rpid'),
+                    'like_count': int_or_none(replies.get('like_count')),
+                    'timestamp': unified_timestamp(replies.get('ctime_text')),
+                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
+                }
+                if replies.get('count'):
+                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
+
+            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+                break
+
+    def _real_extract(self, url):
+        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
+        video_id = ep_id or aid
+        chapters = None
+
+        if ep_id:
+            intro_ending_json = self._call_api(
+                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
+                video_id, fatal=False) or {}
+            if intro_ending_json.get('skip'):
+                # FIXME: the start and end times seem to be a few seconds off, even though they are correct per ogv.*.js
+                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
+                chapters = [{
+                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000), + 'title': 'Intro' + }, { + 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000), + 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000), + 'title': 'Outro' + }] + + return { + 'id': video_id, + **self._extract_video_metadata(url, video_id, season_id), + 'formats': self._get_formats(ep_id=ep_id, aid=aid), + 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid), + 'chapters': chapters, + '__post_extractor': self.extract_comments(video_id, ep_id) + } class BiliIntlSeriesIE(BiliIntlBaseIE): - _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P\d+)/?(?:[?#]|$)' + IE_NAME = 'biliIntl:series' + _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P\d+)/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://www.bilibili.tv/en/play/34613', 'playlist_mincount': 15, 'info_dict': { 'id': '34613', - 'title': 'Fly Me to the Moon', - 'description': 'md5:a861ee1c4dc0acfad85f557cc42ac627', - 'categories': ['Romance', 'Comedy', 'Slice of life'], + 'title': 'TONIKAWA: Over the Moon For You', + 'description': 'md5:297b5a17155eb645e14a14b385ab547e', + 'categories': ['Slice of life', 'Comedy', 'Romance'], 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'view_count': int, }, 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.bilibili.tv/en/media/1048837', + 'info_dict': { + 'id': '1048837', + 'title': 'SPY×FAMILY', + 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17', + 'categories': ['Adventure', 'Action', 'Comedy'], + 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$', + 'view_count': int, + }, + 'playlist_mincount': 25, }, { 'url': 'https://www.biliintl.com/en/play/34613', 'only_matching': True, @@ -1020,9 +1382,12 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): def _entries(self, series_id): series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id) - for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]): - episode_id = str(episode.get('episode_id')) - yield self._extract_video_info(episode, ep_id=episode_id) + for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict): + episode_id = str(episode['episode_id']) + yield self.url_result(smuggle_url( + BiliIntlIE._make_url(episode_id, series_id), + self._parse_video_metadata(episode) + ), BiliIntlIE, episode_id) def _real_extract(self, url): series_id = self._match_id(url) @@ -1034,7 +1399,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): class BiliLiveIE(InfoExtractor): - _VALID_URL = r'https?://live.bilibili.com/(?P\d+)' + _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P\d+)' _TESTS = [{ 'url': 'https://live.bilibili.com/196', @@ -1050,6 +1415,9 @@ class BiliLiveIE(InfoExtractor): }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', 'only_matching': True + }, { + 'url': 'https://live.bilibili.com/blanc/196', + 'only_matching': True }] _FORMATS = { @@ -1111,6 +1479,7 @@ class BiliLiveIE(InfoExtractor): 'thumbnail': room_data.get('user_cover'), 'timestamp': stream_data.get('live_time'), 'formats': formats, + 'is_live': True, 'http_headers': { 'Referer': url, }, diff --git a/hypervideo_dl/extractor/bitchute.py b/hypervideo_dl/extractor/bitchute.py index 
10e7b0b..0805b8b 100644 --- a/hypervideo_dl/extractor/bitchute.py +++ b/hypervideo_dl/extractor/bitchute.py @@ -2,9 +2,9 @@ import functools import re from .common import InfoExtractor +from ..networking import HEADRequest from ..utils import ( ExtractorError, - HEADRequest, OnDemandPagedList, clean_html, get_element_by_class, @@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor): def _check_format(self, video_url, video_id): urls = orderedSet( re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url) - for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153')) + for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128', + 'seed132', 'seed150', 'seed151', 'seed152', 'seed153', + 'seed167', 'seed171', 'seed177', 'seed305', 'seed307', + 'seedp29xb', 'zb10-7gsop1v78')) for url in urls: try: response = self._request_webpage( diff --git a/hypervideo_dl/extractor/blerp.py b/hypervideo_dl/extractor/blerp.py new file mode 100644 index 0000000..4631ad2 --- /dev/null +++ b/hypervideo_dl/extractor/blerp.py @@ -0,0 +1,167 @@ +import json + +from .common import InfoExtractor +from ..utils import strip_or_none, traverse_obj + + +class BlerpIE(InfoExtractor): + IE_NAME = 'blerp' + _VALID_URL = r'https?://(?:www\.)?blerp\.com/soundbites/(?P[0-9a-zA-Z]+)' + _TESTS = [{ + 'url': 'https://blerp.com/soundbites/6320fe8745636cb4dd677a5a', + 'info_dict': { + 'id': '6320fe8745636cb4dd677a5a', + 'title': 'Samsung Galaxy S8 Over the Horizon Ringtone 2016', + 'uploader': 'luminousaj', + 'uploader_id': '5fb81e51aa66ae000c395478', + 'ext': 'mp3', + 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], + } + }, { + 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', + 'info_dict': { + 'id': '5bc94ef4796001000498429f', + 'title': 'Yee', + 'uploader': '179617322678353920', + 'uploader_id': '5ba99cf71386730004552c42', + 'ext': 'mp3', + 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] + } + }] + + _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" + _GRAPHQL_QUERY = ( + '''query webBitePageGetBite($_id: MongoID!) 
{ + web { + biteById(_id: $_id) { + ...bitePageFrag + __typename + } + __typename + } + } + + fragment bitePageFrag on Bite { + _id + title + userKeywords + keywords + color + visibility + isPremium + owned + price + extraReview + isAudioExists + image { + filename + original { + url + __typename + } + __typename + } + userReactions { + _id + reactions + createdAt + __typename + } + topReactions + totalSaveCount + saved + blerpLibraryType + license + licenseMetaData + playCount + totalShareCount + totalFavoriteCount + totalAddedToBoardCount + userCategory + userAudioQuality + audioCreationState + transcription + userTranscription + description + createdAt + updatedAt + author + listingType + ownerObject { + _id + username + profileImage { + filename + original { + url + __typename + } + __typename + } + __typename + } + transcription + favorited + visibility + isCurated + sourceUrl + audienceRating + strictAudienceRating + ownerId + reportObject { + reportedContentStatus + __typename + } + giphy { + mp4 + gif + __typename + } + audio { + filename + original { + url + __typename + } + mp3 { + url + __typename + } + __typename + } + __typename + } + + ''') + + def _real_extract(self, url): + audio_id = self._match_id(url) + + data = { + 'operationName': self._GRAPHQL_OPERATIONNAME, + 'query': self._GRAPHQL_QUERY, + 'variables': { + '_id': audio_id + } + } + + headers = { + 'Content-Type': 'application/json' + } + + json_result = self._download_json('https://api.blerp.com/graphql', + audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) + + bite_json = json_result['data']['web']['biteById'] + + info_dict = { + 'id': bite_json['_id'], + 'url': bite_json['audio']['mp3']['url'], + 'title': bite_json['title'], + 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), + 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), + 'ext': 'mp3', + 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) + } + + return info_dict diff --git a/hypervideo_dl/extractor/boxcast.py b/hypervideo_dl/extractor/boxcast.py new file mode 100644 index 0000000..51f9eb7 --- /dev/null +++ b/hypervideo_dl/extractor/boxcast.py @@ -0,0 +1,102 @@ +from .common import InfoExtractor +from ..utils import ( + js_to_json, + traverse_obj, + unified_timestamp +) + + +class BoxCastVideoIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://boxcast\.tv/(?: + view-embed/| + channel/\w+\?(?:[^#]+&)?b=| + video-portal/(?:\w+/){2} + )(?P[\w-]+)''' + _EMBED_REGEX = [r']+src=["\'](?Phttps?://boxcast\.tv/view-embed/[\w-]+)'] + _TESTS = [{ + 'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx', + 'info_dict': { + 'id': 'da1eqqgkacngd5djlqld', + 'ext': 'mp4', + 'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$', + 'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium', + 'release_timestamp': 1670686812, + 'release_date': '20221210', + 'uploader_id': 're8w0v8hohhvpqtbskpe', + 'uploader': 'Children\'s Health Defense', + } + }, { + 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad', + 'info_dict': { + 'id': 'otbpltj2kzkveo2qz3ad', + 'ext': 'mp4', + 'uploader_id': 'vctwevwntun3o0ikq7af', + 'uploader': 'Legacy Christian Church', + 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools', + 'thumbnail': 
r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg' + } + }, { + 'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev', + 'info_dict': { + 'id': 'ssihlw5gvfij2by8tkev', + 'ext': 'mp4', + 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg$', + 'release_date': '20230101', + 'uploader_id': 'ds25vaazhlu4ygcvffid', + 'release_timestamp': 1672543201, + 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland', + 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340', + 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022', + } + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://childrenshealthdefense.eu/live-stream/', + 'info_dict': { + 'id': 'da1eqqgkacngd5djlqld', + 'ext': 'mp4', + 'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$', + 'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium', + 'release_timestamp': 1670686812, + 'release_date': '20221210', + 'uploader_id': 're8w0v8hohhvpqtbskpe', + 'uploader': 'Children\'s Health Defense', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + webpage_json_data = self._search_json( + r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id, + transform_source=js_to_json, default={}) + + # Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api + broadcast_json_data = ( + traverse_obj(webpage_json_data, ('broadcast', 'data')) + or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id)) + view_json_data = ( + traverse_obj(webpage_json_data, ('view', 'data')) + or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view', + display_id, fatal=False) or {}) + + formats, subtitles = [], {} + if view_json_data.get('status') == 'recorded': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + view_json_data['playlist'], display_id) + + return { + 'id': str(broadcast_json_data['id']), + 'title': (broadcast_json_data.get('name') + or self._html_search_meta(['og:title', 'twitter:title'], webpage)), + 'description': (broadcast_json_data.get('description') + or self._html_search_meta(['og:description', 'twitter:description'], webpage) + or None), + 'thumbnail': (broadcast_json_data.get('preview') + or self._html_search_meta(['og:image', 'twitter:image'], webpage)), + 'formats': formats, + 'subtitles': subtitles, + 'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')), + 'uploader': broadcast_json_data.get('account_name'), + 'uploader_id': broadcast_json_data.get('account_id'), + } diff --git a/hypervideo_dl/extractor/brainpop.py b/hypervideo_dl/extractor/brainpop.py new file mode 100644 index 0000000..1200437 --- /dev/null +++ b/hypervideo_dl/extractor/brainpop.py @@ -0,0 +1,318 @@ +import json +import re + +from .common import InfoExtractor +from ..utils import ( + classproperty, + int_or_none, + traverse_obj, + urljoin +) + + +class BrainPOPBaseIE(InfoExtractor): + _NETRC_MACHINE = 'brainpop' + _ORIGIN = '' # So that _VALID_URL doesn't crash + _LOGIN_ERRORS = { + 1502: 'The username and password you entered did not match.', # LOGIN_FAILED + 1503: 'Payment method is expired.', # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE + 1506: 'Your BrainPOP plan has expired.', # LOGIN_FAILED_ACCOUNT_EXPIRED + 1507: 'Terms not accepted.', # LOGIN_FAILED_TERMS_NOT_ACCEPTED + 1508: 'Account not activated.', # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE 
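+        # _perform_login (below) treats status 1505 as success and maps any
+        # other code through this table when reporting the failure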
+ 1512: 'The maximum number of devices permitted are logged in with your account right now.', # LOGIN_FAILED_LOGIN_LIMIT_REACHED + 1513: 'You are trying to access your account from outside of its allowed IP range.', # LOGIN_FAILED_INVALID_IP + 1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.', # LOGIN_FAILED_MBP_DISABLED + 1515: 'Account not activated.', # LOGIN_FAILED_TEACHER_NOT_ACTIVE + 1523: 'That username and password won\'t work on this BrainPOP site.', # LOGIN_FAILED_NO_ACCESS + 1524: 'You\'ll need to join a class before you can login.', # LOGIN_FAILED_STUDENT_NO_PERIOD + 1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.', # LOGIN_FAILED_ACCOUNT_LOCKED + } + + @classproperty + def _VALID_URL(cls): + root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?') + return rf'{root}/(?P[^/]+/[^/]+/(?P[^/?#&]+))' + + def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields={}): + formats = [] + formats = self._extract_m3u8_formats( + f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}', + display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False) + formats.append({ + 'format_id': format_id, + 'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}', + }) + for f in formats: + f.update(extra_fields) + return formats + + def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields={}): + formats = [] + additional_key_formats = { + '%s': {}, + 'ad_%s': { + 'format_note': 'Audio description', + 'source_preference': -2 + } + } + for additional_key_format, additional_key_fields in additional_key_formats.items(): + for key_quality, key_index in enumerate(('high', 'low')): + full_key_index = additional_key_format % (key_format % key_index) + if data.get(full_key_index): + formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, { + 'quality': -1 - key_quality, + **additional_key_fields, + **extra_fields + })) + return formats + + def _perform_login(self, username, password): + login_res = self._download_json( + 'https://api.brainpop.com/api/login', None, + data=json.dumps({'username': username, 'password': password}).encode(), + headers={ + 'Content-Type': 'application/json', + 'Referer': self._ORIGIN + }, note='Logging in', errnote='Unable to log in', expected_status=400) + status_code = int_or_none(login_res['status_code']) + if status_code != 1505: + self.report_warning( + f'Unable to login: {self._LOGIN_ERRORS.get(status_code) or login_res.get("message")}' + or f'Got status code {status_code}') + + +class BrainPOPIE(BrainPOPBaseIE): + _ORIGIN = 'https://www.brainpop.com' + _VIDEO_URL = 'https://svideos.brainpop.com' + _HLS_URL = 'https://hls.brainpop.com' + _CDN_URL = 'https://cdn.brainpop.com' + _TESTS = [{ + 'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null', + 'md5': '3ead374233ae74c7f1b0029a01c972f0', + 'info_dict': { + 'id': '1f3259fa457292b4', + 'ext': 'mp4', + 'title': 'Martin Luther King, Jr.', + 'display_id': 'martinlutherkingjr', + 'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349', + }, + }, { + 'url': 'https://www.brainpop.com/science/space/bigbang/', + 'md5': '9a1ff0e77444dd9e437354eb669c87ec', + 'info_dict': { + 'id': 'acae52cd48c99acf', + 'ext': 'mp4', + 'title': 'Big Bang', + 'display_id': 'bigbang', + 'description': 'md5:3e53b766b0f116f631b13f4cae185d38', + }, + 'skip': 'Requires login', + }] + + def 
_real_extract(self, url): + slug, display_id = self._match_valid_url(url).group('slug', 'id') + movie_data = self._download_json( + f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id, + 'Downloading movie data JSON', 'Unable to download movie data')['data'] + topic_data = traverse_obj(self._download_json( + f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id, + 'Downloading topic data JSON', 'Unable to download topic data', fatal=False), + ('data', 'topic'), expected_type=dict) or movie_data['topic'] + + if not traverse_obj(movie_data, ('access', 'allow')): + reason = traverse_obj(movie_data, ('access', 'reason')) + if 'logged' in reason: + self.raise_login_required(reason, metadata_available=True) + else: + self.raise_no_formats(reason, video_id=display_id) + movie_feature = movie_data['feature'] + movie_feature_data = movie_feature['data'] + + formats, subtitles = [], {} + formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', { + 'language': movie_feature.get('language') or 'en', + 'language_preference': 10 + })) + for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items(): + formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', { + 'language': lang, + 'language_preference': -10 + })) + + # TODO: Do localization fields also have subtitles? + for name, url in movie_feature_data.items(): + lang = self._search_regex( + r'^subtitles_(?P\w+)$', name, 'subtitle metadata', default=None) + if lang and url: + subtitles.setdefault(lang, []).append({ + 'url': urljoin(self._CDN_URL, url) + }) + + return { + 'id': topic_data['topic_id'], + 'display_id': display_id, + 'title': topic_data.get('name'), + 'description': topic_data.get('synopsis'), + 'formats': formats, + 'subtitles': subtitles, + } + + +class BrainPOPLegacyBaseIE(BrainPOPBaseIE): + def _parse_js_topic_data(self, topic_data, display_id, token): + movie_data = topic_data['movies'] + # TODO: Are there non-burned subtitles? 
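+        # The movie data carries 'high'/'low' quality keys, optionally duplicated
+        # with an 'ad_' prefix for audio-description tracks; _extract_adaptive_formats
+        # (above) assembles HLS and progressive formats for each of them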
+ formats = self._extract_adaptive_formats(movie_data, token, display_id) + + return { + 'id': topic_data['EntryID'], + 'display_id': display_id, + 'title': topic_data.get('name'), + 'alt_title': topic_data.get('title'), + 'description': topic_data.get('synopsis'), + 'formats': formats, + } + + def _real_extract(self, url): + slug, display_id = self._match_valid_url(url).group('slug', 'id') + webpage = self._download_webpage(url, display_id) + topic_data = self._search_json( + r'var\s+content\s*=\s*', webpage, 'content data', + display_id, end_pattern=';')['category']['unit']['topic'] + token = self._search_regex(r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token') + return self._parse_js_topic_data(topic_data, display_id, token) + + +class BrainPOPJrIE(BrainPOPLegacyBaseIE): + _ORIGIN = 'https://jr.brainpop.com' + _VIDEO_URL = 'https://svideos-jr.brainpop.com' + _HLS_URL = 'https://hls-jr.brainpop.com' + _CDN_URL = 'https://cdn-jr.brainpop.com' + _TESTS = [{ + 'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/', + 'md5': '04e0561bb21770f305a0ce6cf0d869ab', + 'info_dict': { + 'id': '347', + 'ext': 'mp4', + 'title': 'Emotions', + 'display_id': 'emotions', + }, + }, { + 'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/', + 'md5': 'b0ed063bbd1910df00220ee29340f5d6', + 'info_dict': { + 'id': '29', + 'ext': 'mp4', + 'title': 'Arctic Habitats', + 'display_id': 'arctichabitats', + }, + 'skip': 'Requires login', + }] + + +class BrainPOPELLIE(BrainPOPLegacyBaseIE): + _ORIGIN = 'https://ell.brainpop.com' + _VIDEO_URL = 'https://svideos-esl.brainpop.com' + _HLS_URL = 'https://hls-esl.brainpop.com' + _CDN_URL = 'https://cdn-esl.brainpop.com' + _TESTS = [{ + 'url': 'https://ell.brainpop.com/level1/unit1/lesson1/', + 'md5': 'a2012700cfb774acb7ad2e8834eed0d0', + 'info_dict': { + 'id': '1', + 'ext': 'mp4', + 'title': 'Lesson 1', + 'display_id': 'lesson1', + 'alt_title': 'Personal Pronouns', + }, + }, { + 'url': 'https://ell.brainpop.com/level3/unit6/lesson5/', + 'md5': 'be19c8292c87b24aacfb5fda2f3f8363', + 'info_dict': { + 'id': '101', + 'ext': 'mp4', + 'title': 'Lesson 5', + 'display_id': 'lesson5', + 'alt_title': 'Review: Unit 6', + }, + 'skip': 'Requires login', + }] + + +class BrainPOPEspIE(BrainPOPLegacyBaseIE): + IE_DESC = 'BrainPOP Español' + _ORIGIN = 'https://esp.brainpop.com' + _VIDEO_URL = 'https://svideos.brainpop.com' + _HLS_URL = 'https://hls.brainpop.com' + _CDN_URL = 'https://cdn.brainpop.com/mx' + _TESTS = [{ + 'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/', + 'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9', + 'info_dict': { + 'id': '3893', + 'ext': 'mp4', + 'title': 'Ecosistemas', + 'display_id': 'ecosistemas', + 'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3', + }, + }, { + 'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/', + 'md5': '98c1b9559e0e33777209c425cda7dac4', + 'info_dict': { + 'id': '7146', + 'ext': 'mp4', + 'title': 'Emily Dickinson', + 'display_id': 'emily_dickinson', + 'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b', + }, + 'skip': 'Requires login', + }] + + +class BrainPOPFrIE(BrainPOPLegacyBaseIE): + IE_DESC = 'BrainPOP Français' + _ORIGIN = 'https://fr.brainpop.com' + _VIDEO_URL = 'https://svideos.brainpop.com' + _HLS_URL = 'https://hls.brainpop.com' + _CDN_URL = 'https://cdn.brainpop.com/fr' + _TESTS = [{ + 'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/', + 'md5': '97e7f48af8af93f8a2be11709f239371', + 'info_dict': { + 'id': '1651', + 'ext': 'mp4', + 
'title': 'Sources d\'énergie', + 'display_id': 'sourcesdenergie', + 'description': 'md5:7eece350f019a21ef9f64d4088b2d857', + }, + }, { + 'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/', + 'md5': '0cf2b4f89804d0dd4a360a51310d445a', + 'info_dict': { + 'id': '5803', + 'ext': 'mp4', + 'title': 'Plagiat', + 'display_id': 'plagiat', + 'description': 'md5:4496d87127ace28e8b1eda116e77cd2b', + }, + 'skip': 'Requires login', + }] + + +class BrainPOPIlIE(BrainPOPLegacyBaseIE): + IE_DESC = 'BrainPOP Hebrew' + _ORIGIN = 'https://il.brainpop.com' + _VIDEO_URL = 'https://svideos.brainpop.com' + _HLS_URL = 'https://hls.brainpop.com' + _CDN_URL = 'https://cdn.brainpop.com/he' + _TESTS = [{ + 'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/', + 'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641', + 'info_dict': { + 'id': '3782', + 'ext': 'mp4', + 'title': 'md5:e993632fcda0545d9205602ec314ad67', + 'display_id': 'subjects_3782', + 'description': 'md5:4cc084a8012beb01f037724423a4d4ed', + }, + }] diff --git a/hypervideo_dl/extractor/bravotv.py b/hypervideo_dl/extractor/bravotv.py index d489584..419fe8c 100644 --- a/hypervideo_dl/extractor/bravotv.py +++ b/hypervideo_dl/extractor/bravotv.py @@ -1,117 +1,189 @@ -import re - from .adobepass import AdobePassIE +from ..networking import HEADRequest from ..utils import ( - smuggle_url, - update_url_query, - int_or_none, + extract_attributes, float_or_none, - try_get, - dict_get, + get_element_html_by_class, + int_or_none, + merge_dicts, + parse_age_limit, + remove_end, + str_or_none, + traverse_obj, + unescapeHTML, + unified_timestamp, + update_url_query, + url_or_none, ) class BravoTVIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?Pbravotv|oxygen)\.com/(?:[^/]+/)+(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?Pbravotv|oxygen)\.com/(?:[^/]+/)+(?P[^/?#]+)' _TESTS = [{ 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', - 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9', 'info_dict': { - 'id': 'epL0pmK1kQlT', + 'id': '3923059', 'ext': 'mp4', 'title': 'The Top Chef Season 16 Winner Is...', 'description': 'Find out who takes the title of Top Chef!', - 'uploader': 'NBCU-BRAV', 'upload_date': '20190314', 'timestamp': 1552591860, 'season_number': 16, 'episode_number': 15, 'series': 'Top Chef', 'episode': 'The Top Chef Season 16 Winner Is...', - 'duration': 190.0, - } + 'duration': 190.357, + 'season': 'Season 16', + 'thumbnail': r're:^https://.+\.jpg', + }, + 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', - 'only_matching': True, + 'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling', + 'info_dict': { + 'id': '9000234570', + 'ext': 'mp4', + 'title': 'London Calling', + 'description': 'md5:5af95a8cbac1856bd10e7562f86bb759', + 'upload_date': '20230310', + 'timestamp': 1678410000, + 'season_number': 20, + 'episode_number': 1, + 'series': 'Top Chef', + 'episode': 'London Calling', + 'duration': 3266.03, + 'season': 'Season 20', + 'chapters': 'count:7', + 'thumbnail': r're:^https://.+\.jpg', + 'age_limit': 14, + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', + }, { + 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night', + 'info_dict': { + 'id': '3692045', + 'ext': 'mp4', + 'title': 'Closing Night', + 'description': 'md5:3170065c5c2f19548d72a4cbc254af63', + 'upload_date': '20180401', + 'timestamp': 1522623600, + 'season_number': 1, + 
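# The `_VALID_URL` above relies on named capture groups; a quick standalone
# check of how `(?P<site>...)` and `(?P<id>...)` feed `.group('site', 'id')`
# in `_real_extract` (the test URL is taken from the suite above):
import re

VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
site, display_id = re.match(
    VALID_URL, 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
).group('site', 'id')
assert (site, display_id) == ('bravotv', 'london-calling')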
'episode_number': 1, + 'series': 'In Ice Cold Blood', + 'episode': 'Closing Night', + 'duration': 2629.051, + 'season': 'Season 1', + 'chapters': 'count:6', + 'thumbnail': r're:^https://.+\.jpg', + 'age_limit': 14, + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'This video requires AdobePass MSO credentials', }, { 'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2', + 'info_dict': { + 'id': '3974019', + 'ext': 'mp4', + 'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)', + 'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5', + 'upload_date': '20190617', + 'timestamp': 1560790800, + 'season_number': 2, + 'episode_number': 16, + 'series': 'In Ice Cold Blood', + 'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)', + 'duration': 68.235, + 'season': 'Season 2', + 'thumbnail': r're:^https://.+\.jpg', + 'age_limit': 14, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', 'only_matching': True, }] def _real_extract(self, url): - site, display_id = self._match_valid_url(url).groups() + site, display_id = self._match_valid_url(url).group('site', 'id') webpage = self._download_webpage(url, display_id) - settings = self._parse_json(self._search_regex( - r']+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})', webpage, 'drupal settings'), - display_id) - info = {} + settings = self._search_json( + r']+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id) + tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '') query = { - 'mbr': 'true', + 'manifest': 'm3u', + 'formats': 'm3u,mpeg4', } - account_pid, release_pid = [None] * 2 - tve = settings.get('ls_tve') + if tve: - query['manifest'] = 'm3u' - mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage) - if mobj: - account_pid, tp_path = mobj.groups() - release_pid = tp_path.strip('/').split('/')[-1] - else: - account_pid = 'HNK2IC' - tp_path = release_pid = tve['release_pid'] - if tve.get('entitlement') == 'auth': - adobe_pass = settings.get('tve_adobe_auth', {}) - if site == 'bravotv': - site = 'bravo' + account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC' + account_id = tve['data-mpx-media-account-id'] + metadata = self._parse_json( + tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML) + video_id = tve.get('data-guid') or metadata['guid'] + if tve.get('data-entitlement') == 'auth': + auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {} + site = remove_end(site, 'tv') + release_pid = tve['data-release-pid'] resource = self._get_mvpd_resource( - adobe_pass.get('adobePassResourceId') or site, - tve['title'], release_pid, tve.get('rating')) - query['auth'] = self._extract_mvpd_auth( - url, release_pid, - adobe_pass.get('adobePassRequestorId') or site, resource) - else: - shared_playlist = settings['ls_playlist'] - account_pid = shared_playlist['account_pid'] - metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] - tp_path = release_pid = metadata.get('release_pid') - if not release_pid: - release_pid = metadata['guid'] - tp_path = 'media/guid/2140479951/' + release_pid - info.update({ - 'title': metadata['title'], - 'description': metadata.get('description'), - 'season_number': 
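# The tve branch above is driven entirely by `data-*` attributes on a single
# element; a toy sketch of that idea with plain `re`. The element markup is
# invented (attribute values borrowed from the patch), and unlike yt-dlp's
# `extract_attributes` this assumes double-quoted attributes only.
import re

_EL = '<div class="tve-video-deck-app" data-guid="3923059" data-mpx-media-account-pid="HNK2IC"></div>'

def toy_extract_attributes(element_html):
    return dict(re.findall(r'([\w-]+)="([^"]*)"', element_html))

tve = toy_extract_attributes(_EL)
assert tve['data-guid'] == '3923059' and tve['data-mpx-media-account-pid'] == 'HNK2IC'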
int_or_none(metadata.get('season_num')), - 'episode_number': int_or_none(metadata.get('episode_num')), - }) - query['switch'] = 'progressive' + tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site, + tve['data-title'], release_pid, tve.get('data-rating')) + query.update({ + 'switch': 'HLSServiceSecure', + 'auth': self._extract_mvpd_auth( + url, release_pid, auth.get('adobePassRequestorId') or site, resource), + }) - tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path) + else: + ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {} + account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B' + account_id = ls_playlist['mpxMediaAccountId'] + video_id = ls_playlist['defaultGuid'] + metadata = traverse_obj( + ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False) + tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}' tp_metadata = self._download_json( - update_url_query(tp_url, {'format': 'preview'}), - display_id, fatal=False) - if tp_metadata: - info.update({ - 'title': tp_metadata.get('title'), - 'description': tp_metadata.get('description'), - 'duration': float_or_none(tp_metadata.get('duration'), 1000), - 'season_number': int_or_none( - dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))), - 'episode_number': int_or_none( - dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))), - # For some reason the series is sometimes wrapped into a single element array. - 'series': try_get( - dict_get(tp_metadata, ('pl1$show', 'nbcu$show')), - lambda x: x[0] if isinstance(x, list) else x, - expected_type=str), - 'episode': dict_get( - tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')), - }) + update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) + + seconds_or_none = lambda x: float_or_none(x, 1000) + chapters = traverse_obj(tp_metadata, ('chapters', ..., { + 'start_time': ('startTime', {seconds_or_none}), + 'end_time': ('endTime', {seconds_or_none}), + })) + # prune pointless single chapters that span the entire duration from short videos + if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): + chapters = None - info.update({ - '_type': 'url_transparent', - 'id': release_pid, - 'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}), - 'ie_key': 'ThePlatform', - }) - return info + m3u8_url = self._request_webpage(HEADRequest( + update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url + if 'mpeg_cenc' in m3u8_url: + self.report_drm(video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'chapters': chapters, + **merge_dicts(traverse_obj(tp_metadata, { + 'title': 'title', + 'description': 'description', + 'duration': ('duration', {seconds_or_none}), + 'timestamp': ('pubDate', {seconds_or_none}), + 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), + 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), + 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), + 'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}), + 'age_limit': ('ratings', ..., 'rating', {parse_age_limit}), + }, get_all=False), traverse_obj(metadata, { + 'title': 'title', + 'description': 'description', + 'duration': ('durationInSeconds', 
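# The HEADRequest above exists only to follow redirects and inspect the final
# stream URL for theplatform's DRM marker; roughly, in stdlib terms:
import urllib.request

def resolve_final_url(url):
    # HEAD avoids downloading the manifest body; urllib follows redirects
    # automatically and exposes the resolved location as `resp.url`
    req = urllib.request.Request(url, method='HEAD')
    with urllib.request.urlopen(req) as resp:
        return resp.url

# Hypothetical usage mirroring the check above:
# if 'mpeg_cenc' in resolve_final_url(m3u8_url):
#     ...  # stream is CENC-encrypted, so report DRM instead of downloading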
{int_or_none}), + 'timestamp': ('airDate', {unified_timestamp}), + 'thumbnail': ('thumbnailUrl', {url_or_none}), + 'season_number': ('seasonNumber', {int_or_none}), + 'episode_number': ('episodeNumber', {int_or_none}), + 'episode': 'episodeTitle', + 'series': 'show', + })) + } diff --git a/hypervideo_dl/extractor/brightcove.py b/hypervideo_dl/extractor/brightcove.py index 2b7ddca..61b1841 100644 --- a/hypervideo_dl/extractor/brightcove.py +++ b/hypervideo_dl/extractor/brightcove.py @@ -7,10 +7,10 @@ from .adobepass import AdobePassIE from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, - compat_HTTPError, compat_parse_qs, compat_urlparse, ) +from ..networking.exceptions import HTTPError from ..utils import ( clean_html, dict_get, @@ -575,6 +575,7 @@ class BrightcoveNewBaseIE(AdobePassIE): self.raise_no_formats( error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) + headers.pop('Authorization', None) # or else http formats will give error 400 for f in formats: f.setdefault('http_headers', {}).update(headers) @@ -895,8 +896,9 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): store_pk(policy_key) return policy_key - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = {} + token = smuggled_data.get('token') + api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}' + headers = {'Authorization': f'Bearer {token}'} if token else {} referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key if referrer: headers.update({ @@ -913,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): json_data = self._download_json(api_url, video_id, headers=headers) break except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): + json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0] message = json_data.get('message') or json_data['error_code'] if json_data.get('error_subcode') == 'CLIENT_GEO': self.raise_geo_restricted(msg=message) diff --git a/hypervideo_dl/extractor/callin.py b/hypervideo_dl/extractor/callin.py index e966876..c77179c 100644 --- a/hypervideo_dl/extractor/callin.py +++ b/hypervideo_dl/extractor/callin.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - traverse_obj, - float_or_none, - int_or_none -) +from ..utils import float_or_none, int_or_none, make_archive_id, traverse_obj class CallinIE(InfoExtractor): @@ -35,6 +31,54 @@ class CallinIE(InfoExtractor): 'episode_number': 1, 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd' } + }, { + 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', + 'md5': '14ede27ee2c957b7e4db93140fc0745c', + 'info_dict': { + 'id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', + 'ext': 'ts', + 'title': 'FCC Commissioner Brendan Carr on Elon’s Starlink', + 'description': 'Or, why the government doesn’t like SpaceX', + 'channel': 'The Pull Request', + 'channel_url': 'https://callin.com/show/the-pull-request-ucnDJmEKAa', + 'duration': 3182.472, + 'series_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638', + 'uploader_url': 'http://thepullrequest.com', + 'upload_date': '20220902', + 'episode': 'FCC 
Commissioner Brendan Carr on Elon’s Starlink', + 'display_id': 'fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', + 'series': 'The Pull Request', + 'channel_id': '7e9c23156e4aecfdcaef46bfb2ed7ca268509622ec006c0f0f25d90e34496638', + 'view_count': int, + 'uploader': 'Antonio García Martínez', + 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png', + 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', + 'timestamp': 1662100688.005, + } + }, { + 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA', + 'md5': '16f704ddbf82a27e3930533b12062f07', + 'info_dict': { + 'id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', + 'ext': 'ts', + 'title': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?', + 'description': 'Let’s talk todays episode about the primary election shake up in NYC and the elites melting down over student debt cancelation.', + 'channel': 'The DEBRIEF With Briahna Joy Gray', + 'channel_url': 'https://callin.com/show/the-debrief-with-briahna-joy-gray-siiFDzGegm', + 'duration': 10043.16, + 'series_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7', + 'uploader_url': 'http://patreon.com/badfaithpodcast', + 'upload_date': '20220826', + 'episode': 'Episode 81- Elites MELT DOWN over Student Debt Victory? Rumble in NYC?', + 'display_id': 'episode-', + 'series': 'The DEBRIEF With Briahna Joy Gray', + 'channel_id': '61cea58444465fd26674069703bd8322993bc9e5b4f1a6d0872690554a046ff7', + 'view_count': int, + 'uploader': 'Briahna Gray', + 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png', + 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', + 'timestamp': 1661476708.282, + } }] def try_get_user_name(self, d): @@ -86,6 +130,7 @@ class CallinIE(InfoExtractor): return { 'id': id, + '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])], 'display_id': display_id, 'title': title, 'formats': formats, diff --git a/hypervideo_dl/extractor/camfm.py b/hypervideo_dl/extractor/camfm.py new file mode 100644 index 0000000..a9850f4 --- /dev/null +++ b/hypervideo_dl/extractor/camfm.py @@ -0,0 +1,85 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + get_element_by_class, + get_elements_by_class, + join_nonempty, + traverse_obj, + unified_timestamp, + urljoin, +) + + +class CamFMShowIE(InfoExtractor): + _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/shows/(?P[^/]+)' + _TESTS = [{ + 'playlist_mincount': 5, + 'url': 'https://camfm.co.uk/shows/soul-mining/', + 'info_dict': { + 'id': 'soul-mining', + 'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a', + 'title': 'Soul Mining', + 'description': 'Telling the stories of jazz, funk and soul from all corners of the world.', + }, + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + page = self._download_webpage(url, show_id) + + return { + '_type': 'playlist', + 'id': show_id, + 'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE) + for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)], + 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex( + r']+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)), + 'title': self._html_search_regex('
<h1>([^<]+)</h1>
', page, 'title', fatal=False), + 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)) + } + + +class CamFMEpisodeIE(InfoExtractor): + _VALID_URL = r'https://(?:www\.)?camfm\.co\.uk/player/(?P[^/]+)' + _TESTS = [{ + 'url': 'https://camfm.co.uk/player/43336', + 'skip': 'Episode will expire - don\'t actually know when, but it will go eventually', + 'info_dict': { + 'id': '43336', + 'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023', + 'ext': 'mp3', + 'upload_date': '20230516', + 'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf', + 'timestamp': 1684263600, + 'series': 'AITAA: Am I the Agony Aunt?', + 'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1', + 'categories': ['Entertainment'], + } + }] + + def _real_extract(self, url): + episode_id = self._match_id(url) + page = self._download_webpage(url, episode_id) + audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id) + + caption = get_element_by_class('caption', page) + series = clean_html(re.sub(r'', '', caption)) + + card_section = get_element_by_class('card-section', page) + date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False) + + return { + 'id': episode_id, + 'title': join_nonempty(series, date, delim=' - '), + 'formats': traverse_obj(audios, (..., 'formats', ...)), + 'timestamp': unified_timestamp(date), # XXX: Does not account for UK's daylight savings + 'series': series, + 'description': clean_html(re.sub(r'[^<]+]+/>', '', card_section)), + 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex( + r']+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)', + page, 'thumbnail', fatal=False)), + 'categories': get_elements_by_class('label', caption), + 'was_live': True, + } diff --git a/hypervideo_dl/extractor/cammodels.py b/hypervideo_dl/extractor/cammodels.py index 0509057..135b315 100644 --- a/hypervideo_dl/extractor/cammodels.py +++ b/hypervideo_dl/extractor/cammodels.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - url_or_none, -) +from ..utils import int_or_none, url_or_none class CamModelsIE(InfoExtractor): @@ -17,32 +13,11 @@ class CamModelsIE(InfoExtractor): def _real_extract(self, url): user_id = self._match_id(url) - webpage = self._download_webpage( - url, user_id, headers=self.geo_verification_headers()) - - manifest_root = self._html_search_regex( - r'manifestUrlRoot=([^&\']+)', webpage, 'manifest', default=None) - - if not manifest_root: - ERRORS = ( - ("I'm offline, but let's stay connected", 'This user is currently offline'), - ('in a private show', 'This user is in a private show'), - ('is currently performing LIVE', 'This model is currently performing live'), - ) - for pattern, message in ERRORS: - if pattern in webpage: - error = message - expected = True - break - else: - error = 'Unable to find manifest URL root' - expected = False - raise ExtractorError(error, expected=expected) - manifest = self._download_json( - '%s%s.json' % (manifest_root, user_id), user_id) + 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id) formats = [] + thumbnails = [] for format_id, format_dict in manifest['formats'].items(): if not isinstance(format_dict, dict): continue @@ -82,12 +57,20 @@ class CamModelsIE(InfoExtractor): 'quality': -10, }) else: + if format_id == 'jpeg': + thumbnails.append({ + 'url': f['url'], + 'width': f['width'], + 'height': f['height'], + 'format_id': f['format_id'], + }) continue formats.append(f) return { 
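# Above, `jpeg` entries in the manifest are diverted into `thumbnails` rather
# than dropped; the shape of that split over a simplified, made-up manifest
# (the real naiadsystems payload nests encodings one level deeper):
manifest = {'formats': {
    'mp4-rtmp': [{'url': 'rtmp://example/stream', 'width': 1280, 'height': 720}],
    'jpeg': [{'url': 'https://example/preview.jpg', 'width': 640, 'height': 360}],
}}

formats, thumbnails = [], []
for format_id, entries in manifest['formats'].items():
    for entry in entries:
        target = thumbnails if format_id == 'jpeg' else formats
        target.append({'format_id': format_id, **entry})

assert [t['url'] for t in thumbnails] == ['https://example/preview.jpg']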
'id': user_id, 'title': user_id, + 'thumbnails': thumbnails, 'is_live': True, 'formats': formats, 'age_limit': 18 diff --git a/hypervideo_dl/extractor/canalplus.py b/hypervideo_dl/extractor/canalplus.py index b7e2f9d..3ff5c3f 100644 --- a/hypervideo_dl/extractor/canalplus.py +++ b/hypervideo_dl/extractor/canalplus.py @@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor): # response = self._request_webpage( # HEADRequest(fmt_url), video_id, # 'Checking if the video is georestricted') - # if '/blocage' in response.geturl(): + # if '/blocage' in response.url: # raise ExtractorError( # 'The video is not available in your country', # expected=True) diff --git a/hypervideo_dl/extractor/cbc.py b/hypervideo_dl/extractor/cbc.py index a9f6cd2..e66071f 100644 --- a/hypervideo_dl/extractor/cbc.py +++ b/hypervideo_dl/extractor/cbc.py @@ -2,20 +2,23 @@ import re import json import base64 import time +import urllib.parse from .common import InfoExtractor from ..compat import ( compat_str, ) from ..utils import ( + ExtractorError, int_or_none, join_nonempty, js_to_json, orderedSet, + parse_iso8601, smuggle_url, strip_or_none, + traverse_obj, try_get, - ExtractorError, ) @@ -159,7 +162,7 @@ class CBCPlayerIE(InfoExtractor): 'upload_date': '20160210', 'uploader': 'CBCC-NEW', }, - 'skip': 'Geo-restricted to Canada', + 'skip': 'Geo-restricted to Canada and no longer available', }, { # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ 'url': 'http://www.cbc.ca/player/play/2657631896', @@ -172,6 +175,9 @@ class CBCPlayerIE(InfoExtractor): 'timestamp': 1425704400, 'upload_date': '20150307', 'uploader': 'CBCC-NEW', + 'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg', + 'chapters': [], + 'duration': 494.811, }, }, { 'url': 'http://www.cbc.ca/player/play/2164402062', @@ -184,6 +190,28 @@ class CBCPlayerIE(InfoExtractor): 'timestamp': 1320410746, 'upload_date': '20111104', 'uploader': 'CBCC-NEW', + 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg', + 'chapters': [], + 'duration': 186.867, + }, + }, { + # Has subtitles + # These broadcasts expire after ~1 month, can find new test URL here: + # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast + 'url': 'http://www.cbc.ca/player/play/2249992771553', + 'md5': '2f2fb675dd4f0f8a5bb7588d1b13bacd', + 'info_dict': { + 'id': '2249992771553', + 'ext': 'mp4', + 'title': 'The National | Women’s soccer pay, Florida seawater, Swift quake', + 'description': 'md5:adba28011a56cfa47a080ff198dad27a', + 'timestamp': 1690596000, + 'duration': 2716.333, + 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]}, + 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/481/326/thumbnail.jpeg', + 'uploader': 'CBCC-NEW', + 'chapters': 'count:5', + 'upload_date': '20230729', }, }] @@ -197,12 +225,45 @@ class CBCPlayerIE(InfoExtractor): 'force_smil_url': True }), 'id': video_id, + '_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS + } + + +class CBCPlayerPlaylistIE(InfoExtractor): + IE_NAME = 'cbc.ca:player:playlist' + _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P[^?#]+)' + _TESTS = [{ + 'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast', + 'playlist_mincount': 25, + 'info_dict': { + 'id': 'news/tv shows/the national/latest broadcast', + } + }, { + 'url': 'https://www.cbc.ca/player/news/Canada/North', + 'playlist_mincount': 25, + 
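# The playlist extractor's `_real_extract` below matches `clipsByCategory`
# keys case-insensitively against the unquoted URL path; the same selection
# written in plain Python over a made-up slice of `window.__INITIAL_STATE__`
# (the clip ids reuse ones from the tests above):
state = {'video': {'clipsByCategory': {
    'News/Canada/North': {'items': [{'id': '2657631896'}, {'id': '2164402062'}]},
}}}
playlist_id = 'news/canada/north'

video_ids = [
    item['id']
    for key, category in state['video']['clipsByCategory'].items()
    if key.lower() == playlist_id
    for item in category.get('items', [])
]
assert video_ids == ['2657631896', '2164402062']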
'info_dict': { + 'id': 'news/canada/north', } + }] + + def _real_extract(self, url): + playlist_id = urllib.parse.unquote(self._match_id(url)).lower() + webpage = self._download_webpage(url, playlist_id) + json_content = self._search_json( + r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', playlist_id) + + def entries(): + for video_id in traverse_obj(json_content, ( + 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id' + )): + yield self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE) + + return self.playlist_result(entries(), playlist_id) class CBCGemIE(InfoExtractor): IE_NAME = 'gem.cbc.ca' - _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' + _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' _TESTS = [{ # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', @@ -245,6 +306,9 @@ class CBCGemIE(InfoExtractor): }, 'params': {'format': 'bv'}, 'skip': 'Geo-restricted to Canada', + }, { + 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', + 'only_matching': True, }] _GEO_COUNTRIES = ['CA'] @@ -346,7 +410,9 @@ class CBCGemIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id) + video_info = self._download_json( + f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}', + video_id, expected_status=426) email, password = self._get_login_info() if email and password: @@ -401,7 +467,7 @@ class CBCGemIE(InfoExtractor): class CBCGemPlaylistIE(InfoExtractor): IE_NAME = 'gem.cbc.ca:playlist' - _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P(?P[0-9a-z-]+)/s(?P[0-9]+))/?(?:[?#]|$)' + _VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P(?P[0-9a-z-]+)/s(?P[0-9]+))/?(?:[?#]|$)' _TESTS = [{ # TV show playlist, all public videos 'url': 'https://gem.cbc.ca/media/schitts-creek/s06', @@ -411,6 +477,9 @@ class CBCGemPlaylistIE(InfoExtractor): 'title': 'Season 6', 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', }, + }, { + 'url': 'https://gem.cbc.ca/schitts-creek/s06', + 'only_matching': True, }] _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/' @@ -418,7 +487,7 @@ class CBCGemPlaylistIE(InfoExtractor): match = self._match_valid_url(url) season_id = match.group('id') show = match.group('show') - show_info = self._download_json(self._API_BASE + show, season_id) + show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426) season = int(match.group('season')) season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None) @@ -470,49 +539,90 @@ class CBCGemPlaylistIE(InfoExtractor): class CBCGemLiveIE(InfoExtractor): IE_NAME = 'gem.cbc.ca:live' - _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P\d+)' - _TEST = { - 'url': 'https://gem.cbc.ca/live/920604739687', - 'info_dict': { - 'title': 'Ottawa', - 'description': 'The live TV channel and local programming from Ottawa', - 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', - 'is_live': True, - 'id': 'AyqZwxRqh8EH', - 'ext': 'mp4', - 'timestamp': 1492106160, - 'upload_date': '20170413', - 'uploader': 'CBCC-NEW', + _VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P\d+)' + _TESTS = [ + { + 'url': 'https://gem.cbc.ca/live/920604739687', + 'info_dict': { + 'title': 'Ottawa', + 'description': 
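# Sketch of the `media/validation` lookup that the live extractor below uses
# in place of the old tpfeed endpoint; the query keys mirror the patch, while
# the wrapper function and media id are placeholders:
import json
import urllib.parse
import urllib.request

def fetch_stream_info(media_id):
    query = urllib.parse.urlencode({
        'appCode': 'mpx', 'connectionType': 'hd', 'deviceType': 'ipad',
        'idMedia': media_id, 'multibitrate': 'true', 'output': 'json',
        'tech': 'hls', 'manifestType': 'desktop',
    })
    url = f'https://services.radio-canada.ca/media/validation/v2/?{query}'
    with urllib.request.urlopen(url) as resp:
        return json.load(resp)  # the HLS manifest is expected under 'url'

# stream_data = fetch_stream_info('some-formatted-id-media')  # hypothetical id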
'The live TV channel and local programming from Ottawa', + 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg', + 'is_live': True, + 'id': 'AyqZwxRqh8EH', + 'ext': 'mp4', + 'timestamp': 1492106160, + 'upload_date': '20170413', + 'uploader': 'CBCC-NEW', + }, + 'skip': 'Live might have ended', }, - 'skip': 'Live might have ended', - } - - # It's unclear where the chars at the end come from, but they appear to be - # constant. Might need updating in the future. - # There are two URLs, some livestreams are in one, and some - # in the other. The JSON schema is the same for both. - _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT'] + { + 'url': 'https://gem.cbc.ca/live/44', + 'info_dict': { + 'id': '44', + 'ext': 'mp4', + 'is_live': True, + 'title': r're:^Ottawa [0-9\-: ]+', + 'description': 'The live TV channel and local programming from Ottawa', + 'live_status': 'is_live', + 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*' + }, + 'params': {'skip_download': True}, + 'skip': 'Live might have ended', + }, + { + 'url': 'https://gem.cbc.ca/live-event/10835', + 'info_dict': { + 'id': '10835', + 'ext': 'mp4', + 'is_live': True, + 'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+', + 'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.', + 'live_status': 'is_live', + 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', + 'timestamp': 1679706000, + 'upload_date': '20230325', + }, + 'params': {'skip_download': True}, + 'skip': 'Live might have ended', + } + ] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data'] - for api_url in self._API_URLS: - video_info = next(( - stream for stream in self._download_json(api_url, video_id)['entries'] - if stream.get('guid') == video_id), None) - if video_info: - break - else: + # Two types of metadata JSON + if not video_info.get('formattedIdMedia'): + video_info = traverse_obj( + video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}), + get_all=False, default={}) + + video_stream_id = video_info.get('formattedIdMedia') + if not video_stream_id: raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True) + stream_data = self._download_json( + 'https://services.radio-canada.ca/media/validation/v2/', video_id, query={ + 'appCode': 'mpx', + 'connectionType': 'hd', + 'deviceType': 'ipad', + 'idMedia': video_stream_id, + 'multibitrate': 'true', + 'output': 'json', + 'tech': 'hls', + 'manifestType': 'desktop', + }) + return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': video_info['content'][0]['url'], 'id': video_id, - 'title': video_info.get('title'), - 'description': video_info.get('description'), - 'tags': try_get(video_info, lambda x: x['keywords'].split(', ')), - 'thumbnail': video_info.get('cbc$staticImage'), + 'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True), 'is_live': True, + **traverse_obj(video_info, { + 'title': 'title', + 'description': 'description', + 'thumbnail': ('images', 'card', 'url'), + 'timestamp': ('airDate', {parse_iso8601}), + }) } diff --git 
a/hypervideo_dl/extractor/cbs.py b/hypervideo_dl/extractor/cbs.py index 9aacd50..1c0dbde 100644 --- a/hypervideo_dl/extractor/cbs.py +++ b/hypervideo_dl/extractor/cbs.py @@ -1,8 +1,14 @@ +from .brightcove import BrightcoveNewIE +from .common import InfoExtractor from .theplatform import ThePlatformFeedIE +from .youtube import YoutubeIE from ..utils import ( ExtractorError, + extract_attributes, + get_element_html_by_id, int_or_none, find_xpath_attr, + smuggle_url, xpath_element, xpath_text, update_url_query, @@ -162,3 +168,110 @@ class CBSIE(CBSBaseIE): 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), 'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')), }) + + +class ParamountPressExpressIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?Pyt-)?video/?\?watch=(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx', + 'md5': '56631dbcadaab980d1fc47cb7b76cba4', + 'info_dict': { + 'id': '6322981580112', + 'ext': 'mp4', + 'title': 'I’m Felicia', + 'description': 'md5:88fad93f8eede1c9c8f390239e4c6290', + 'uploader_id': '6055873637001', + 'upload_date': '20230320', + 'timestamp': 1679334960, + 'duration': 49.557, + 'thumbnail': r're:^https://.+\.jpg', + 'tags': [], + }, + }, { + 'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc', + 'md5': 'edcb03e3210b88a3e56c05aa863e0e5b', + 'info_dict': { + 'id': '6323036027112', + 'ext': 'mp4', + 'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More', + 'description': 'md5:b929867a357aac5544b783d834c78383', + 'uploader_id': '6055873637001', + 'upload_date': '20230321', + 'timestamp': 1679430180, + 'duration': 132.032, + 'thumbnail': r're:^https://.+\.jpg', + 'tags': [], + }, + }, { + 'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck', + 'info_dict': { + 'id': 'OX9wJWOcqck', + 'ext': 'mp4', + 'title': 'Rugrats | Season 2 Official Trailer | Paramount+', + 'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de', + 'uploader': 'Paramount Plus', + 'uploader_id': '@paramountplus', + 'uploader_url': 'http://www.youtube.com/@paramountplus', + 'channel': 'Paramount Plus', + 'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg', + 'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg', + 'upload_date': '20230316', + 'duration': 88, + 'age_limit': 0, + 'availability': 'public', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'view_count': int, + 'like_count': int, + 'channel_follower_count': int, + 'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg', + 'categories': ['Entertainment'], + 'tags': ['Rugrats'], + }, + }, { + 'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw', + 'info_dict': { + 'id': '_ljssSoDLkw', + 'ext': 'mp4', + 'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME', + 'description': 'md5:39581bcc3fd810209b642609f448af70', + 'uploader': 'SHOWTIME', + 'uploader_id': '@Showtime', + 'uploader_url': 'http://www.youtube.com/@Showtime', + 'channel': 'SHOWTIME', + 'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ', + 'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ', + 'upload_date': '20230209', + 'duration': 49, + 'age_limit': 0, + 'availability': 'public', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'view_count': int, + 'like_count': int, + 'comment_count': int, 
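# Further down, `_real_extract` assembles a standard Brightcove player URL and
# smuggles the page's bearer token along with it; the assembly itself is just
# string formatting (account/player/embed are the patch's fallback defaults,
# the video id is from the first test above):
account_id, player_id, embed = '6055873637001', 'OtLKgXlO9F', 'default'
video_id = '6322981580112'

embed_url = (f'https://players.brightcove.net/{account_id}/{player_id}_{embed}'
             f'/index.html?videoId={video_id}')
# smuggle_url(embed_url, {'token': token}) then hands the token to
# BrightcoveNewIE, which switches to the edge-auth API host and sends it as
# an `Authorization: Bearer` header.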
+ 'channel_follower_count': int, + 'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp', + 'categories': ['People & Blogs'], + 'tags': 'count:27', + }, + }] + + def _real_extract(self, url): + display_id, is_youtube = self._match_valid_url(url).group('id', 'yt') + if is_youtube: + return self.url_result(display_id, YoutubeIE) + + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID') + token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token') + + player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '') + account_id = player.get('data-account') or '6055873637001' + player_id = player.get('data-player') or 'OtLKgXlO9F' + embed = player.get('data-embed') or 'default' + + return self.url_result(smuggle_url( + f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}', + {'token': token}), BrightcoveNewIE) diff --git a/hypervideo_dl/extractor/cbsnews.py b/hypervideo_dl/extractor/cbsnews.py index 16edf3a..5a8ebb8 100644 --- a/hypervideo_dl/extractor/cbsnews.py +++ b/hypervideo_dl/extractor/cbsnews.py @@ -1,36 +1,153 @@ +import base64 import re +import urllib.error +import urllib.parse import zlib +from .anvato import AnvatoIE from .common import InfoExtractor -from .cbs import CBSIE -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) +from .paramountplus import ParamountPlusIE +from ..networking import HEADRequest from ..utils import ( + ExtractorError, + UserNotLive, + determine_ext, + float_or_none, + format_field, + int_or_none, + make_archive_id, + mimetype2ext, parse_duration, + smuggle_url, + traverse_obj, + url_or_none, ) -class CBSNewsEmbedIE(CBSIE): # XXX: Do not subclass from concrete IE +class CBSNewsBaseIE(InfoExtractor): + _LOCALES = { + 'atlanta': None, + 'baltimore': 'BAL', + 'boston': 'BOS', + 'chicago': 'CHI', + 'colorado': 'DEN', + 'detroit': 'DET', + 'losangeles': 'LA', + 'miami': 'MIA', + 'minnesota': 'MIN', + 'newyork': 'NY', + 'philadelphia': 'PHI', + 'pittsburgh': 'PIT', + 'sacramento': 'SAC', + 'sanfrancisco': 'SF', + 'texas': 'DAL', + } + _LOCALE_RE = '|'.join(map(re.escape, _LOCALES)) + _ANVACK = '5VD6Eyd6djewbCmNwBFnsJj17YAvGRwl' + + def _get_item(self, webpage, display_id): + return traverse_obj(self._search_json( + r'CBSNEWS\.defaultPayload\s*=', webpage, 'payload', display_id, + default={}), ('items', 0, {dict})) or {} + + def _get_video_url(self, item): + return traverse_obj(item, 'video', 'video2', expected_type=url_or_none) + + def _extract_playlist(self, webpage, playlist_id): + entries = [self.url_result(embed_url, CBSNewsEmbedIE) for embed_url in re.findall( + r']+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage)] + if entries: + return self.playlist_result( + entries, playlist_id, self._html_search_meta(['og:title', 'twitter:title'], webpage), + self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)) + + def _extract_video(self, item, video_url, video_id): + if mimetype2ext(item.get('format'), default=determine_ext(video_url)) == 'mp4': + formats = [{'url': video_url, 'ext': 'mp4'}] + + else: + manifest = self._download_webpage(video_url, video_id, note='Downloading m3u8 information') + + anvato_id = self._search_regex(r'anvato-(\d+)', manifest, 'Anvato ID', default=None) + # Prefer Anvato if available; cbsnews.com m3u8 formats are re-encoded from Anvato source 
+ if anvato_id: + return self.url_result( + smuggle_url(f'anvato:{self._ANVACK}:{anvato_id}', {'token': 'default'}), + AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)]) + + formats, _ = self._parse_m3u8_formats_and_subtitles( + manifest, video_url, 'mp4', m3u8_id='hls', video_id=video_id) + + def get_subtitles(subs_url): + return { + 'en': [{ + 'url': subs_url, + 'ext': 'dfxp', # TTAF1 + }], + } if url_or_none(subs_url) else None + + episode_meta = traverse_obj(item, { + 'season_number': ('season', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + }) if item.get('isFullEpisode') else {} + + return { + 'id': video_id, + 'formats': formats, + **traverse_obj(item, { + 'title': (None, ('fulltitle', 'title')), + 'description': 'dek', + 'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}), + 'duration': ('duration', {float_or_none}), + 'subtitles': ('captions', {get_subtitles}), + 'thumbnail': ('images', ('hd', 'sd'), {url_or_none}), + 'is_live': ('type', {lambda x: x == 'live'}), + }, get_all=False), + **episode_meta, + } + + +class CBSNewsEmbedIE(CBSNewsBaseIE): IE_NAME = 'cbsnews:embed' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P.+)' _TESTS = [{ 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78
A', - 'only_matching': True, + 'info_dict': { + 'id': '6ZP4cXvo9FaX3VLH7MF4CgY30JFpY_GA', + 'ext': 'mp4', + 'title': 'Cops investigate gorilla incident at Cincinnati Zoo', + 'description': 'md5:fee7441ab8aaeb3c693482394738102b', + 'duration': 350, + 'timestamp': 1464719713, + 'upload_date': '20160531', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): - item = self._parse_json(zlib.decompress(compat_b64decode( - compat_urllib_parse_unquote(self._match_id(url))), - -zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0] - return self._extract_video_info(item['mpxRefId'], 'cbsnews') + item = traverse_obj(self._parse_json(zlib.decompress(base64.b64decode( + urllib.parse.unquote(self._match_id(url))), + -zlib.MAX_WBITS).decode(), None), ('video', 'items', 0, {dict})) or {} + video_id = item['mpxRefId'] + video_url = self._get_video_url(item) + if not video_url: + # Old embeds redirect user to ParamountPlus but most links are 404 + pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}' + try: + self._request_webpage(HEADRequest(pplus_url), video_id) + return self.url_result(pplus_url, ParamountPlusIE) + except ExtractorError: + self.raise_no_formats('This video is no longer available', True, video_id) -class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE + return self._extract_video(item, video_url, video_id) + + +class CBSNewsIE(CBSNewsBaseIE): IE_NAME = 'cbsnews' IE_DESC = 'CBS News' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P[\w-]+)' _TESTS = [ { @@ -47,10 +164,7 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE 'timestamp': 1476046464, 'upload_date': '20161009', }, - 'params': { - # rtmp download - 'skip_download': True, - }, + 'skip': 'This video is no longer available', }, { 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', @@ -61,48 +175,234 @@ class CBSNewsIE(CBSIE): # XXX: Do not subclass from concrete IE 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', 'upload_date': '20140404', 'timestamp': 1396650660, - 'uploader': 'CBSI-NEW', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 205, 'subtitles': { 'en': [{ - 'ext': 'ttml', + 'ext': 'dfxp', }], }, }, 'params': { - # m3u8 download - 'skip_download': True, + 'skip_download': 'm3u8', }, }, { # 48 hours 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/', 'info_dict': { + 'id': 'maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved', 'title': 'Cold as Ice', 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?', }, 'playlist_mincount': 7, }, + { + 'url': 'https://www.cbsnews.com/video/032823-cbs-evening-news/', + 'info_dict': { + 'id': '_2wuO7hD9LwtyM_TwSnVwnKp6kxlcXgE', + 'ext': 'mp4', + 'title': 'CBS Evening News, March 28, 2023', + 'description': 'md5:db20615aae54adc1d55a1fd69dc75d13', + 'duration': 1189, + 'timestamp': 1680042600, + 'upload_date': '20230328', + 'season': 'Season 2023', + 'season_number': 2023, + 'episode': 'Episode 83', + 'episode_number': 83, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, ] def _real_extract(self, url): display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + playlist = self._extract_playlist(webpage, 
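# The embed id decoded above is a URL-quoted, base64'd, raw-deflate blob of
# JSON; every step is symmetric, so a round trip over a toy payload shows the
# whole pipeline:
import base64
import json
import urllib.parse
import zlib

payload = {'video': {'items': [{'mpxRefId': 'abc123'}]}}
deflate = zlib.compressobj(wbits=-zlib.MAX_WBITS)  # raw deflate, no zlib header
blob = deflate.compress(json.dumps(payload).encode()) + deflate.flush()
fragment = urllib.parse.quote(base64.b64encode(blob))

item = json.loads(zlib.decompress(
    base64.b64decode(urllib.parse.unquote(fragment)), -zlib.MAX_WBITS))['video']['items'][0]
assert item['mpxRefId'] == 'abc123'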
display_id) + if playlist: + return playlist + item = self._get_item(webpage, display_id) + video_id = item.get('mpxRefId') or display_id + video_url = self._get_video_url(item) + if not video_url: + self.raise_no_formats('No video content was found', expected=True, video_id=video_id) + + return self._extract_video(item, video_url, video_id) + + +class CBSLocalBaseIE(CBSNewsBaseIE): + def _real_extract(self, url): + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - entries = [] - for embed_url in re.findall(r']+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage): - entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key())) - if entries: - return self.playlist_result( - entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage), - playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)) + item = self._get_item(webpage, display_id) + video_id = item.get('mpxRefId') or display_id + anvato_id = None + video_url = self._get_video_url(item) + + if not video_url: + anv_params = self._search_regex( + r']+\bdata-src="https?://w3\.mp\.lura\.live/player/prod/v3/anvload\.html\?key=([^"]+)"', + webpage, 'Anvato URL', default=None) + + if not anv_params: + playlist = self._extract_playlist(webpage, display_id) + if playlist: + return playlist + self.raise_no_formats('No video content was found', expected=True, video_id=video_id) + + anv_data = self._parse_json(base64.urlsafe_b64decode(f'{anv_params}===').decode(), video_id) + anvato_id = anv_data['v'] + return self.url_result( + smuggle_url(f'anvato:{anv_data.get("anvack") or self._ANVACK}:{anvato_id}', { + 'token': anv_data.get('token') or 'default', + }), AnvatoIE, url_transparent=True, _old_archive_ids=[make_archive_id(self, anvato_id)]) + + return self._extract_video(item, video_url, video_id) + - item = self._parse_json(self._html_search_regex( - r'CBSNEWS\.defaultPayload\s*=\s*({.+})', - webpage, 'video JSON info'), display_id)['items'][0] - return self._extract_video_info(item['mpxRefId'], 'cbsnews') +class CBSLocalIE(CBSLocalBaseIE): + _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/(?:live/)?video/(?P[\w-]+)' + _TESTS = [{ + # Anvato video via defaultPayload JSON + 'url': 'https://www.cbsnews.com/newyork/video/1st-cannabis-dispensary-opens-in-queens/', + 'info_dict': { + 'id': '6376747', + 'ext': 'mp4', + 'title': '1st cannabis dispensary opens in Queens', + 'description': 'The dispensary is women-owned and located in Jamaica.', + 'uploader': 'CBS', + 'duration': 20, + 'timestamp': 1680193657, + 'upload_date': '20230330', + 'categories': ['Stations\\Spoken Word\\WCBSTV', 'Content\\Google', 'Content\\News', 'Content\\News\\Local News'], + 'tags': 'count:11', + 'thumbnail': 're:^https?://.*', + '_old_archive_ids': ['cbslocal 6376747'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + # cbsnews.com video via defaultPayload JSON + 'url': 'https://www.cbsnews.com/newyork/live/video/20230330171655-the-city-is-sounding-the-alarm-on-dangerous-social-media-challenges/', + 'info_dict': { + 'id': 'sJqfw7YvgSC6ant2zVmzt3y1jYKoL5J3', + 'ext': 'mp4', + 'title': 'the city is sounding the alarm on dangerous social media challenges', + 'description': 'md5:8eccc9b1b73be5138a52e9c4350d2cd6', + 'thumbnail': 'https://images-cbsn.cbsnews.com/prod/2023/03/30/story_22509622_1680196925.jpg', + 'duration': 41.0, + 'timestamp': 1680196615, + 'upload_date': '20230330', + }, + 'params': 
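# The anvload `key=` parameter decoded above is urlsafe base64 whose padding
# has been stripped; appending '===' always restores enough of it, and the
# lenient decoder discards any surplus. Round trip over a toy anv_data dict
# (the id matches the first CBSLocal test):
import base64
import json

anv_params = base64.urlsafe_b64encode(
    json.dumps({'v': '6376747', 'token': 'default'}).encode()).decode().rstrip('=')
anv_data = json.loads(base64.urlsafe_b64decode(f'{anv_params}==='))
assert anv_data['v'] == '6376747'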
{'skip_download': 'm3u8'}, + }] + + +class CBSLocalArticleIE(CBSLocalBaseIE): + _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?:{CBSNewsBaseIE._LOCALE_RE})/news/(?P[\w-]+)' + _TESTS = [{ + # Anvato video via iframe embed + 'url': 'https://www.cbsnews.com/newyork/news/mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service/', + 'playlist_count': 2, + 'info_dict': { + 'id': 'mta-station-agents-leaving-their-booths-to-provide-more-direct-customer-service', + 'title': 'MTA station agents begin leaving their booths to provide more direct customer service', + 'description': 'The more than 2,200 agents will provide face-to-face customer service to passengers.', + }, + }, { + 'url': 'https://www.cbsnews.com/losangeles/news/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis/', + 'md5': 'f0ee3081e3843f575fccef901199b212', + 'info_dict': { + 'id': '3401037', + 'ext': 'mp4', + 'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1463440500, + 'upload_date': '20160516', + }, + 'skip': 'Video has been removed', + }] + + +class CBSNewsLiveBaseIE(CBSNewsBaseIE): + def _get_id(self, url): + raise NotImplementedError('This method must be implemented by subclasses') + + def _real_extract(self, url): + video_id = self._get_id(url) + if not video_id: + raise ExtractorError('Livestream is not available', expected=True) + + data = traverse_obj(self._download_json( + 'https://feeds-cbsn.cbsnews.com/2.0/rundown/', video_id, query={ + 'partner': 'cbsnsite', + 'edition': video_id, + 'type': 'live', + }), ('navigation', 'data', 0, {dict})) + + video_url = traverse_obj(data, (('videoUrlDAI', ('videoUrl', 'base')), {url_or_none}), get_all=False) + if not video_url: + raise UserNotLive(video_id=video_id) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + **traverse_obj(data, { + 'title': 'headline', + 'description': 'rundown_slug', + 'thumbnail': ('images', 'thumbnail_url_hd', {url_or_none}), + }), + } + + +class CBSLocalLiveIE(CBSNewsLiveBaseIE): + _VALID_URL = rf'https?://(?:www\.)?cbsnews\.com/(?P{CBSNewsBaseIE._LOCALE_RE})/live/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.cbsnews.com/losangeles/live/', + 'info_dict': { + 'id': 'CBSN-LA', + 'ext': 'mp4', + 'title': str, + 'description': r're:KCBS/CBSN_LA.CRISPIN.\w+.RUNDOWN \w+ \w+', + 'thumbnail': r're:^https?://.*\.jpg$', + 'live_status': 'is_live', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _get_id(self, url): + return format_field(self._LOCALES, self._match_id(url), 'CBSN-%s') + + +class CBSNewsLiveIE(CBSNewsLiveBaseIE): + IE_NAME = 'cbsnews:live' + IE_DESC = 'CBS News Livestream' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.cbsnews.com/live/', + 'info_dict': { + 'id': 'CBSN-US', + 'ext': 'mp4', + 'title': str, + 'description': r're:\w+ \w+ CRISPIN RUNDOWN', + 'thumbnail': r're:^https?://.*\.jpg$', + 'live_status': 'is_live', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _get_id(self, url): + return 'CBSN-US' class CBSNewsLiveVideoIE(InfoExtractor): @@ -111,7 +411,7 @@ class CBSNewsLiveVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P[^/?#]+)' # Live videos get deleted soon. 
See http://www.cbsnews.com/live/ for the latest examples - _TEST = { + _TESTS = [{ 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'info_dict': { 'id': 'clinton-sanders-prepare-to-face-off-in-nh', @@ -120,7 +420,7 @@ class CBSNewsLiveVideoIE(InfoExtractor): 'duration': 334, }, 'skip': 'Video gone', - } + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -131,13 +431,13 @@ class CBSNewsLiveVideoIE(InfoExtractor): 'dvr_slug': display_id, }) - formats = self._extract_akamai_formats(video_info['url'], display_id) - return { 'id': display_id, 'display_id': display_id, - 'title': video_info['headline'], - 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'), - 'duration': parse_duration(video_info.get('segmentDur')), - 'formats': formats, + 'formats': self._extract_akamai_formats(video_info['url'], display_id), + **traverse_obj(video_info, { + 'title': 'headline', + 'thumbnail': ('thumbnail_url_hd', {url_or_none}), + 'duration': ('segmentDur', {parse_duration}), + }), } diff --git a/hypervideo_dl/extractor/cda.py b/hypervideo_dl/extractor/cda.py index d1212e6..1157114 100644 --- a/hypervideo_dl/extractor/cda.py +++ b/hypervideo_dl/extractor/cda.py @@ -4,6 +4,7 @@ import datetime import hashlib import hmac import json +import random import re from .common import InfoExtractor @@ -27,11 +28,10 @@ class CDAIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' _NETRC_MACHINE = 'cdapl' - _BASE_URL = 'http://www.cda.pl/' + _BASE_URL = 'https://www.cda.pl' _BASE_API_URL = 'https://api.cda.pl' _API_HEADERS = { 'Accept': 'application/vnd.cda.public+json', - 'User-Agent': 'pl.cda 1.0 (version 1.2.88 build 15306; Android 9; Xiaomi Redmi 3S)', } # hardcoded in the app _LOGIN_REQUEST_AUTH = 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q' @@ -101,6 +101,38 @@ class CDAIE(InfoExtractor): }, **kwargs) def _perform_login(self, username, password): + app_version = random.choice(( + '1.2.88 build 15306', + '1.2.174 build 18469', + )) + android_version = random.randrange(8, 14) + phone_model = random.choice(( + # x-kom.pl top selling Android smartphones, as of 2022-12-26 + # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android + 'ASUS ZenFone 8', + 'Motorola edge 20 5G', + 'Motorola edge 30 neo 5G', + 'Motorola moto g22', + 'OnePlus Nord 2T 5G', + 'Samsung Galaxy A32 SM‑A325F', + 'Samsung Galaxy M13', + 'Samsung Galaxy S20 FE 5G', + 'Xiaomi 11T', + 'Xiaomi POCO M4 Pro', + 'Xiaomi Redmi 10', + 'Xiaomi Redmi 10C', + 'Xiaomi Redmi 9C NFC', + 'Xiaomi Redmi Note 10 Pro', + 'Xiaomi Redmi Note 11 Pro', + 'Xiaomi Redmi Note 11', + 'Xiaomi Redmi Note 11S 5G', + 'Xiaomi Redmi Note 11S', + 'realme 10', + 'realme 9 Pro+', + 'vivo Y33s', + )) + self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})' + cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {} if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5: self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}' @@ -138,9 +170,6 @@ class CDAIE(InfoExtractor): meta = self._download_json( f'{self._BASE_API_URL}/video/{video_id}', video_id, headers=self._API_HEADERS)['video'] - if meta.get('premium') and not meta.get('premium_free'): - self.report_drm(video_id) - uploader = 
traverse_obj(meta, 'author', 'login') formats = [{ @@ -151,6 +180,10 @@ class CDAIE(InfoExtractor): 'filesize': quality.get('length'), } for quality in meta['qualities'] if quality.get('file')] + if meta.get('premium') and not meta.get('premium_free') and not formats: + raise ExtractorError( + 'Video requires CDA Premium - subscription needed', expected=True) + return { 'id': video_id, 'title': meta.get('title'), @@ -167,10 +200,10 @@ class CDAIE(InfoExtractor): def _web_extract(self, video_id, url): self._set_cookie('cda.pl', 'cda.player', 'html5') webpage = self._download_webpage( - self._BASE_URL + '/video/' + video_id, video_id) + f'{self._BASE_URL}/video/{video_id}/vfilm', video_id) if 'Ten film jest dostępny dla użytkowników premium' in webpage: - raise ExtractorError('This video is only available for premium users.', expected=True) + self.raise_login_required('This video is only available for premium users') if re.search(r'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage): self.raise_geo_restricted() diff --git a/hypervideo_dl/extractor/ceskatelevize.py b/hypervideo_dl/extractor/ceskatelevize.py index be2b0bb..8390160 100644 --- a/hypervideo_dl/extractor/ceskatelevize.py +++ b/hypervideo_dl/extractor/ceskatelevize.py @@ -1,20 +1,20 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_urllib_parse_urlparse, -) +from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse +from ..networking import Request from ..utils import ( ExtractorError, float_or_none, - sanitized_Request, str_or_none, traverse_obj, urlencode_postdata, - USER_AGENTS, ) +USER_AGENTS = { + 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27', +} + class CeskaTelevizeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P[^/#?]+)' @@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, playlist_id) - parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) + parsed_url = compat_urllib_parse_urlparse(urlh.url) site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: @@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor): entries = [] for user_agent in (None, USER_AGENTS['Safari']): - req = sanitized_Request( + req = Request( 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) - req.add_header('Content-type', 'application/x-www-form-urlencoded') - req.add_header('x-addr', '127.0.0.1') - req.add_header('X-Requested-With', 'XMLHttpRequest') + req.headers['Content-type'] = 'application/x-www-form-urlencoded' + req.headers['x-addr'] = '127.0.0.1' + req.headers['X-Requested-With'] = 'XMLHttpRequest' if user_agent: - req.add_header('User-Agent', user_agent) - req.add_header('Referer', url) + req.headers['User-Agent'] = user_agent + req.headers['Referer'] = url playlistpage = self._download_json(req, playlist_id, fatal=False) @@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_url == 'error_region': raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) - req.add_header('Referer', url) + req = 
Request(compat_urllib_parse_unquote(playlist_url)) + req.headers['Referer'] = url playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: diff --git a/hypervideo_dl/extractor/chilloutzone.py b/hypervideo_dl/extractor/chilloutzone.py index 1a2f77c..ac4252f 100644 --- a/hypervideo_dl/extractor/chilloutzone.py +++ b/hypervideo_dl/extractor/chilloutzone.py @@ -1,93 +1,123 @@ -import json +import base64 from .common import InfoExtractor -from .youtube import YoutubeIE -from ..compat import compat_b64decode from ..utils import ( clean_html, - ExtractorError + int_or_none, + traverse_obj, ) class ChilloutzoneIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+)\.html' + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P<id>[\w-]+)\.html' _TESTS = [{ - 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'url': 'https://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', 'md5': 'a76f3457e813ea0037e5244f509e66d1', 'info_dict': { 'id': 'enemene-meck-alle-katzen-weg', 'ext': 'mp4', 'title': 'Enemene Meck - Alle Katzen weg', 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + 'duration': 24, }, }, { 'note': 'Video hosted at YouTube', - 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'url': 'https://www.chilloutzone.net/video/eine-sekunde-bevor.html', 'info_dict': { 'id': '1YVQaAgHyRU', 'ext': 'mp4', 'title': '16 Photos Taken 1 Second Before Disaster', 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', 'uploader': 'BuzzFeedVideo', - 'uploader_id': 'BuzzFeedVideo', + 'uploader_id': '@BuzzFeedVideo', 'upload_date': '20131105', + 'availability': 'public', + 'thumbnail': 'https://i.ytimg.com/vi/1YVQaAgHyRU/maxresdefault.jpg', + 'tags': 'count:41', + 'like_count': int, + 'playable_in_embed': True, + 'channel_url': 'https://www.youtube.com/channel/UCpko_-a4wgz2u_DgDgd9fqA', + 'chapters': 'count:6', + 'live_status': 'not_live', + 'view_count': int, + 'categories': ['Entertainment'], + 'age_limit': 0, + 'channel_id': 'UCpko_-a4wgz2u_DgDgd9fqA', + 'duration': 100, + 'uploader_url': 'http://www.youtube.com/@BuzzFeedVideo', + 'channel_follower_count': int, + 'channel': 'BuzzFeedVideo', }, }, { - 'note': 'Video hosted at Vimeo', - 'url': 'http://www.chilloutzone.net/video/icon-blending.html', - 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'url': 'https://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2f9d6850ec567b24f0f4fa143b9aa2f9', 'info_dict': { - 'id': '85523671', + 'id': 'LLNkHpSjBfc', 'ext': 'mp4', - 'title': 'The Sunday Times - Icons', - 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}', - 'uploader': 'Us', - 'uploader_id': 'usfilms', - 'upload_date': '20140131' + 'title': 'The Sunday Times Making of Icons', + 'description': 'md5:b9259fcf63a1669e42001e5db677f02a', + 'uploader': 'MadFoxUA', + 'uploader_id': '@MadFoxUA', + 'upload_date': '20140204', + 'channel_id': 'UCSZa9Y6-Vl7c11kWMcbAfCw', + 'channel_url': 'https://www.youtube.com/channel/UCSZa9Y6-Vl7c11kWMcbAfCw', + 'comment_count': int, + 'uploader_url': 'http://www.youtube.com/@MadFoxUA', + 'duration': 66, + 'live_status': 'not_live', + 'channel_follower_count': int, + 'playable_in_embed': True, + 'view_count': int, + 'like_count': int, + 'thumbnail': 'https://i.ytimg.com/vi/LLNkHpSjBfc/maxresdefault.jpg', + 'categories': ['Comedy'], + 'availability': 'public', + 'tags': [], + 'channel': 'MadFoxUA', + 'age_limit': 0, + }, + }, { + 'url':
'https://www.chilloutzone.net/video/ordentlich-abgeschuettelt.html', + 'info_dict': { + 'id': 'ordentlich-abgeschuettelt', + 'ext': 'mp4', + 'title': 'Ordentlich abgeschüttelt', + 'description': 'md5:d41541966b75d3d1e8ea77a94ea0d329', + 'duration': 18, }, }] def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + b64_data = self._html_search_regex( + r'var cozVidData\s*=\s*"([^"]+)"', webpage, 'video data') + info = self._parse_json(base64.b64decode(b64_data).decode(), video_id) - base64_video_info = self._html_search_regex( - r'var cozVidData = "(.+?)";', webpage, 'video data') - decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') - video_info_dict = json.loads(decoded_video_info) - - # get video information from dict - video_url = video_info_dict['mediaUrl'] - description = clean_html(video_info_dict.get('description')) - title = video_info_dict['title'] - native_platform = video_info_dict['nativePlatform'] - native_video_id = video_info_dict['nativeVideoId'] - source_priority = video_info_dict['sourcePriority'] - - # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) - if native_platform is None: - youtube_url = YoutubeIE._extract_url(webpage) - if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + video_url = info.get('mediaUrl') + native_platform = info.get('nativePlatform') - # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or - # the own CDN - if source_priority == 'native': + if native_platform and info.get('sourcePriority') == 'native': + native_video_id = info['nativeVideoId'] if native_platform == 'youtube': - return self.url_result(native_video_id, ie='Youtube') - if native_platform == 'vimeo': - return self.url_result( - 'http://vimeo.com/' + native_video_id, ie='Vimeo') + return self.url_result(native_video_id, 'Youtube') + elif native_platform == 'vimeo': + return self.url_result(f'https://vimeo.com/{native_video_id}', 'Vimeo') - if not video_url: - raise ExtractorError('No video found') + elif not video_url: + # Possibly a standard youtube embed? 
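# The rewrite above pulls the page's cozVidData attribute, base64-decodes it
# to JSON, and then dispatches on nativePlatform/sourcePriority. A minimal
# standalone sketch of that decode step; the payload below is fabricated for
# illustration, not a real value from the site:
import base64
import json

sample = base64.b64encode(json.dumps({
    'mediaUrl': 'https://example.invalid/clip.mp4',
    'nativePlatform': 'youtube',
    'nativeVideoId': '1YVQaAgHyRU',
    'sourcePriority': 'native',
}).encode()).decode()

info = json.loads(base64.b64decode(sample).decode())
if info.get('nativePlatform') and info.get('sourcePriority') == 'native':
    # delegate to the native hoster (YouTube/Vimeo), as the extractor does
    print('delegate to', info['nativePlatform'], ':', info['nativeVideoId'])
else:
    # fall back to the site's own CDN URL
    print('download directly from', info['mediaUrl'])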
+ # TODO: Investigate if site still does this (there are no tests for it) + return self.url_result(url, 'Generic') return { 'id': video_id, 'url': video_url, 'ext': 'mp4', - 'title': title, - 'description': description, + **traverse_obj(info, { + 'title': 'title', + 'description': ('description', {clean_html}), + 'duration': ('videoLength', {int_or_none}), + 'width': ('videoWidth', {int_or_none}), + 'height': ('videoHeight', {int_or_none}), + }), } diff --git a/hypervideo_dl/extractor/cinetecamilano.py b/hypervideo_dl/extractor/cinetecamilano.py index 5e770eb..9cffa11 100644 --- a/hypervideo_dl/extractor/cinetecamilano.py +++ b/hypervideo_dl/extractor/cinetecamilano.py @@ -1,6 +1,6 @@ import json -import urllib.error from .common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, float_or_none, @@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor): 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '' }) except ExtractorError as e: - if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500) + if ((isinstance(e.cause, HTTPError) and e.cause.status == 500) or isinstance(e.cause, json.JSONDecodeError)): self.raise_login_required(method='cookies') raise diff --git a/hypervideo_dl/extractor/ciscowebex.py b/hypervideo_dl/extractor/ciscowebex.py index 44595d8..85585df 100644 --- a/hypervideo_dl/extractor/ciscowebex.py +++ b/hypervideo_dl/extractor/ciscowebex.py @@ -1,5 +1,6 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, try_get, unified_timestamp, @@ -32,17 +33,36 @@ class CiscoWebexIE(InfoExtractor): if rcid: webpage = self._download_webpage(url, None, note='Getting video ID') url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url') - url = self._request_webpage(url, None, note='Resolving final URL').geturl() + url = self._request_webpage(url, None, note='Resolving final URL').url mobj = self._match_valid_url(url) subdomain = mobj.group('subdomain') siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') video_id = mobj.group('id') - stream = self._download_json( + password = self.get_param('videopassword') + + headers = {'Accept': 'application/json'} + if password: + headers['accessPwd'] = password + + stream, urlh = self._download_json_handle( 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), - video_id, fatal=False, query={'siteurl': siteurl}) - if not stream: - self.raise_login_required(method='cookies') + video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) + + if urlh.status == 403: + if stream['code'] == 53004: + self.raise_login_required() + if stream['code'] == 53005: + if password: + raise ExtractorError('Wrong password', expected=True) + raise ExtractorError( + 'This video is protected by a password, use the --video-password option', expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True) + + if urlh.status == 429: + self.raise_login_required( + f'{self.IE_NAME} asks you to solve a CAPTCHA. 
Solve CAPTCHA in browser and', + method='cookies') video_id = stream.get('recordUUID') or video_id @@ -78,7 +98,7 @@ class CiscoWebexIE(InfoExtractor): 'title': stream['recordName'], 'description': stream.get('description'), 'uploader': stream.get('ownerDisplayName'), - 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id + 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), 'timestamp': unified_timestamp(stream.get('createTime')), 'duration': int_or_none(stream.get('duration'), 1000), 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), diff --git a/hypervideo_dl/extractor/clipchamp.py b/hypervideo_dl/extractor/clipchamp.py new file mode 100644 index 0000000..a8bdf7e --- /dev/null +++ b/hypervideo_dl/extractor/clipchamp.py @@ -0,0 +1,61 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class ClipchampIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU', + 'info_dict': { + 'id': 'gRXZ4ZhdDaU', + 'ext': 'mp4', + 'title': 'Untitled video', + 'uploader': 'Alexander Schwartz', + 'timestamp': 1680805580, + 'upload_date': '20230406', + 'thumbnail': r're:^https?://.+\.jpg', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s' + _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video'] + + storage_location = data.get('storage_location') + if storage_location != 'cf_stream': + raise ExtractorError(f'Unsupported clip storage location "{storage_location}"') + + path = data['download_url'] + iframe = self._download_webpage( + f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe') + subdomain = self._search_regex( + r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe, + 'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe' + + formats = self._extract_mpd_formats( + self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id, + query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash') + formats.extend(self._extract_m3u8_formats( + self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4', + query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls')) + + return { + 'id': video_id, + 'formats': formats, + 'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None, + **traverse_obj(data, { + 'title': ('project', 'project_name', {str}), + 'timestamp': ('created_at', {unified_timestamp}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + }), + } diff --git a/hypervideo_dl/extractor/clyp.py b/hypervideo_dl/extractor/clyp.py index 0aaf73d..273d002 100644 --- a/hypervideo_dl/extractor/clyp.py +++ b/hypervideo_dl/extractor/clyp.py @@ -9,22 +9,22 @@ from ..utils import ( class ClypIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)' _TESTS = [{ - 'url': 'https://clyp.it/ojz2wfah', - 'md5': '1d4961036c41247ecfdcc439c0cddcbb', + 'url': 'https://clyp.it/iynkjk4b', + 'md5': '4bc6371c65210e7b372097fce4d92441', 'info_dict': { - 'id': 'ojz2wfah', - 'ext': 'mp3', - 'title': 'Krisson80 - bits wip wip', - 'description': '#Krisson80BitsWipWip
#chiptune\n#wip', - 'duration': 263.21, - 'timestamp': 1443515251, - 'upload_date': '20150929', + 'id': 'iynkjk4b', + 'ext': 'ogg', + 'title': 'research', + 'description': '#Research', + 'duration': 51.278, + 'timestamp': 1435524981, + 'upload_date': '20150628', }, }, { 'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d', 'info_dict': { 'id': 'b04p1odi', - 'ext': 'mp3', + 'ext': 'ogg', 'title': 'GJ! (Reward Edit)', 'description': 'Metal Resistance (THE ONE edition)', 'duration': 177.789, @@ -34,6 +34,17 @@ class ClypIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://clyp.it/v42214lc', + 'md5': '4aca4dfc3236fb6d6ddc4ea08314f33f', + 'info_dict': { + 'id': 'v42214lc', + 'ext': 'wav', + 'title': 'i dont wanna go (old version)', + 'duration': 113.528, + 'timestamp': 1607348505, + 'upload_date': '20201207', + }, }] def _real_extract(self, url): @@ -59,8 +70,20 @@ class ClypIE(InfoExtractor): 'url': format_url, 'format_id': format_id, 'vcodec': 'none', + 'acodec': ext.lower(), }) + page = self._download_webpage(url, video_id=audio_id) + wav_url = self._html_search_regex( + r'var\s*wavStreamUrl\s*=\s*["\'](?P<url>https?://[^\'"]+)', page, 'url', default=None) + if wav_url: + formats.append({ + 'url': wav_url, + 'format_id': 'wavStreamUrl', + 'vcodec': 'none', + 'acodec': 'wav', + }) + title = metadata['Title'] description = metadata.get('Description') duration = float_or_none(metadata.get('Duration')) diff --git a/hypervideo_dl/extractor/comedycentral.py b/hypervideo_dl/extractor/comedycentral.py index 05fc9f2..27d295b 100644 --- a/hypervideo_dl/extractor/comedycentral.py +++ b/hypervideo_dl/extractor/comedycentral.py @@ -2,7 +2,7 @@ from .mtv import MTVServicesInfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist)/(?P<id>[0-9a-z]{6})' + _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist|movies)/(?P<id>[0-9a-z]{6})' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' _TESTS = [{ @@ -25,6 +25,9 @@ class ComedyCentralIE(MTVServicesInfoExtractor): }, { 'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb', 'only_matching': True, + }, { + 'url': 'https://www.cc.com/movies/tkp406/a-cluesterfuenke-christmas', + 'only_matching': True, }] diff --git a/hypervideo_dl/extractor/common.py b/hypervideo_dl/extractor/common.py index 4b56307..5a561a2 100644 --- a/hypervideo_dl/extractor/common.py +++ b/hypervideo_dl/extractor/common.py @@ -13,6 +13,7 @@ import netrc import os import random import re +import subprocess import sys import time import types @@ -21,9 +22,21 @@ import urllib.request import xml.etree.ElementTree from ..compat import functools # isort: split -from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name +from ..compat import ( + compat_etree_fromstring, + compat_expanduser, + compat_os_name, + urllib_req_to_req, +) from ..cookies import LenientSimpleCookie from ..downloader.f4m import get_base_url, remove_encrypted_media +from ..downloader.hls import HlsFD +from ..networking import HEADRequest, Request +from ..networking.exceptions import ( + HTTPError, + IncompleteRead, + network_exceptions, +) from ..utils import ( IDENTITY, JSON_LD_RE, @@ -33,6 +46,7 @@ from ..utils import ( GeoRestrictedError, GeoUtils, LenientJSONDecoder, + Popen, RegexNotFoundError, RetryManager, UnsupportedError, @@ -55,7 +69,7 @@ from ..utils import ( join_nonempty, js_to_json, mimetype2ext, -
network_exceptions, + netrc_from_content, orderedSet, parse_bitrate, parse_codecs, @@ -65,21 +79,20 @@ from ..utils import ( parse_resolution, sanitize_filename, sanitize_url, - sanitized_Request, smuggle_url, str_or_none, str_to_int, strip_or_none, traverse_obj, + truncate_string, try_call, try_get, unescapeHTML, unified_strdate, unified_timestamp, - update_Request, - update_url_query, url_basename, url_or_none, + urlhandle_detect_ext, urljoin, variadic, xpath_element, @@ -129,6 +142,7 @@ class InfoExtractor: is parsed from a string (in case of fragmented media) for MSS - URL of the ISM manifest. + * request_data Data to send in POST request to the URL * manifest_url The URL of the manifest file in case of fragmented media: @@ -216,7 +230,19 @@ class InfoExtractor: width : height ratio as float. * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. - * has_drm The format has DRM and cannot be downloaded. Boolean + * has_drm True if the format has DRM and cannot be downloaded. + 'maybe' if the format may have DRM and has to be tested before download. + * extra_param_to_segment_url A query string to append to each + fragment's URL, or to update each existing query string + with. Only applied by the native HLS/DASH downloaders. + * hls_aes A dictionary of HLS AES-128 decryption information + used by the native HLS downloader to override the + values in the media playlist when an '#EXT-X-KEY' tag + is present in the playlist: + * uri The URI from which the key will be downloaded + * key The key (as hex) used to decrypt fragments. + If `key` is given, any key URI will be ignored + * iv The IV (as hex) used to decrypt fragments * downloader_options A dictionary of downloader options (For internal use only) * http_chunk_size Chunk size for HTTP downloads @@ -271,6 +297,7 @@ class InfoExtractor: channel_id: Id of the channel. channel_url: Full URL to a channel webpage. channel_follower_count: Number of followers of the channel. + channel_is_verified: Whether the channel is verified on the platform. location: Physical location where the video was filmed. subtitles: The available subtitles as a dictionary in the format {tag: subformats}. "tag" is usually a language code, and @@ -299,6 +326,11 @@ class InfoExtractor: * "author" - human-readable name of the comment author * "author_id" - user ID of the comment author * "author_thumbnail" - The thumbnail of the comment author + * "author_url" - The url to the comment author's page + * "author_is_verified" - Whether the author is verified + on the platform + * "author_is_uploader" - Whether the comment is made by + the video uploader * "id" - Comment ID * "html" - Comment as HTML * "text" - Plain text of the comment @@ -310,8 +342,8 @@ class InfoExtractor: * "dislike_count" - Number of negative ratings of the comment * "is_favorited" - Whether the comment is marked as favorite by the video uploader - * "author_is_uploader" - Whether the comment is made by - the video uploader + * "is_pinned" - Whether the comment is pinned to + the top of the comments age_limit: Age restriction for the video, as an integer (years) webpage_url: The URL to the video webpage, if given to hypervideo it should allow to get the same result again. 
(It will be set @@ -335,6 +367,10 @@ class InfoExtractor: * "start_time" - The start time of the chapter in seconds * "end_time" - The end time of the chapter in seconds * "title" (optional, string) + heatmap: A list of dictionaries, with the following entries: + * "start_time" - The start time of the data point in seconds + * "end_time" - The end time of the data point in seconds + * "value" - The normalized value of the data point (float between 0 and 1) playable_in_embed: Whether this video is allowed to play in embedded players on other sites. Can be True (=always allowed), False (=never allowed), None (=unknown), or a string @@ -446,8 +482,8 @@ class InfoExtractor: Subclasses of this should also be added to the list of extractors and - should define a _VALID_URL regexp and, re-define the _real_extract() and - (optionally) _real_initialize() methods. + should define _VALID_URL as a regexp or a Sequence of regexps, and + re-define the _real_extract() and (optionally) _real_initialize() methods. Subclasses may also override suitable() if necessary, but ensure the function signature is preserved and that this function imports everything it needs @@ -510,7 +546,7 @@ class InfoExtractor: _EMBED_REGEX = [] def _login_hint(self, method=NO_DEFAULT, netrc=None): - password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials' + password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials' return { None: '', 'any': f'Use --cookies, --cookies-from-browser, {password_hint}', @@ -537,8 +573,8 @@ class InfoExtractor: # we have cached the regexp for *this* class, whereas getattr would also # match the superclass if '_VALID_URL_RE' not in cls.__dict__: - cls._VALID_URL_RE = re.compile(cls._VALID_URL) - return cls._VALID_URL_RE.match(url) + cls._VALID_URL_RE = tuple(map(re.compile, variadic(cls._VALID_URL))) + return next(filter(None, (regex.match(url) for regex in cls._VALID_URL_RE)), None) @classmethod def suitable(cls, url): @@ -674,7 +710,8 @@ class InfoExtractor: for _ in range(2): try: self.initialize() - self.write_debug('Extracting URL: %s' % url) + self.to_screen('Extracting URL: %s' % ( + url if self.get_param('verbose') else truncate_string(url, 100, 20))) ie_result = self._real_extract(url) if ie_result is None: return None @@ -692,11 +729,11 @@ class InfoExtractor: except UnsupportedError: raise except ExtractorError as e: - e.video_id = e.video_id or self.get_temp_id(url), + e.video_id = e.video_id or self.get_temp_id(url) e.ie = e.ie or self.IE_NAME, e.traceback = e.traceback or sys.exc_info()[2] raise - except http.client.IncompleteRead as e: + except IncompleteRead as e: raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) except (KeyError, StopIteration) as e: raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url)) @@ -755,20 +792,25 @@ class InfoExtractor: @staticmethod def __can_accept_status_code(err, expected_status): - assert isinstance(err, urllib.error.HTTPError) + assert isinstance(err, HTTPError) if expected_status is None: return False elif callable(expected_status): - return expected_status(err.code) is True + return expected_status(err.status) is True else: - return err.code in variadic(expected_status) + return err.status in variadic(expected_status) def _create_request(self, url_or_request, data=None, headers=None, query=None): if 
isinstance(url_or_request, urllib.request.Request): - return update_Request(url_or_request, data=data, headers=headers, query=query) - if query: - url_or_request = update_url_query(url_or_request, query) - return sanitized_Request(url_or_request, data, headers or {}) + self._downloader.deprecation_warning( + 'Passing a urllib.request.Request to _create_request() is deprecated. ' + 'Use hypervideo_dl.networking.common.Request instead.') + url_or_request = urllib_req_to_req(url_or_request) + elif not isinstance(url_or_request, Request): + url_or_request = Request(url_or_request) + + url_or_request.update(data=data, headers=headers, query=query) + return url_or_request def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None): """ @@ -804,14 +846,9 @@ class InfoExtractor: try: return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query)) except network_exceptions as err: - if isinstance(err, urllib.error.HTTPError): + if isinstance(err, HTTPError): if self.__can_accept_status_code(err, expected_status): - # Retain reference to error to prevent file object from - # being closed before it can be read. Works around the - # effects of <https://bugs.python.org/issue15002> - # introduced in Python 3.4.1. - err.fp._error = err - return err.fp + return err.response if errnote is False: return False @@ -943,11 +980,11 @@ class InfoExtractor: if prefix is not None: webpage_bytes = prefix + webpage_bytes if self.get_param('dump_intermediate_pages', False): - self.to_screen('Dumping request to ' + urlh.geturl()) + self.to_screen('Dumping request to ' + urlh.url) dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self.get_param('write_pages'): - filename = self._request_dump_filename(urlh.geturl(), video_id) + filename = self._request_dump_filename(urlh.url, video_id) self.to_screen(f'Saving request to {filename}') with open(filename, 'wb') as outf: outf.write(webpage_bytes) @@ -1005,7 +1042,7 @@ class InfoExtractor: fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): if self.get_param('load_pages'): url_or_request = self._create_request(url_or_request, data, headers, query) - filename = self._request_dump_filename(url_or_request.full_url, video_id) + filename = self._request_dump_filename(url_or_request.url, video_id) self.to_screen(f'Loading request from {filename}') try: with open(filename, 'rb') as dumpf: @@ -1079,7 +1116,7 @@ class InfoExtractor: while True: try: return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs) - except http.client.IncompleteRead as e: + except IncompleteRead as e: try_count += 1 if try_count >= tries: raise e @@ -1260,51 +1297,53 @@ class InfoExtractor: Like _search_regex, but strips HTML tags and unescapes entities. """ res = self._search_regex(pattern, string, name, default, fatal, flags, group) - if res: - return clean_html(res).strip() - else: - return res + if isinstance(res, tuple): + return tuple(map(clean_html, res)) + return clean_html(res) def _get_netrc_login_info(self, netrc_machine=None): - username = None - password = None netrc_machine = netrc_machine or self._NETRC_MACHINE - if self.get_param('usenetrc', False): - try: - netrc_file = compat_expanduser(self.get_param('netrc_location') or '~') - if os.path.isdir(netrc_file): - netrc_file = os.path.join(netrc_file, '.netrc') - info = netrc.netrc(file=netrc_file).authenticators(netrc_machine) - if info is not None: - username = info[0] - password = info[2] - else: - raise netrc.NetrcParseError( - 'No authenticators for %s' % netrc_machine) - except (OSError, netrc.NetrcParseError) as err: - self.report_warning( - 'parsing .netrc: %s' % error_to_compat_str(err)) + cmd = self.get_param('netrc_cmd') + if cmd: + cmd = cmd.replace('{}', netrc_machine) + self.to_screen(f'Executing command: {cmd}') + stdout, _, ret = Popen.run(cmd, text=True, shell=True, stdout=subprocess.PIPE) + if ret != 0: + raise OSError(f'Command returned error code {ret}') + info = netrc_from_content(stdout).authenticators(netrc_machine) + + elif self.get_param('usenetrc', False): + netrc_file = compat_expanduser(self.get_param('netrc_location') or '~') + if os.path.isdir(netrc_file): + netrc_file = os.path.join(netrc_file, '.netrc') + info = netrc.netrc(netrc_file).authenticators(netrc_machine) - return username, password + else: + return None, None + if not info: + raise netrc.NetrcParseError(f'No authenticators for {netrc_machine}') + return info[0], info[2] def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): """ Get the login info as (username, password) First look for the manually specified credentials using username_option and password_option as keys in params dictionary. If no such credentials - available look in the netrc file using the netrc_machine or _NETRC_MACHINE - value. + are available try the netrc_cmd if it is defined or look in the + netrc file using the netrc_machine or _NETRC_MACHINE value. If there's no info available, return (None, None) """ - # Attempt to use provided username and password or .netrc data username = self.get_param(username_option) if username is not None: password = self.get_param(password_option) else: - username, password = self._get_netrc_login_info(netrc_machine) - + try: + username, password = self._get_netrc_login_info(netrc_machine) + except (OSError, netrc.NetrcParseError) as err: + self.report_warning(f'Failed to parse .netrc: {err}') + return None, None return username, password def _get_tfa_info(self, note='two-factor verification code'): @@ -1324,7 +1363,7 @@ class InfoExtractor: # Helper functions for extracting OpenGraph info @staticmethod def _og_regexes(prop): - content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' + content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?)(?=\s|/?>))' property_re = (r'(?:name|property)=(?:\'og%(sep)s%(prop)s\'|"og%(sep)s%(prop)s"|\s*og%(sep)s%(prop)s\b)' % {'prop': re.escape(prop), 'sep': '(?::|[:-])'}) template = r'<meta[^>]+?%s[^>]+?%s' @@ -1394,10 +1433,16 @@ class InfoExtractor: # And then there are the jokers who advertise that they use RTA, but actually don't.
AGE_LIMIT_MARKERS = [ r'Proudly Labeled <a href="http://www\.rtalabel\.org/"[^>]*>RTA</a>', + r'>[^<]*you acknowledge you are at least (\d+) years old', + r'>\s*(?:18\s+U(?:\.S\.C\.|SC)\s+)?(?:§+\s*)?2257\b', ] - if any(re.search(marker, html) for marker in AGE_LIMIT_MARKERS): - return 18 - return 0 + + age_limit = 0 + for marker in AGE_LIMIT_MARKERS: + mobj = re.search(marker, html) + if mobj: + age_limit = max(age_limit, int(traverse_obj(mobj, 1, default=18))) + return age_limit def _media_rating_search(self, html): # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/ @@ -1650,11 +1695,8 @@ class InfoExtractor: if js is None: return {} - args = dict(zip(arg_keys.split(','), arg_vals.split(','))) - - for key, val in args.items(): - if val in ('undefined', 'void 0'): - args[key] = 'null' + args = dict(zip(arg_keys.split(','), map(json.dumps, self._parse_json( + f'[{arg_vals}]', video_id, transform_source=js_to_json, fatal=fatal) or ()))) ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) return traverse_obj(ret, traverse) or {} @@ -1757,6 +1799,9 @@ class InfoExtractor: def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', @@ -1768,7 +1813,7 @@ class InfoExtractor: return [] manifest, urlh = res - manifest_url = urlh.geturl() + manifest_url = urlh.url return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id, @@ -1906,6 +1951,17 @@ class InfoExtractor: errnote=None, fatal=True, live=False, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + + if not m3u8_url: + if errnote is not False: + errnote = errnote or 'Failed to obtain m3u8 URL' + if fatal: + raise ExtractorError(errnote, video_id=video_id) + self.report_warning(f'{errnote}{bug_reports_message()}') + return [], {} + res = self._download_webpage_handle( m3u8_url, video_id, note='Downloading m3u8 information' if note is None else note, @@ -1916,7 +1972,7 @@ class InfoExtractor: return [], {} m3u8_doc, urlh = res - m3u8_url = urlh.geturl() + m3u8_url = urlh.url return self._parse_m3u8_formats_and_subtitles( m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, @@ -1930,11 +1986,7 @@ class InfoExtractor: errnote=None, fatal=True, data=None, headers={}, query={}, video_id=None): formats, subtitles = [], {} - - has_drm = re.search('|'.join([ - r'#EXT-X-FAXS-CM:', # Adobe Flash Access - r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay - ]), m3u8_doc) + has_drm = HlsFD._has_drm(m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) @@ -2032,6 +2084,7 @@ class InfoExtractor: 'protocol': entry_protocol, 'preference': preference, 'quality': quality, + 'has_drm': has_drm, 'vcodec': 'none' if media_type == 'AUDIO' else None, } for idx in _extract_m3u8_playlist_indices(manifest_url)) @@ -2091,6 +2144,7 @@ class InfoExtractor: 'protocol': entry_protocol, 'preference': preference, 'quality': quality, + 'has_drm': has_drm, } resolution = last_stream_inf.get('RESOLUTION') if resolution: @@ -2157,13 +2211,23 @@ class InfoExtractor: return
self._parse_m3u8_vod_duration(m3u8_vod or '', video_id) def _parse_m3u8_vod_duration(self, m3u8_vod, video_id): - if '#EXT-X-PLAYLIST-TYPE:VOD' not in m3u8_vod: + if '#EXT-X-ENDLIST' not in m3u8_vod: return None return int(sum( float(line[len('#EXTINF:'):].split(',')[0]) for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None + def _extract_mpd_vod_duration( + self, mpd_url, video_id, note=None, errnote=None, data=None, headers={}, query={}): + + mpd_doc = self._download_xml( + mpd_url, video_id, + note='Downloading MPD VOD manifest' if note is None else note, + errnote='Failed to download VOD manifest' if errnote is None else errnote, + fatal=False, data=data, headers=headers, query=query) or {} + return int_or_none(parse_duration(mpd_doc.get('mediaPresentationDuration'))) + @staticmethod def _xpath_ns(path, namespace=None): if not namespace: @@ -2177,22 +2241,17 @@ class InfoExtractor: return '/'.join(out) def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) if res is False: assert not fatal return [], {} - smil, urlh = res - smil_url = urlh.geturl() - namespace = self._parse_smil_namespace(smil) - - fmts = self._parse_smil_formats( - smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) - subs = self._parse_smil_subtitles( - smil, namespace=namespace) - - return fmts, subs + return self._parse_smil_formats_and_subtitles(smil, urlh.url, video_id, f4m_params=f4m_params, + namespace=self._parse_smil_namespace(smil)) def _extract_smil_formats(self, *args, **kwargs): fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs) @@ -2206,7 +2265,7 @@ class InfoExtractor: return {} smil, urlh = res - smil_url = urlh.geturl() + smil_url = urlh.url return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) @@ -2218,9 +2277,8 @@ class InfoExtractor: def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): namespace = self._parse_smil_namespace(smil) - formats = self._parse_smil_formats( + formats, subtitles = self._parse_smil_formats_and_subtitles( smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params) - subtitles = self._parse_smil_subtitles(smil, namespace=namespace) video_id = os.path.splitext(url_basename(smil_url))[0] title = None @@ -2259,7 +2317,14 @@ class InfoExtractor: return self._search_regex( r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None) - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): + def _parse_smil_formats(self, *args, **kwargs): + fmts, subs = self._parse_smil_formats_and_subtitles(*args, **kwargs) + if subs: + self._report_ignoring_subs('SMIL') + return fmts + + def _parse_smil_formats_and_subtitles( + self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): base = smil_url for meta in smil.findall(self._xpath_ns('./head/meta', namespace)): b = meta.get('base') or meta.get('httpBase') @@ -2267,7 +2332,7 @@ class InfoExtractor: base = b break - formats = [] + formats, subtitles = [], {} rtmp_count = 0 http_count = 0 m3u8_count = 0 @@ -2287,7 +2352,8 @@ class InfoExtractor: height = int_or_none(medium.get('height')) proto = medium.get('proto') ext = medium.get('ext') - src_ext = determine_ext(src) + src_ext = determine_ext(src, default_ext=None) or ext or 
urlhandle_detect_ext( + self._request_webpage(HEADRequest(src), video_id, note='Requesting extension info', fatal=False)) streamer = medium.get('streamer') or base if proto == 'rtmp' or streamer.startswith('rtmp'): @@ -2314,8 +2380,9 @@ class InfoExtractor: src_url = src_url.strip() if proto == 'm3u8' or src_ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( + m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles( src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False) + self._merge_subtitles(m3u8_subs, target=subtitles) if len(m3u8_formats) == 1: m3u8_count += 1 m3u8_formats[0].update({ @@ -2336,11 +2403,15 @@ class InfoExtractor: f4m_url += urllib.parse.urlencode(f4m_params) formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False)) elif src_ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src_url, video_id, mpd_id='dash', fatal=False)) + mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles( + src_url, video_id, mpd_id='dash', fatal=False) + formats.extend(mpd_formats) + self._merge_subtitles(mpd_subs, target=subtitles) elif re.search(r'\.ism/[Mm]anifest', src_url): - formats.extend(self._extract_ism_formats( - src_url, video_id, ism_id='mss', fatal=False)) + ism_formats, ism_subs = self._extract_ism_formats_and_subtitles( + src_url, video_id, ism_id='mss', fatal=False) + formats.extend(ism_formats) + self._merge_subtitles(ism_subs, target=subtitles) elif src_url.startswith('http') and self._is_valid_url(src, video_id): http_count += 1 formats.append({ @@ -2371,7 +2442,10 @@ class InfoExtractor: 'format_note': 'SMIL storyboards', }) - return formats + smil_subs = self._parse_smil_subtitles(smil, namespace=namespace) + self._merge_subtitles(smil_subs, target=subtitles) + + return formats, subtitles def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): urls = [] @@ -2397,7 +2471,7 @@ class InfoExtractor: return [] xspf, urlh = res - xspf_url = urlh.geturl() + xspf_url = urlh.url return self._parse_xspf( xspf, playlist_id, xspf_url=xspf_url, @@ -2452,6 +2526,10 @@ class InfoExtractor: def _extract_mpd_formats_and_subtitles( self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( mpd_url, video_id, note='Downloading MPD manifest' if note is None else note, @@ -2464,7 +2542,7 @@ class InfoExtractor: return [], {} # We could have been redirected to a new url when we retrieved our mpd file. 
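# The _parse_m3u8_vod_duration change above keys on #EXT-X-ENDLIST (i.e. a
# finished VOD) instead of the VOD playlist-type tag, then sums the #EXTINF
# fragment durations. A self-contained sketch of that summation over a
# made-up playlist (not a real manifest from any extractor):
sample_m3u8 = '''#EXTM3U
#EXT-X-TARGETDURATION:6
#EXTINF:5.960,
frag0.ts
#EXTINF:4.000,
frag1.ts
#EXT-X-ENDLIST'''

if '#EXT-X-ENDLIST' in sample_m3u8:
    duration = sum(
        float(line[len('#EXTINF:'):].split(',')[0])
        for line in sample_m3u8.splitlines() if line.startswith('#EXTINF:'))
    print(round(duration, 3))  # 9.96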
- mpd_url = urlh.geturl() + mpd_url = urlh.url mpd_base_url = base_url(mpd_url) return self._parse_mpd_formats_and_subtitles( @@ -2821,6 +2899,9 @@ class InfoExtractor: return fmts def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + if self.get_param('ignore_no_formats_error'): + fatal = False + res = self._download_xml_handle( ism_url, video_id, note='Downloading ISM manifest' if note is None else note, @@ -2832,7 +2913,7 @@ class InfoExtractor: if ism_doc is None: return [], {} - return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id) + return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id) def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): """ @@ -2928,6 +3009,8 @@ class InfoExtractor: 'protocol': 'ism', 'fragments': fragments, 'has_drm': ism_doc.find('Protection') is not None, + 'language': stream_language, + 'audio_channels': int_or_none(track.get('Channels')), '_download_params': { 'stream_type': stream_type, 'duration': duration, @@ -3190,7 +3273,7 @@ class InfoExtractor: def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): mobj = re.search( - r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)', + r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', webpage) if mobj: try: @@ -3211,19 +3294,20 @@ class InfoExtractor: def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - entries = [] + if not isinstance(jwplayer_data, dict): + return entries - # JWPlayer backward compatibility: single playlist item + playlist_items = jwplayer_data.get('playlist') + # JWPlayer backward compatibility: single playlist item/flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if not isinstance(playlist_items, list): + playlist_items = (playlist_items or jwplayer_data, ) - for video_data in jwplayer_data['playlist']: + for video_data in playlist_items: + if not isinstance(video_data, dict): + continue # JWPlayer backward compatibility: flattened sources # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 if 'sources' not in video_data: @@ -3261,6 +3345,13 @@ class InfoExtractor: 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, + 'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ...
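# _parse_jwplayer_data above now tolerates all three historical config
# shapes without mutating its input. A minimal sketch of just that
# normalization step; normalize() is a hypothetical name, not a helper
# from this patch:
def normalize(jwplayer_data):
    if not isinstance(jwplayer_data, dict):
        return []
    playlist_items = jwplayer_data.get('playlist')
    # missing or non-list playlists collapse to a one-item tuple
    if not isinstance(playlist_items, list):
        playlist_items = (playlist_items or jwplayer_data, )
    return [item for item in playlist_items if isinstance(item, dict)]

assert normalize({'playlist': [{'sources': []}]}) == [{'sources': []}]  # modern playlist
assert normalize({'playlist': {'sources': []}}) == [{'sources': []}]    # single item
assert normalize({'sources': []}) == [{'sources': []}]                  # flattened config
assert normalize('garbage') == []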
+ 'genre': clean_html(video_data.get('genre')), + 'channel': clean_html(dict_get(video_data, ('category', 'channel'))), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'release_year': int_or_none(video_data.get('releasedate')), + 'age_limit': int_or_none(video_data.get('age_restriction')), } # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): @@ -3278,7 +3369,7 @@ class InfoExtractor: def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - urls = [] + urls = set() formats = [] for source in jwplayer_sources_data: if not isinstance(source, dict): @@ -3287,14 +3378,14 @@ class InfoExtractor: base_url, self._proto_relative_url(source.get('file'))) if not source_url or source_url in urls: continue - urls.append(source_url) + urls.add(source_url) source_type = source.get('type') or '' ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': + if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif source_type == 'dash' or ext == 'mpd': + elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url: formats.extend(self._extract_mpd_formats( source_url, video_id, mpd_id=mpd_id, fatal=False)) elif ext == 'smil': @@ -3309,13 +3400,12 @@ class InfoExtractor: 'ext': ext, }) else: + format_id = str_or_none(source.get('label')) height = int_or_none(source.get('height')) - if height is None: + if height is None and format_id: # Often no height is provided but there is a label in # format like "1080p", "720p SD", or 1080. 
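# The replacement just below derives the height from labels such as "1080p"
# or "720p SD" via parse_resolution() rather than a one-off regex. A rough
# stand-in for the relevant part of that helper (the real parse_resolution
# in utils also understands "WxH"-style strings):
import re

def height_from_label(label):
    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', label or '')
    return int(mobj.group(1)) if mobj else None

assert height_from_label('720p SD') == 720
assert height_from_label('1080p') == 1080
assert height_from_label('HQ') is None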
- height = int_or_none(self._search_regex( - r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''), - 'height', default=None)) + height = parse_resolution(format_id).get('height') a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), @@ -3323,6 +3413,7 @@ class InfoExtractor: 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'filesize': int_or_none(source.get('filesize')), 'ext': ext, + 'format_id': format_id } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' @@ -3375,7 +3466,7 @@ class InfoExtractor: def _get_cookies(self, url): """ Return a http.cookies.SimpleCookie with the cookies for the url """ - return LenientSimpleCookie(self._downloader._calc_cookies(url)) + return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url)) def _apply_first_set_cookie_header(self, url_handle, cookie): """ @@ -3416,13 +3507,17 @@ class InfoExtractor: continue t['name'] = cls.ie_key() yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_testcases(include_onlymatching) @classmethod def get_webpage_testcases(cls): tests = vars(cls).get('_WEBPAGE_TESTS', []) for t in tests: t['name'] = cls.ie_key() - return tests + yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_webpage_testcases() @classproperty(cache=True) def age_limit(cls): @@ -3446,8 +3541,8 @@ class InfoExtractor: @classmethod def is_single_video(cls, url): """Returns whether the URL is of a single video, None if unknown""" - assert cls.suitable(url), 'The URL must be suitable for the extractor' - return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE) + if cls.suitable(url): + return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE) @classmethod def is_suitable(cls, age_limit): @@ -3460,7 +3555,7 @@ class InfoExtractor: desc = '' if cls._NETRC_MACHINE: if markdown: - desc += f' [{cls._NETRC_MACHINE}]' + desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")' else: desc += f' [{cls._NETRC_MACHINE}]' if cls.IE_DESC is False: @@ -3468,7 +3563,7 @@ class InfoExtractor: elif cls.IE_DESC: desc += f' {cls.IE_DESC}' if cls.SEARCH_KEY: - desc += f'; "{cls.SEARCH_KEY}:" prefix' + desc += f'{";" if cls.IE_DESC else ""} "{cls.SEARCH_KEY}:" prefix' if search_examples: _COUNTS = ('', '5', '10', 'all') desc += f' (e.g. 
"{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' @@ -3582,6 +3677,42 @@ class InfoExtractor: or urllib.parse.unquote(os.path.splitext(url_basename(url))[0]) or default) + def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True): + if not duration: + return + chapter_list = [{ + 'start_time': start_function(chapter), + 'title': title_function(chapter), + } for chapter in chapter_list or []] + if strict: + warn = self.report_warning + else: + warn = self.write_debug + chapter_list.sort(key=lambda c: c['start_time'] or 0) + + chapters = [{'start_time': 0}] + for idx, chapter in enumerate(chapter_list): + if chapter['start_time'] is None: + warn(f'Incomplete chapter {idx}') + elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: + chapters.append(chapter) + elif chapter not in chapters: + issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration + else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}') + warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"') + return chapters[1:] + + def _extract_chapters_from_description(self, description, duration): + duration_re = r'(?:\d+:)?\d{1,2}:\d{2}' + sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$' + return self._extract_chapters_helper( + re.findall(sep_re % (duration_re, r'.+?'), description or ''), + start_function=lambda x: parse_duration(x[0]), title_function=lambda x: x[1], + duration=duration, strict=False) or self._extract_chapters_helper( + re.findall(sep_re % (r'.+?', duration_re), description or ''), + start_function=lambda x: parse_duration(x[1]), title_function=lambda x: x[0], + duration=duration, strict=False) + @staticmethod def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None): all_known = all(map( @@ -3684,10 +3815,12 @@ class InfoExtractor: if plugin_name: mro = inspect.getmro(cls) super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] - cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key + cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key + cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}' while getattr(super_class, '__wrapped__', None): super_class = super_class.__wrapped__ setattr(sys.modules[super_class.__module__], super_class.__name__, cls) + _PLUGIN_OVERRIDES[super_class].append(cls) return super().__init_subclass__(**kwargs) @@ -3744,3 +3877,6 @@ class UnsupportedURLIE(InfoExtractor): def _real_extract(self, url): raise UnsupportedError(url) + + +_PLUGIN_OVERRIDES = collections.defaultdict(list) diff --git a/hypervideo_dl/extractor/crackle.py b/hypervideo_dl/extractor/crackle.py index 4610015..1ef90b5 100644 --- a/hypervideo_dl/extractor/crackle.py +++ b/hypervideo_dl/extractor/crackle.py @@ -4,7 +4,7 @@ import re import time from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..networking.exceptions import HTTPError from ..utils import ( determine_ext, float_or_none, @@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor): errnote='Unable to download media JSON') except ExtractorError as e: # 401 means geo restriction, trying next country - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: continue raise diff --git a/hypervideo_dl/extractor/crtvg.py b/hypervideo_dl/extractor/crtvg.py new file mode 100644 index 0000000..1aa8d77 --- /dev/null +++ b/hypervideo_dl/extractor/crtvg.py @@ 
-0,0 +1,34 @@ +from .common import InfoExtractor +from ..utils import remove_end + + +class CrtvgIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?crtvg\.es/tvg/a-carta/[^/#?]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.crtvg.es/tvg/a-carta/os-caimans-do-tea-5839623', + 'md5': 'c0958d9ff90e4503a75544358758921d', + 'info_dict': { + 'id': '5839623', + 'title': 'Os caimáns do Tea', + 'ext': 'mp4', + 'description': 'md5:f71cfba21ae564f0a6f415b31de1f842', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'params': {'skip_download': 'm3u8'} + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_url = self._search_regex(r'var\s+url\s*=\s*["\']([^"\']+)', webpage, 'video url') + formats = self._extract_m3u8_formats(video_url + '/playlist.m3u8', video_id, fatal=False) + formats.extend(self._extract_mpd_formats(video_url + '/manifest.mpd', video_id, fatal=False)) + + return { + 'id': video_id, + 'formats': formats, + 'title': remove_end(self._html_search_meta( + ['og:title', 'twitter:title'], webpage, 'title', default=None), ' | CRTVG'), + 'description': self._html_search_meta('description', webpage, 'description', default=None), + 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, 'thumbnail', default=None), + } diff --git a/hypervideo_dl/extractor/crunchyroll.py b/hypervideo_dl/extractor/crunchyroll.py index d226050..241da11 100644 --- a/hypervideo_dl/extractor/crunchyroll.py +++ b/hypervideo_dl/extractor/crunchyroll.py @@ -1,27 +1,53 @@ import base64 -import urllib.parse from .common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, float_or_none, format_field, + int_or_none, join_nonempty, + parse_age_limit, + parse_count, parse_iso8601, qualities, + remove_start, + time_seconds, traverse_obj, - try_get, + url_or_none, + urlencode_postdata, ) class CrunchyrollBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.crunchyroll.com/welcome/login' + _BASE_URL = 'https://www.crunchyroll.com' _API_BASE = 'https://api.crunchyroll.com' _NETRC_MACHINE = 'crunchyroll' - params = None + _AUTH_HEADERS = None + _API_ENDPOINT = None + _BASIC_AUTH = None + _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q') + _LOCALE_LOOKUP = { + 'ar': 'ar-SA', + 'de': 'de-DE', + '': 'en-US', + 'es': 'es-419', + 'es-es': 'es-ES', + 'fr': 'fr-FR', + 'it': 'it-IT', + 'pt-br': 'pt-BR', + 'pt-pt': 'pt-PT', + 'ru': 'ru-RU', + 'hi': 'hi-IN', + } + + @property + def is_logged_in(self): + return bool(self._get_cookies(self._BASE_URL).get('etp_rt')) def _perform_login(self, username, password): - if self._get_cookies(self._LOGIN_URL).get('etp_rt'): + if self.is_logged_in: return upsell_response = self._download_json( @@ -31,7 +57,7 @@ class CrunchyrollBaseIE(InfoExtractor): 'device_id': 'whatvalueshouldbeforweb', 'device_type': 'com.crunchyroll.static', 'access_token': 'giKq5eY27ny3cqz', - 'referer': self._LOGIN_URL + 'referer': f'{self._BASE_URL}/welcome/login' }) if upsell_response['code'] != 'ok': raise ExtractorError('Could not get session id') @@ -39,66 +65,164 @@ class CrunchyrollBaseIE(InfoExtractor): login_response = self._download_json( f'{self._API_BASE}/login.1.json', None, 'Logging in', - data=urllib.parse.urlencode({ + data=urlencode_postdata({ 'account': username, 'password': password, 'session_id': session_id - }).encode('ascii')) + })) if login_response['code'] != 'ok': raise ExtractorError('Login failed.
Server message: %s' % login_response['message'], expected=True) - if not self._get_cookies(self._LOGIN_URL).get('etp_rt'): + if not self.is_logged_in: raise ExtractorError('Login succeeded but did not set etp_rt cookie') - def _get_embedded_json(self, webpage, display_id): - initial_state = self._parse_json(self._search_regex( - r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'), display_id) - app_config = self._parse_json(self._search_regex( - r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'), display_id) - return initial_state, app_config - - def _get_params(self, lang): - if not CrunchyrollBaseIE.params: - if self._get_cookies(f'https://www.crunchyroll.com/{lang}').get('etp_rt'): - grant_type, key = 'etp_rt_cookie', 'accountAuthClientId' - else: - grant_type, key = 'client_id', 'anonClientId' + def _update_auth(self): + if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds(): + return - initial_state, app_config = self._get_embedded_json(self._download_webpage( - f'https://www.crunchyroll.com/{lang}', None, note='Retrieving main page'), None) - api_domain = app_config['cxApiParams']['apiDomain'].replace('beta.crunchyroll.com', 'www.crunchyroll.com') + if not CrunchyrollBaseIE._BASIC_AUTH: + cx_api_param = self._CLIENT_ID[self.is_logged_in] + self.write_debug(f'Using cxApiParam={cx_api_param}') + CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode() + grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id' + try: auth_response = self._download_json( - f'{api_domain}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}', - headers={ - 'Authorization': 'Basic ' + str(base64.b64encode(('%s:' % app_config['cxApiParams'][key]).encode('ascii')), 'ascii') - }, data=f'grant_type={grant_type}'.encode('ascii')) - policy_response = self._download_json( - f'{api_domain}/index/v2', None, note='Retrieving signed policy', - headers={ - 'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'] - }) - cms = policy_response.get('cms_web') - bucket = cms['bucket'] - params = { - 'Policy': cms['policy'], - 'Signature': cms['signature'], - 'Key-Pair-Id': cms['key_pair_id'] + f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}', + headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode()) + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 403: + raise ExtractorError( + 'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, ' + 'then pass the fresh cookies (with --cookies-from-browser or --cookies) ' + 'and your browser\'s User-Agent (with --user-agent)', expected=True) + raise + + CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']} + CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10) + + def _locale_from_language(self, language): + config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True) + return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language) + + def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}): + self._update_auth() + + if not endpoint.startswith('/'): + endpoint = f'/{endpoint}' + + query = query.copy() + locale = self._locale_from_language(lang) + if locale: + 
query['locale'] = locale + + return self._download_json( + f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}', + headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query) + + def _call_api(self, path, internal_id, lang, note='api', query={}): + if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'): + path = f'/content/v2/{self._API_ENDPOINT}/{path}' + + try: + result = self._call_base_api( + path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query) + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 404: + return None + raise + + if not result: + raise ExtractorError(f'Unexpected response when downloading {note} JSON') + return result + + def _extract_formats(self, stream_response, display_id=None): + requested_formats = self._configuration_arg('format') or ['adaptive_hls'] + available_formats = {} + for stream_type, streams in traverse_obj( + stream_response, (('streams', ('data', 0)), {dict.items}, ...)): + if stream_type not in requested_formats: + continue + for stream in traverse_obj(streams, lambda _, v: v['url']): + hardsub_lang = stream.get('hardsub_locale') or '' + format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s')) + available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url']) + + requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])] + if '' in available_formats and 'all' not in requested_hardsubs: + full_format_langs = set(requested_hardsubs) + self.to_screen( + 'To get all formats of a hardsub language, use ' + '"--extractor-args crunchyrollbeta:hardsub=<language_code>". ' + 'See https://github.com/hypervideo/hypervideo#crunchyrollbeta-crunchyroll for more info', + only_once=True) + else: + full_format_langs = set(map(str.lower, available_formats)) + + audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False) + hardsub_preference = qualities(requested_hardsubs[::-1]) + formats = [] + for stream_type, format_id, hardsub_lang, stream_url in available_formats.values(): + if stream_type.endswith('hls'): + if hardsub_lang.lower() in full_format_langs: + adaptive_formats = self._extract_m3u8_formats( + stream_url, display_id, 'mp4', m3u8_id=format_id, + fatal=False, note=f'Downloading {format_id} HLS manifest') + else: + adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),) + elif stream_type.endswith('dash'): + adaptive_formats = self._extract_mpd_formats( + stream_url, display_id, mpd_id=format_id, + fatal=False, note=f'Downloading {format_id} MPD manifest') + else: + self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True) + continue + for f in adaptive_formats: + if f.get('acodec') != 'none': + f['language'] = audio_locale + f['quality'] = hardsub_preference(hardsub_lang.lower()) + formats.extend(adaptive_formats) + + return formats + + def _extract_subtitles(self, data): + subtitles = {} + + for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)): + subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})] + + return subtitles + + +class CrunchyrollCmsBaseIE(CrunchyrollBaseIE): + _API_ENDPOINT = 'cms' + _CMS_EXPIRY = None + + def _call_cms_api_signed(self, path, internal_id, lang, note='api'): + if not CrunchyrollCmsBaseIE._CMS_EXPIRY or CrunchyrollCmsBaseIE._CMS_EXPIRY <= time_seconds(): +
response = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web'] + CrunchyrollCmsBaseIE._CMS_QUERY = { + 'Policy': response['policy'], + 'Signature': response['signature'], + 'Key-Pair-Id': response['key_pair_id'], } - locale = traverse_obj(initial_state, ('localization', 'locale')) - if locale: - params['locale'] = locale - CrunchyrollBaseIE.params = (api_domain, bucket, params) - return CrunchyrollBaseIE.params + CrunchyrollCmsBaseIE._CMS_BUCKET = response['bucket'] + CrunchyrollCmsBaseIE._CMS_EXPIRY = parse_iso8601(response['expires']) - 10 + + if not path.startswith('/cms/v2'): + path = f'/cms/v2{CrunchyrollCmsBaseIE._CMS_BUCKET}/{path}' + + return self._call_base_api( + path, internal_id, lang, f'Downloading {note} JSON (signed cms)', query=CrunchyrollCmsBaseIE._CMS_QUERY) -class CrunchyrollBetaIE(CrunchyrollBaseIE): +class CrunchyrollBetaIE(CrunchyrollCmsBaseIE): IE_NAME = 'crunchyroll' _VALID_URL = r'''(?x) - https?://(?:beta|www)\.crunchyroll\.com/ - (?P<lang>(?:\w{2}(?:-\w{2})?/)?) - watch/(?P<id>\w+) - (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)''' + https?://(?:beta\.|www\.)?crunchyroll\.com/ + (?:(?P<lang>\w{2}(?:-\w{2})?)/)? + watch/(?!concert|musicvideo)(?P<id>\w+)''' _TESTS = [{ + # Premium only 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', 'info_dict': { 'id': 'GY2P1Q98Y', @@ -115,10 +239,15 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): 'season_number': 1, 'episode': 'To the Future', 'episode_number': 73, - 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$', + 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'chapters': 'count:2', + 'age_limit': 14, + 'like_count': int, + 'dislike_count': int, }, 'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'}, }, { + # Premium only 'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR', 'info_dict': { 'id': 'GYE5WKQGR', @@ -126,7 +255,7 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): 'duration': 366.459, 'timestamp': 1476788400, 'description': 'md5:74b67283ffddd75f6e224ca7dc031e76', - 'title': 'SHELTER Episode – Porter Robinson presents Shelter the Animation', + 'title': 'SHELTER – Porter Robinson presents Shelter the Animation', 'upload_date': '20161018', 'series': 'SHELTER', 'series_id': 'GYGG09WWY', @@ -135,121 +264,206 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE): 'season_number': 1, 'episode': 'Porter Robinson presents Shelter the Animation', 'episode_number': 0, - 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg$', + 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'age_limit': 14, + 'like_count': int, + 'dislike_count': int, }, 'params': {'skip_download': True}, - 'skip': 'Video is Premium only', }, { - 'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y', + 'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard', + 'info_dict': { + 'id': 'GJWU2VKK3', + 'ext': 'mp4', + 'duration': 1420.054, + 'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd', + 'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard', + 'series': 'The Ice Guy and His Cool Female Colleague', + 'series_id': 'GW4HM75NP', + 'season': 'The Ice Guy and His Cool Female Colleague', + 'season_id': 'GY9PC21VE', + 'season_number': 1, + 'episode': 'Cherry Blossom Meeting and a Coming Blizzard', + 'episode_number': 1, + 'chapters': 'count:2', + 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'timestamp': 1672839000, + 'upload_date': '20230104', + 
'age_limit': 14, + 'like_count': int, + 'dislike_count': int, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.crunchyroll.com/watch/GM8F313NQ', + 'info_dict': { + 'id': 'GM8F313NQ', + 'ext': 'mp4', + 'title': 'Garakowa -Restore the World-', + 'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608', + 'duration': 3996.104, + 'age_limit': 13, + 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6', + 'info_dict': { + 'id': 'G62PEZ2E6', + 'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608', + 'age_limit': 13, + 'duration': 65.138, + 'title': 'Garakowa -Restore the World-', + }, + 'playlist_mincount': 5, + }, { + 'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y', 'only_matching': True, }, { 'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy', 'only_matching': True, }] + # We want to support lazy playlist filtering and movie listings cannot be inside a playlist + _RETURN_TYPE = 'video' def _real_extract(self, url): - lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') - api_domain, bucket, params = self._get_params(lang) + lang, internal_id = self._match_valid_url(url).group('lang', 'id') - episode_response = self._download_json( - f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id, - note='Retrieving episode metadata', query=params) - if episode_response.get('is_premium_only') and not episode_response.get('playback'): - raise ExtractorError('This video is for premium members only.', expected=True) + # We need to use unsigned API call to allow ratings query string + response = traverse_obj(self._call_api( + f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict})) + if not response: + raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True) - stream_response = self._download_json( - f'{api_domain}{episode_response["__links__"]["streams"]["href"]}', display_id, - note='Retrieving stream info', query=params) - get_streams = lambda name: (traverse_obj(stream_response, name) or {}).items() + object_type = response.get('type') + if object_type == 'episode': + result = self._transform_episode_response(response) - requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])] - hardsub_preference = qualities(requested_hardsubs[::-1]) - requested_formats = self._configuration_arg('format') or ['adaptive_hls'] + elif object_type == 'movie': + result = self._transform_movie_response(response) - available_formats = {} - for stream_type, streams in get_streams('streams'): - if stream_type not in requested_formats: - continue - for stream in streams.values(): - if not stream.get('url'): - continue - hardsub_lang = stream.get('hardsub_locale') or '' - format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s')) - available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url']) + elif object_type == 'movie_listing': + first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id')) + if not self._yes_playlist(internal_id, first_movie_id): + return self.url_result(f'{self._BASE_URL}/{lang}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id) + + def entries(): + movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 
'movie list') + for movie_response in traverse_obj(movies, ('data', ...)): + yield self.url_result( + f'{self._BASE_URL}/{lang}watch/{movie_response["id"]}', + CrunchyrollBetaIE, **self._transform_movie_response(movie_response)) + + return self.playlist_result(entries(), **self._transform_movie_response(response)) - if '' in available_formats and 'all' not in requested_hardsubs: - full_format_langs = set(requested_hardsubs) - self.to_screen( - 'To get all formats of a hardsub language, use ' - '"--extractor-args crunchyrollbeta:hardsub=<language_code>". ' - 'See https://github.com/hypervideo/hypervideo#crunchyrollbeta for more info', - only_once=True) else: - full_format_langs = set(map(str.lower, available_formats)) + raise ExtractorError(f'Unknown object type {object_type}') - formats = [] - for stream_type, format_id, hardsub_lang, stream_url in available_formats.values(): - if stream_type.endswith('hls'): - if hardsub_lang.lower() in full_format_langs: - adaptive_formats = self._extract_m3u8_formats( - stream_url, display_id, 'mp4', m3u8_id=format_id, - fatal=False, note=f'Downloading {format_id} HLS manifest') - else: - adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),) - elif stream_type.endswith('dash'): - adaptive_formats = self._extract_mpd_formats( - stream_url, display_id, mpd_id=format_id, - fatal=False, note=f'Downloading {format_id} MPD manifest') - else: - self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True) - continue - for f in adaptive_formats: - if f.get('acodec') != 'none': - f['language'] = stream_response.get('audio_locale') - f['quality'] = hardsub_preference(hardsub_lang.lower()) - formats.extend(adaptive_formats) + # There might be multiple audio languages for one object (`_metadata.versions`), + # so we need to get the id from `streams_link` instead or we don't know which language to choose + streams_link = response.get('streams_link') + if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')): + message = f'This {object_type} is for premium members only' + if self.is_logged_in: + raise ExtractorError(message, expected=True) + self.raise_login_required(message) + + # We need to go from the unsigned to the signed API to avoid getting soft banned + stream_response = self._call_cms_api_signed(remove_start( + streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info') + result['formats'] = self._extract_formats(stream_response, internal_id) + result['subtitles'] = self._extract_subtitles(stream_response) + + # if no intro chapter is available, a 403 without usable data is returned + intro_chapter = self._download_json( + f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json', + internal_id, note='Downloading chapter info', fatal=False, errnote=False) + if isinstance(intro_chapter, dict): + result['chapters'] = [{ + 'title': 'Intro', + 'start_time': float_or_none(intro_chapter.get('startTime')), + 'end_time': float_or_none(intro_chapter.get('endTime')), + }] + + def calculate_count(item): + return parse_count(''.join((item['displayed'], item.get('unit') or ''))) + + result.update(traverse_obj(response, ('rating', { + 'like_count': ('up', {calculate_count}), + 'dislike_count': ('down', {calculate_count}), + }))) + + return result + @staticmethod + def _transform_episode_response(data): + metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {} return { - 'id': internal_id, - 'title': '%s Episode %s – %s' % ( 
episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')), - 'description': try_get(episode_response, lambda x: x['description'].replace(r'\r\n', '\n')), - 'duration': float_or_none(episode_response.get('duration_ms'), 1000), - 'timestamp': parse_iso8601(episode_response.get('upload_date')), - 'series': episode_response.get('series_title'), - 'series_id': episode_response.get('series_id'), - 'season': episode_response.get('season_title'), - 'season_id': episode_response.get('season_id'), - 'season_number': episode_response.get('season_number'), - 'episode': episode_response.get('title'), - 'episode_number': episode_response.get('sequence_number'), - 'formats': formats, - 'thumbnails': [{ - 'url': thumb.get('source'), - 'width': thumb.get('width'), - 'height': thumb.get('height'), - } for thumb in traverse_obj(episode_response, ('images', 'thumbnail', ..., ...)) or []], - 'subtitles': { - lang: [{ - 'url': subtitle_data.get('url'), - 'ext': subtitle_data.get('format') - }] for lang, subtitle_data in get_streams('subtitles') - }, + 'id': data['id'], + 'title': ' \u2013 '.join(( + ('%s%s' % ( + format_field(metadata, 'season_title'), + format_field(metadata, 'episode', ' Episode %s'))), + format_field(data, 'title'))), + **traverse_obj(data, { + 'episode': ('title', {str}), + 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}), + 'thumbnails': ('images', 'thumbnail', ..., ..., { + 'url': ('source', {url_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + }), + **traverse_obj(metadata, { + 'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), + 'timestamp': ('upload_date', {parse_iso8601}), + 'series': ('series_title', {str}), + 'series_id': ('series_id', {str}), + 'season': ('season_title', {str}), + 'season_id': ('season_id', {str}), + 'season_number': ('season_number', ({int}, {float_or_none})), + 'episode_number': ('sequence_number', ({int}, {float_or_none})), + 'age_limit': ('maturity_ratings', -1, {parse_age_limit}), + 'language': ('audio_locale', {str}), + }, get_all=False), + } + + @staticmethod + def _transform_movie_response(data): + metadata = traverse_obj(data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {} + return { + 'id': data['id'], + **traverse_obj(data, { + 'title': ('title', {str}), + 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}), + 'thumbnails': ('images', 'thumbnail', ..., ..., { + 'url': ('source', {url_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + }), + **traverse_obj(metadata, { + 'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), + 'age_limit': ('maturity_ratings', -1, {parse_age_limit}), + }), + } -class CrunchyrollBetaShowIE(CrunchyrollBaseIE): +class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE): IE_NAME = 'crunchyroll:playlist' _VALID_URL = r'''(?x) - https?://(?:beta|www)\.crunchyroll\.com/ + https?://(?:beta\.|www\.)?crunchyroll\.com/ (?P<lang>(?:\w{2}(?:-\w{2})?/)?) 
- series/(?P<id>\w+) - (?:/(?P<display_id>[\w-]+))?/?(?:[?#]|$)''' + series/(?P<id>\w+)''' _TESTS = [{ 'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', 'info_dict': { 'id': 'GY19NQ2QR', 'title': 'Girl Friend BETA', + 'description': 'md5:99c1b22ee30a74b536a8277ced8eb750', + # XXX: `thumbnail` does not get set from `thumbnails` in playlist + # 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'age_limit': 14, }, 'playlist_mincount': 10, }, { @@ -258,40 +472,179 @@ class CrunchyrollBetaShowIE(CrunchyrollBaseIE): }] def _real_extract(self, url): - lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'id', 'display_id') - api_domain, bucket, params = self._get_params(lang) + lang, internal_id = self._match_valid_url(url).group('lang', 'id') + + def entries(): + seasons_response = self._call_cms_api_signed(f'seasons?series_id={internal_id}', internal_id, lang, 'seasons') + for season in traverse_obj(seasons_response, ('items', ..., {dict})): + episodes_response = self._call_cms_api_signed( + f'episodes?season_id={season["id"]}', season["id"], lang, 'episode list') + for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})): + yield self.url_result( + f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}', + CrunchyrollBetaIE, **CrunchyrollBetaIE._transform_episode_response(episode_response)) - series_response = self._download_json( - f'{api_domain}/cms/v2{bucket}/series/{internal_id}', display_id, - note='Retrieving series metadata', query=params) + return self.playlist_result( + entries(), internal_id, + **traverse_obj(self._call_api(f'series/{internal_id}', internal_id, lang, 'series'), ('data', 0, { + 'title': ('title', {str}), + 'description': ('description', {lambda x: x.replace(r'\r\n', '\n')}), + 'age_limit': ('maturity_ratings', -1, {parse_age_limit}), + 'thumbnails': ('images', ..., ..., ..., { + 'url': ('source', {url_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }) + }))) - seasons_response = self._download_json( - f'{api_domain}/cms/v2{bucket}/seasons?series_id={internal_id}', display_id, - note='Retrieving season list', query=params) + +class CrunchyrollMusicIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:music' + _VALID_URL = r'''(?x) + https?://(?:www\.)?crunchyroll\.com/ + (?P<lang>(?:\w{2}(?:-\w{2})?/)?) 
+ watch/(?P<type>concert|musicvideo)/(?P<id>\w+)''' + _TESTS = [{ + 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79', + 'info_dict': { + 'ext': 'mp4', + 'id': 'MV5B02C79', + 'display_id': 'egaono-hana', + 'title': 'Egaono Hana', + 'track': 'Egaono Hana', + 'artist': 'Goose house', + 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'genre': ['J-Pop'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C', + 'info_dict': { + 'ext': 'mp4', + 'id': 'MV88BB7F2C', + 'display_id': 'crossing-field', + 'title': 'Crossing Field', + 'track': 'Crossing Field', + 'artist': 'LiSA', + 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'genre': ['Anime'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135', + 'info_dict': { + 'ext': 'mp4', + 'id': 'MC2E2AC135', + 'display_id': 'live-is-smile-always-364joker-at-yokohama-arena', + 'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA', + 'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA', + 'artist': 'LiSA', + 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'description': 'md5:747444e7e6300907b7a43f0a0503072e', + 'genre': ['J-Pop'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana', + 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena', + 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field', + 'only_matching': True, + }] + _API_ENDPOINT = 'music' + + def _real_extract(self, url): + lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type') + path, name = { + 'concert': ('concerts', 'concert info'), + 'musicvideo': ('music_videos', 'music video info'), + }[object_type] + response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict})) + if not response: + raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True) + + streams_link = response.get('streams_link') + if not streams_link and response.get('isPremiumOnly'): + message = f'This {response.get("type") or "media"} is for premium members only' + if self.is_logged_in: + raise ExtractorError(message, expected=True) + self.raise_login_required(message) + + result = self._transform_music_response(response) + stream_response = self._call_api(streams_link, internal_id, lang, 'stream info') + result['formats'] = self._extract_formats(stream_response, internal_id) + + return result + + @staticmethod + def _transform_music_response(data): + return { + 'id': data['id'], + **traverse_obj(data, { + 'display_id': 'slug', + 'title': 'title', + 'track': 'title', + 'artist': ('artist', 'name'), + 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}), + 'thumbnails': ('images', ..., ..., { + 'url': ('source', {url_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + 'genre': ('genres', ..., 'displayValue'), + 'age_limit': ('maturity_ratings', -1, {parse_age_limit}), + }), + } + + +class CrunchyrollArtistIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:artist' + _VALID_URL = r'''(?x) + https?://(?:www\.)?crunchyroll\.com/ + (?P<lang>(?:\w{2}(?:-\w{2})?/)?) 
+ artist/(?P<id>\w{10})''' + _TESTS = [{ + 'url': 'https://www.crunchyroll.com/artist/MA179CB50D', + 'info_dict': { + 'id': 'MA179CB50D', + 'title': 'LiSA', + 'genre': ['J-Pop', 'Anime', 'Rock'], + 'description': 'md5:16d87de61a55c3f7d6c454b73285938e', + }, + 'playlist_mincount': 83, + }, { + 'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa', + 'only_matching': True, + }] + _API_ENDPOINT = 'music' + + def _real_extract(self, url): + lang, internal_id = self._match_valid_url(url).group('lang', 'id') + response = traverse_obj(self._call_api( + f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0)) def entries(): - for season in seasons_response['items']: - episodes_response = self._download_json( - f'{api_domain}/cms/v2{bucket}/episodes?season_id={season["id"]}', display_id, - note=f'Retrieving episode list for {season.get("slug_title")}', query=params) - for episode in episodes_response['items']: - episode_id = episode['id'] - episode_display_id = episode['slug_title'] - yield { - '_type': 'url', - 'url': f'https://www.crunchyroll.com/{lang}watch/{episode_id}/{episode_display_id}', - 'ie_key': CrunchyrollBetaIE.ie_key(), - 'id': episode_id, - 'title': '%s Episode %s – %s' % (episode.get('season_title'), episode.get('episode'), episode.get('title')), - 'description': try_get(episode, lambda x: x['description'].replace(r'\r\n', '\n')), - 'duration': float_or_none(episode.get('duration_ms'), 1000), - 'series': episode.get('series_title'), - 'series_id': episode.get('series_id'), - 'season': episode.get('season_title'), - 'season_id': episode.get('season_id'), - 'season_number': episode.get('season_number'), - 'episode': episode.get('title'), - 'episode_number': episode.get('sequence_number') - } - - return self.playlist_result(entries(), internal_id, series_response.get('title')) + for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]: + for internal_id in traverse_obj(response, (attribute, ...)): + yield self.url_result(f'{self._BASE_URL}/watch/{path}/{internal_id}', CrunchyrollMusicIE, internal_id) + + return self.playlist_result(entries(), **self._transform_artist_response(response)) + + @staticmethod + def _transform_artist_response(data): + return { + 'id': data['id'], + **traverse_obj(data, { + 'title': 'name', + 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}), + 'thumbnails': ('images', ..., ..., { + 'url': ('source', {url_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + }), + 'genre': ('genres', ..., 'displayValue'), + }), + } diff --git a/hypervideo_dl/extractor/cultureunplugged.py b/hypervideo_dl/extractor/cultureunplugged.py index 2fb2280..9c8509f 100644 --- a/hypervideo_dl/extractor/cultureunplugged.py +++ b/hypervideo_dl/extractor/cultureunplugged.py @@ -1,10 +1,8 @@ import time from .common import InfoExtractor -from ..utils import ( - int_or_none, - HEADRequest, -) +from ..networking import HEADRequest +from ..utils import int_or_none class CultureUnpluggedIE(InfoExtractor): diff --git a/hypervideo_dl/extractor/curiositystream.py b/hypervideo_dl/extractor/curiositystream.py index 26cf24f..941cf4e 100644 --- a/hypervideo_dl/extractor/curiositystream.py +++ b/hypervideo_dl/extractor/curiositystream.py @@ -1,4 +1,5 @@ import re +import urllib.parse from .common import InfoExtractor from ..compat import compat_str @@ -23,7 +24,7 @@ class CuriosityStreamBaseIE(InfoExtractor): auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token') if auth_cookie: 
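+            # the cookie value is percent-encoded, so unquote it before using it as X-Auth-Token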
self.write_debug('Obtained auth_token cookie') - self._auth_token = auth_cookie.value + self._auth_token = urllib.parse.unquote(auth_cookie.value) if self._auth_token: headers['X-Auth-Token'] = self._auth_token result = self._download_json( @@ -54,8 +55,11 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', 'channel': 'Curiosity Stream', 'categories': ['Technology', 'Interview'], - 'average_rating': 96.79, + 'average_rating': float, 'series_id': '2', + 'thumbnail': r're:https://img.curiositystream.com/.+\.jpg', + 'tags': [], + 'duration': 158 }, 'params': { # m3u8 download diff --git a/hypervideo_dl/extractor/dacast.py b/hypervideo_dl/extractor/dacast.py new file mode 100644 index 0000000..4e81aa4 --- /dev/null +++ b/hypervideo_dl/extractor/dacast.py @@ -0,0 +1,158 @@ +import hashlib +import re +import time + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + classproperty, + float_or_none, + traverse_obj, + url_or_none, +) + + +class DacastBaseIE(InfoExtractor): + _URL_TYPE = None + + @classproperty + def _VALID_URL(cls): + return fr'https?://iframe\.dacast\.com/{cls._URL_TYPE}/(?P<user_id>[\w-]+)/(?P<id>[\w-]+)' + + @classproperty + def _EMBED_REGEX(cls): + return [rf'<iframe[^>]+\bsrc=["\'](?P<url>{cls._VALID_URL})'] + + _API_INFO_URL = 'https://playback.dacast.com/content/info' + + @classmethod + def _get_url_from_id(cls, content_id): + user_id, media_id = content_id.split(f'-{cls._URL_TYPE}-') + return f'https://iframe.dacast.com/{cls._URL_TYPE}/{user_id}/{media_id}' + + @classmethod + def _extract_embed_urls(cls, url, webpage): + yield from super()._extract_embed_urls(url, webpage) + for content_id in re.findall( + rf'<script[^>]+\bsrc=["\']https://player\.dacast\.com/js/player\.js\?contentId=([\w-]+-{cls._URL_TYPE}-[\w-]+)["\']', webpage): + yield cls._get_url_from_id(content_id) + + +class DacastVODIE(DacastBaseIE): + _URL_TYPE = 'vod' + _TESTS = [{ + 'url': 'https://iframe.dacast.com/vod/acae82153ef4d7a7344ae4eaa86af534/1c6143e3-5a06-371d-8695-19b96ea49090', + 'info_dict': { + 'id': '1c6143e3-5a06-371d-8695-19b96ea49090', + 'ext': 'mp4', + 'uploader_id': 'acae82153ef4d7a7344ae4eaa86af534', + 'title': '2_4||Adnexal mass characterisation: O-RADS US and MRI||N. 
Bharwani, London/UK', + 'thumbnail': 'https://universe-files.dacast.com/26137208-5858-65c1-5e9a-9d6b6bd2b6c2', + }, + 'params': {'skip_download': 'm3u8'}, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://www.dacast.com/support/knowledgebase/how-can-i-embed-a-video-on-my-website/', + 'info_dict': { + 'id': 'b6674869-f08a-23c5-1d7b-81f5309e1a90', + 'ext': 'mp4', + 'title': '4-HowToEmbedVideo.mp4', + 'uploader_id': '3b67c4a9-3886-4eb1-d0eb-39b23b14bef3', + 'thumbnail': 'https://universe-files.dacast.com/d26ab48f-a52a-8783-c42e-a90290ba06b6.png', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://gist.githubusercontent.com/bashonly/4ad249ef2910346fbdf3809b220f11ee/raw/87349778d4af1a80b1fcc3beb9c88108de5858f5/dacast_embeds.html', + 'info_dict': { + 'id': 'e7df418e-a83b-7a7f-7b5e-1a667981e8fa', + 'ext': 'mp4', + 'title': 'Evening Service 2-5-23', + 'uploader_id': '943bb1ab3c03695ba85330d92d6d226e', + 'thumbnail': 'https://universe-files.dacast.com/337472b3-e92c-2ea4-7eb7-5700da477f67', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + user_id, video_id = self._match_valid_url(url).group('user_id', 'id') + query = {'contentId': f'{user_id}-vod-{video_id}', 'provider': 'universe'} + info = self._download_json(self._API_INFO_URL, video_id, query=query, fatal=False) + access = self._download_json( + 'https://playback.dacast.com/content/access', video_id, + note='Downloading access JSON', query=query, expected_status=403) + + error = access.get('error') + if error in ('Broadcaster has been blocked', 'Content is offline'): + raise ExtractorError(error, expected=True) + elif error: + raise ExtractorError(f'Dacast API says "{error}"') + + hls_url = access['hls'] + hls_aes = {} + + if 'DRM_EXT' in hls_url: + self.report_drm(video_id) + elif '/uspaes/' in hls_url: + # From https://player.dacast.com/js/player.js + ts = int(time.time()) + signature = hashlib.sha1( + f'{10413792000 - ts}{ts}YfaKtquEEpDeusCKbvYszIEZnWmBcSvw'.encode()).digest().hex() + hls_aes['uri'] = f'https://keys.dacast.com/uspaes/{video_id}.key?s={signature}&ts={ts}' + + for retry in self.RetryManager(): + try: + formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls') + except ExtractorError as e: + # CDN will randomly respond with 403 + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + retry.error = e + continue + raise + + return { + 'id': video_id, + 'uploader_id': user_id, + 'formats': formats, + 'hls_aes': hls_aes or None, + **traverse_obj(info, ('contentInfo', { + 'title': 'title', + 'duration': ('duration', {float_or_none}), + 'thumbnail': ('thumbnailUrl', {url_or_none}), + })), + } + + +class DacastPlaylistIE(DacastBaseIE): + _URL_TYPE = 'playlist' + _TESTS = [{ + 'url': 'https://iframe.dacast.com/playlist/943bb1ab3c03695ba85330d92d6d226e/b632eb053cac17a9c9a02bcfc827f2d8', + 'playlist_mincount': 28, + 'info_dict': { + 'id': 'b632eb053cac17a9c9a02bcfc827f2d8', + 'title': 'Archive Sermons', + }, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://gist.githubusercontent.com/bashonly/7efb606f49f3c6e07ea0327de5a661d1/raw/05a16eac830245ea301fb0a585023bec71e6093c/dacast_playlist_embed.html', + 'playlist_mincount': 28, + 'info_dict': { + 'id': 'b632eb053cac17a9c9a02bcfc827f2d8', + 'title': 'Archive Sermons', + }, + }] + + def _real_extract(self, url): + user_id, playlist_id = self._match_valid_url(url).group('user_id', 'id') + info = self._download_json( + self._API_INFO_URL, playlist_id, note='Downloading playlist JSON', query={ + 'contentId': 
f'{user_id}-playlist-{playlist_id}', + 'provider': 'universe', + })['contentInfo'] + + def entries(info): + for video in traverse_obj(info, ('features', 'playlist', 'contents', lambda _, v: v['id'])): + yield self.url_result( + DacastVODIE._get_url_from_id(video['id']), DacastVODIE, video['id'], video.get('title')) + + return self.playlist_result(entries(info), playlist_id, info.get('title')) diff --git a/hypervideo_dl/extractor/daftsex.py b/hypervideo_dl/extractor/daftsex.py index 551d5e3..92510c7 100644 --- a/hypervideo_dl/extractor/daftsex.py +++ b/hypervideo_dl/extractor/daftsex.py @@ -1,6 +1,7 @@ from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( + ExtractorError, int_or_none, js_to_json, parse_count, @@ -12,21 +13,24 @@ from ..utils import ( class DaftsexIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?daftsex\.com/watch/(?P<id>-?\d+_\d+)' + _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)' _TESTS = [{ - 'url': 'https://daftsex.com/watch/-35370899_456246186', - 'md5': 'd95135e6cea2d905bea20dbe82cda64a', + 'url': 'https://daft.sex/watch/-35370899_456246186', + 'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd', 'info_dict': { 'id': '-35370899_456246186', 'ext': 'mp4', 'title': 'just relaxing', - 'description': 'just relaxing - Watch video Watch video in high quality', + 'description': 'just relaxing – Watch video Watch video in high quality', 'upload_date': '20201113', 'timestamp': 1605261911, - 'thumbnail': r're:https://[^/]+/impf/-43BuMDIawmBGr3GLcZ93CYwWf2PBv_tVWoS1A/dnu41DnARU4\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=6af2c26ff4a45e55334189301c867384&type=video_thumb', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + 'duration': 15.0, + 'view_count': int }, }, { - 'url': 'https://daftsex.com/watch/-156601359_456242791', + 'url': 'https://daft.sex/watch/-156601359_456242791', 'info_dict': { 'id': '-156601359_456242791', 'ext': 'mp4', @@ -36,6 +40,7 @@ class DaftsexIE(InfoExtractor): 'timestamp': 1600250735, 'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ', }, + 'skip': 'deleted / private' }] def _real_extract(self, url): @@ -60,7 +65,7 @@ class DaftsexIE(InfoExtractor): webpage, 'player color', fatal=False) or '' embed_page = self._download_webpage( - 'https://daxab.com/player/%s?color=%s' % (player_hash, player_color), + 'https://dxb.to/player/%s?color=%s' % (player_hash, player_color), video_id, headers={'Referer': url}) video_params = self._parse_json( self._search_regex( @@ -94,15 +99,19 @@ class DaftsexIE(InfoExtractor): 'age_limit': 18, } - item = self._download_json( + items = self._download_json( f'{server_domain}/method/video.get/{video_id}', video_id, headers={'Referer': url}, query={ 'token': video_params['video']['access_token'], 'videos': video_id, 'ckey': video_params['c_key'], 'credentials': video_params['video']['credentials'], - })['response']['items'][0] + })['response']['items'] + + if not items: + raise ExtractorError('Video is not available', video_id=video_id, expected=True) + item = items[0] formats = [] for f_id, f_url in item.get('files', {}).items(): if f_id == 'external': diff --git a/hypervideo_dl/extractor/dailymotion.py b/hypervideo_dl/extractor/dailymotion.py index 2a44718..21263d4 100644 --- a/hypervideo_dl/extractor/dailymotion.py +++ b/hypervideo_dl/extractor/dailymotion.py @@ -3,7 +3,7 @@ import json import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from 
..networking.exceptions import HTTPError from ..utils import ( ExtractorError, OnDemandPagedList, @@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor): None, 'Downloading Access Token', data=urlencode_postdata(data))['access_token'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + if isinstance(e.cause, HTTPError) and e.cause.status == 400: raise ExtractorError(self._parse_json( - e.cause.read().decode(), xid)['error_description'], expected=True) + e.cause.response.read().decode(), xid)['error_description'], expected=True) raise self._set_dailymotion_cookie('access_token' if username else 'client_token', token) self._HEADERS['Authorization'] = 'Bearer ' + token diff --git a/hypervideo_dl/extractor/digitalconcerthall.py b/hypervideo_dl/extractor/digitalconcerthall.py index 3461e36..c11cd79 100644 --- a/hypervideo_dl/extractor/digitalconcerthall.py +++ b/hypervideo_dl/extractor/digitalconcerthall.py @@ -11,7 +11,7 @@ from ..utils import ( class DigitalConcertHallIE(InfoExtractor): IE_DESC = 'DigitalConcertHall extractor' - _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/concert/(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)' _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token' _ACCESS_TOKEN = None _NETRC_MACHINE = 'digitalconcerthall' @@ -40,6 +40,19 @@ class DigitalConcertHallIE(InfoExtractor): }, 'params': {'skip_download': 'm3u8'}, 'playlist_count': 3, + }, { + 'url': 'https://www.digitalconcerthall.com/en/film/388', + 'info_dict': { + 'id': '388', + 'ext': 'mp4', + 'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann', + 'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2', + 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', + 'upload_date': '20220714', + 'timestamp': 1657785600, + 'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff', + }, + 'params': {'skip_download': 'm3u8'}, }] def _perform_login(self, username, password): @@ -75,7 +88,7 @@ class DigitalConcertHallIE(InfoExtractor): if not self._ACCESS_TOKEN: self.raise_login_required(method='password') - def _entries(self, items, language, **kwargs): + def _entries(self, items, language, type_, **kwargs): for item in items: video_id = item['id'] stream_info = self._download_json( @@ -103,11 +116,11 @@ class DigitalConcertHallIE(InfoExtractor): 'start_time': chapter.get('time'), 'end_time': try_get(chapter, lambda x: x['time'] + x['duration']), 'title': chapter.get('text'), - } for chapter in item['cuepoints']] if item.get('cuepoints') else None, + } for chapter in item['cuepoints']] if item.get('cuepoints') and type_ == 'concert' else None, } def _real_extract(self, url): - language, video_id = self._match_valid_url(url).group('language', 'id') + language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id') if not language: language = 'en' @@ -120,18 +133,18 @@ class DigitalConcertHallIE(InfoExtractor): }] vid_info = self._download_json( - f'https://api.digitalconcerthall.com/v2/concert/{video_id}', video_id, headers={ + f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={ 'Accept': 'application/json', 'Accept-Language': language }) album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '') + videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) return { '_type': 
'playlist', 'id': video_id, 'title': vid_info.get('title'), - 'entries': self._entries(traverse_obj(vid_info, ('_embedded', ..., ...)), language, - thumbnails=thumbnails, album_artist=album_artist), + 'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_), 'thumbnails': thumbnails, 'album_artist': album_artist, } diff --git a/hypervideo_dl/extractor/discogs.py b/hypervideo_dl/extractor/discogs.py new file mode 100644 index 0000000..048c622 --- /dev/null +++ b/hypervideo_dl/extractor/discogs.py @@ -0,0 +1,35 @@ +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import traverse_obj + + +class DiscogsReleasePlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?discogs\.com/(?P<type>release|master)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.discogs.com/release/1-The-Persuader-Stockholm', + 'info_dict': { + 'id': 'release1', + 'title': 'Stockholm', + }, + 'playlist_mincount': 7, + }, { + 'url': 'https://www.discogs.com/master/113-Vince-Watson-Moments-In-Time', + 'info_dict': { + 'id': 'master113', + 'title': 'Moments In Time', + }, + 'playlist_mincount': 53, + }] + + def _real_extract(self, url): + playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type') + + display_id = f'{playlist_type}{playlist_id}' + response = self._download_json( + f'https://api.discogs.com/{playlist_type}s/{playlist_id}', display_id) + + entries = [ + self.url_result(video['uri'], YoutubeIE, video_title=video.get('title')) + for video in traverse_obj(response, ('videos', lambda _, v: YoutubeIE.suitable(v['uri'])))] + + return self.playlist_result(entries, display_id, response.get('title')) diff --git a/hypervideo_dl/extractor/discovery.py b/hypervideo_dl/extractor/discovery.py index fd3fc8f..75b4643 100644 --- a/hypervideo_dl/extractor/discovery.py +++ b/hypervideo_dl/extractor/discovery.py @@ -3,8 +3,8 @@ import string from .discoverygo import DiscoveryGoBaseIE from ..compat import compat_urllib_parse_unquote +from ..networking.exceptions import HTTPError from ..utils import ExtractorError -from ..compat import compat_HTTPError class DiscoveryIE(DiscoveryGoBaseIE): @@ -78,7 +78,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): 'Downloading token JSON metadata', query={ 'authRel': 'authorization', 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + 'nonce': ''.join(random.choices(string.ascii_letters, k=32)), 'redirectUri': 'https://www.discovery.com/', })['access_token'] @@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE): self._API_BASE_URL + 'streaming/video/' + video_id, display_id, 'Downloading streaming JSON metadata', headers=headers) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): + if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): e_description = self._parse_json( - e.cause.read().decode(), display_id)['description'] + e.cause.response.read().decode(), display_id)['description'] if 'resource not available for country' in e_description: self.raise_geo_restricted(countries=self._GEO_COUNTRIES) if 'Authorized Networks' in e_description: diff --git a/hypervideo_dl/extractor/dlf.py b/hypervideo_dl/extractor/dlf.py new file mode 100644 index 0000000..88a4149 --- /dev/null +++ b/hypervideo_dl/extractor/dlf.py @@ -0,0 +1,192 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + extract_attributes, + int_or_none, + traverse_obj, + url_or_none, +) 
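+# Deutschlandfunk pages render each playable audio as an "Anhören" button whose data-* +# attributes carry the stream URL and metadata; the extractors below parse those +# attributes directly instead of calling an API.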
+ + +class DLFBaseIE(InfoExtractor): + _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/' + _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)' + + def _parse_button_attrs(self, button, audio_id=None): + attrs = extract_attributes(button) + audio_id = audio_id or attrs['data-audio-diraid'] + + url = traverse_obj( + attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference', + 'data-audio-src', expected_type=url_or_none) + ext = determine_ext(url) + + return { + 'id': audio_id, + 'extractor_key': DLFIE.ie_key(), + 'extractor': DLFIE.IE_NAME, + **traverse_obj(attrs, { + 'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}), + 'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}), + 'thumbnail': ('data-audioimage', {url_or_none}), + 'uploader': 'data-audio-producer', + 'series': 'data-audio-series', + 'channel': 'data-audio-origin-site-name', + 'webpage_url': ('data-audio-download-tracking-path', {url_or_none}), + }, get_all=False), + 'formats': (self._extract_m3u8_formats(url, audio_id, fatal=False) + if ext == 'm3u8' else [{'url': url, 'ext': ext, 'vcodec': 'none'}]) + } + + +class DLFIE(DLFBaseIE): + IE_NAME = 'dlf' + _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html' + _TESTS = [ + # Audio as an HLS stream + { + 'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html', + 'info_dict': { + 'id': '03a3eb19', + 'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien', + 'ext': 'm4a', + 'duration': 3298, + 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673', + 'uploader': 'Deutschlandfunk', + 'series': 'On Stage', + 'channel': 'deutschlandfunk' + }, + 'params': { + 'skip_download': 'm3u8' + }, + 'skip': 'This webpage no longer exists' + }, { + 'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html', + 'info_dict': { + 'id': 'd9cc1856', + 'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner', + 'ext': 'mp3', + 'duration': 291, + 'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673', + 'uploader': 'Deutschlandfunk', + 'series': 'Kommentare und Themen der Woche', + 'channel': 'deutschlandfunk' + } + }, + ] + + def _real_extract(self, url): + audio_id = self._match_id(url) + webpage = self._download_webpage(url, audio_id) + + return self._parse_button_attrs( + self._search_regex(self._BUTTON_REGEX, webpage, 'button'), audio_id) + + +class DLFCorpusIE(DLFBaseIE): + IE_NAME = 'dlf:corpus' + IE_DESC = 'DLF Multi-feed Archives' + _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html' + _TESTS = [ + # Recorded news broadcast with referrals to related broadcasts + { + 'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html', + 'info_dict': { + 'id': 'fechten-russland-belarus-ukraine-protest-100', + 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet', + 'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad' + }, + 'playlist_mincount': 5, + 'playlist': [{ + 'info_dict': { + 'id': '1fc5d64a', + 'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet', 
+ 'ext': 'mp3', + 'duration': 252, + 'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313', + 'uploader': 'Deutschlandfunk', + 'series': 'Sport', + 'channel': 'deutschlandfunk' + } + }, { + 'info_dict': { + 'id': '2ada145f', + 'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten', + 'ext': 'mp3', + 'duration': 336, + 'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005', + 'uploader': 'Deutschlandfunk', + 'series': 'Deutschlandfunk Nova', + 'channel': 'deutschlandfunk-nova' + } + }, { + 'info_dict': { + 'id': '5e55e8c9', + 'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis', + 'ext': 'mp3', + 'duration': 187, + 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412', + 'uploader': 'Deutschlandfunk', + 'series': 'Sport am Samstag', + 'channel': 'deutschlandfunk' + } + }, { + 'info_dict': { + 'id': '47e1a096', + 'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"', + 'ext': 'mp3', + 'duration': 602, + 'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770', + 'uploader': 'Deutschlandfunk', + 'series': 'Sport am Samstag', + 'channel': 'deutschlandfunk' + } + }, { + 'info_dict': { + 'id': '5e55e8c9', + 'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis', + 'ext': 'mp3', + 'duration': 187, + 'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412', + 'uploader': 'Deutschlandfunk', + 'series': 'Sport am Samstag', + 'channel': 'deutschlandfunk' + } + }] + }, + # Podcast feed with tag buttons, playlist count fluctuates + { + 'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html', + 'info_dict': { + 'id': 'kommentare-und-themen-der-woche-100', + 'title': 'Meinung - Kommentare und Themen der Woche', + 'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5', + }, + 'playlist_mincount': 10, + }, + # Podcast feed with no description + { + 'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html', + 'info_dict': { + 'id': 'podcast-tolle-idee-100', + 'title': 'Wissenschaftspodcast - Tolle Idee! 
- Was wurde daraus?', + }, + 'playlist_mincount': 11, + }, + ] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'description': self._html_search_meta( + ['description', 'og:description', 'twitter:description'], webpage, default=None), + 'title': self._html_search_meta( + ['og:title', 'twitter:title'], webpage, default=None), + 'entries': map(self._parse_button_attrs, re.findall(self._BUTTON_REGEX, webpage)), + } diff --git a/hypervideo_dl/extractor/douyutv.py b/hypervideo_dl/extractor/douyutv.py index 477f468..fa40844 100644 --- a/hypervideo_dl/extractor/douyutv.py +++ b/hypervideo_dl/extractor/douyutv.py @@ -1,6 +1,7 @@ import time import hashlib import re +import urllib from .common import InfoExtractor from ..utils import ( @@ -13,7 +14,7 @@ from ..utils import ( class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -22,7 +23,7 @@ class DouyuTVIE(InfoExtractor): 'ext': 'flv', 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.png', 'uploader': '7师傅', 'is_live': True, }, @@ -37,7 +38,7 @@ class DouyuTVIE(InfoExtractor): 'ext': 'flv', 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.png', 'uploader': 'douyu小漠', 'is_live': True, }, @@ -53,13 +54,28 @@ class DouyuTVIE(InfoExtractor): 'ext': 'flv', 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': r're:.*m7show@163\.com.*', - 'thumbnail': r're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.png', 'uploader': '7师傅', 'is_live': True, }, 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.douyu.com/topic/ydxc?rid=6560603', + 'info_dict': { + 'id': '6560603', + 'display_id': '6560603', + 'ext': 'flv', + 'title': 're:^阿余:新年快乐恭喜发财! 
[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 're:.*直播时间.*', + 'thumbnail': r're:^https?://.*\.png', + 'uploader': '阿涛皎月Carry', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.douyu.com/xiaocang', 'only_matching': True, @@ -79,28 +95,24 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id\\?"\s*:\s*(\d+),', page, 'room id') - # Grab metadata from mobile API + # Grab metadata from API + params = { + 'aid': 'wp', + 'client_sys': 'wp', + 'time': int(time.time()), + } + params['auth'] = hashlib.md5( + f'room/{video_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest() room = self._download_json( - 'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id, - note='Downloading room info')['data'] + f'http://www.douyutv.com/api/v1/room/{room_id}', video_id, + note='Downloading room info', query=params)['data'] # 1 = live, 2 = offline if room.get('show_status') == '2': raise ExtractorError('Live stream is offline', expected=True) - # Grab the URL from PC client API - # The m3u8 url from mobile API requires re-authentication every 5 minutes - tt = int(time.time()) - signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt) - sign = hashlib.md5(signContent.encode('ascii')).hexdigest() - video_url = self._download_json( - 'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id, - video_id, note='Downloading video URL info', - query={'rate': 0}, headers={ - 'auth': sign, - 'time': str(tt), - 'aid': 'pcclient' - })['data']['live_url'] + video_url = urljoin('https://hls3-akm.douyucdn.cn/', self._search_regex(r'(live/.*)', room['hls_url'], 'URL')) + formats, subs = self._extract_m3u8_formats_and_subtitles(video_url, room_id) title = unescapeHTML(room['room_name']) description = room.get('show_details') @@ -110,12 +122,13 @@ class DouyuTVIE(InfoExtractor): return { 'id': room_id, 'display_id': video_id, - 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, 'is_live': True, + 'subtitles': subs, + 'formats': formats, } diff --git a/hypervideo_dl/extractor/dplay.py b/hypervideo_dl/extractor/dplay.py index 8eb4d8f..363b4be 100644 --- a/hypervideo_dl/extractor/dplay.py +++ b/hypervideo_dl/extractor/dplay.py @@ -2,7 +2,7 @@ import json import uuid from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..networking.exceptions import HTTPError from ..utils import ( determine_ext, ExtractorError, @@ -39,7 +39,7 @@ class DPlayBaseIE(InfoExtractor): return f'Bearer {token}' def _process_errors(self, e, geo_countries): - info = self._parse_json(e.cause.read().decode('utf-8'), None) + info = self._parse_json(e.cause.response.read().decode('utf-8'), None) error = info['errors'][0] error_code = error.get('code') if error_code == 'access.denied.geoblocked': @@ -65,6 +65,7 @@ class DPlayBaseIE(InfoExtractor): return streaming_list def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''): + country = self.get_param('geo_bypass_country') or country geo_countries = [country.upper()] self._initialize_geo_bypass({ 'countries': geo_countries, @@ -86,7 +87,7 @@ class DPlayBaseIE(InfoExtractor): 'include': 'images,primaryChannel,show,tags' }) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + if isinstance(e.cause, HTTPError) and e.cause.status == 400: self._process_errors(e, 
geo_countries) raise video_id = video['data']['id'] @@ -98,7 +99,7 @@ class DPlayBaseIE(InfoExtractor): streaming = self._download_video_playback_info( disco_base, video_id, headers) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: self._process_errors(e, geo_countries) raise for format_dict in streaming: @@ -745,7 +746,7 @@ class MotorTrendIE(DiscoveryPlusBaseIE): class MotorTrendOnDemandIE(DiscoveryPlusBaseIE): - _VALID_URL = r'https?://(?:www\.)?motortrendondemand\.com/detail' + DPlayBaseIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?motortrend(?:ondemand\.com|\.com/plus)/detail' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.motortrendondemand.com/detail/wheelstanding-dump-truck-stubby-bobs-comeback/37699/784', 'info_dict': { @@ -766,6 +767,25 @@ class MotorTrendOnDemandIE(DiscoveryPlusBaseIE): 'upload_date': '20140101', 'tags': [], }, + }, { + 'url': 'https://www.motortrend.com/plus/detail/roadworthy-rescues-teaser-trailer/4922860/', + 'info_dict': { + 'id': '4922860', + 'ext': 'mp4', + 'title': 'Roadworthy Rescues | Teaser Trailer', + 'description': 'Derek Bieri helps Freiburger and Finnegan with their \'68 big-block Dart.', + 'display_id': 'roadworthy-rescues-teaser-trailer/4922860', + 'creator': 'Originals', + 'series': 'Roadworthy Rescues', + 'thumbnail': r're:^https?://.+\.jpe?g$', + 'upload_date': '20220907', + 'timestamp': 1662523200, + 'duration': 1066.356, + 'tags': [], + }, + }, { + 'url': 'https://www.motortrend.com/plus/detail/ugly-duckling/2450033/12439', + 'only_matching': True, }] _PRODUCT = 'MTOD' @@ -1001,3 +1021,39 @@ class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE): _SHOW_STR = 'show' _INDEX = 4 _VIDEO_IE = DiscoveryPlusIndiaIE + + +class GlobalCyclingNetworkPlusIE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://plus\.globalcyclingnetwork\.com/watch/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://plus.globalcyclingnetwork.com/watch/1397691', + 'info_dict': { + 'id': '1397691', + 'ext': 'mp4', + 'title': 'The Athertons: Mountain Biking\'s Fastest Family', + 'description': 'md5:75a81937fcd8b989eec6083a709cd837', + 'thumbnail': 'https://us1-prod-images.disco-api.com/2021/03/04/eb9e3026-4849-3001-8281-9356466f0557.png', + 'series': 'gcn', + 'creator': 'Gcn', + 'upload_date': '20210309', + 'timestamp': 1615248000, + 'duration': 2531.0, + 'tags': [], + }, + 'skip': 'Subscription required', + 'params': {'skip_download': 'm3u8'}, + }] + + _PRODUCT = 'web' + _DISCO_API_PARAMS = { + 'disco_host': 'disco-api-prod.globalcyclingnetwork.com', + 'realm': 'gcn', + 'country': 'us', + } + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': f'WEB:UNKNOWN:{self._PRODUCT}:27.3.2', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) diff --git a/hypervideo_dl/extractor/dropbox.py b/hypervideo_dl/extractor/dropbox.py index 214b309..bc2efce 100644 --- a/hypervideo_dl/extractor/dropbox.py +++ b/hypervideo_dl/extractor/dropbox.py @@ -1,3 +1,4 @@ +import base64 import os.path import re @@ -5,14 +6,13 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, - traverse_obj, - try_get, + update_url_query, url_basename, ) class DropboxIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*' + _VALID_URL = 
r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)' _TESTS = [ { 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0', @@ -22,7 +22,16 @@ class DropboxIE(InfoExtractor): 'title': 'youtube-dl test video \'ä"BaW_jenozKc' } }, { - 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v', + 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh', + 'only_matching': True, + }, { + 'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4', + 'only_matching': True, + }, { + 'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h', + 'only_matching': True, + }, { + 'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h', 'only_matching': True, }, ] @@ -53,16 +62,25 @@ class DropboxIE(InfoExtractor): else: raise ExtractorError('Password protected video, use --video-password <password>', expected=True) - info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id, - contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props'] - transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False) - formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id) + formats, subtitles, has_anonymous_download = [], {}, False + for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)): + decoded = base64.b64decode(encoded).decode('utf-8', 'ignore') + transcode_url = self._search_regex( + r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None) + if not transcode_url: + continue + formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4') + has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False) + break # if downloads are enabled, we can get the original file - if 'anonymous' in (try_get(info_json, lambda x: x['sharePermission']['canDownloadRoles']) or []): - video_url = re.sub(r'[?&]dl=0', '', url) - video_url += ('?' if '?' 
diff --git a/hypervideo_dl/extractor/dropout.py b/hypervideo_dl/extractor/dropout.py index e280b1c..80ae6c1 100644 --- a/hypervideo_dl/extractor/dropout.py +++ b/hypervideo_dl/extractor/dropout.py @@ -1,13 +1,17 @@ +import functools + from .common import InfoExtractor from .vimeo import VHXEmbedIE from ..utils import ( ExtractorError, + OnDemandPagedList, clean_html, + extract_attributes, get_element_by_class, get_element_by_id, - get_elements_by_class, + get_elements_html_by_class, int_or_none, - join_nonempty, + traverse_obj, unified_strdate, urlencode_postdata, ) @@ -162,12 +166,13 @@ class DropoutIE(InfoExtractor): class DropoutSeasonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)' + _PAGE_SIZE = 24 + _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:(?P<season>[0-9]+)/?$)' _TESTS = [ { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', 'note': 'Multi-season series with the season in the url', - 'playlist_count': 17, + 'playlist_count': 24, 'info_dict': { 'id': 'dimension-20-fantasy-high-season-1', 'title': 'Dimension 20 Fantasy High - Season 1' @@ -176,7 +181,7 @@ { 'url': 'https://www.dropout.tv/dimension-20-fantasy-high', 'note': 'Multi-season series with the season not in the url', - 'playlist_count': 17, + 'playlist_count': 24, 'info_dict': { 'id': 'dimension-20-fantasy-high-season-1', 'title': 'Dimension 20 Fantasy High - Season 1' @@ -190,29 +195,30 @@ class DropoutSeasonIE(InfoExtractor): 'id': 'dimension-20-shriek-week-season-1', 'title': 'Dimension 20 Shriek Week - Season 1' } + }, + { + 'url': 'https://www.dropout.tv/breaking-news-no-laugh-newsroom/season:3', + 'note': 'Multi-season series with season in the url that requires pagination', + 'playlist_count': 25, + 'info_dict': { + 'id': 'breaking-news-no-laugh-newsroom-season-3', + 'title': 'Breaking News No Laugh Newsroom - Season 3' + } } ] + def _fetch_page(self, url, season_id, page): + page += 1 + webpage = self._download_webpage( + f'{url}?page={page}', season_id, note=f'Downloading page {page}', expected_status={400}) + yield from [self.url_result(item_url, DropoutIE) for item_url in traverse_obj( + get_elements_html_by_class('browse-item-link', webpage), (..., {extract_attributes}, 'href'))] + def _real_extract(self, url): season_id = self._match_id(url) + season_num = self._match_valid_url(url).group('season') or 1 season_title = season_id.replace('-', ' ').title() - webpage = self._download_webpage(url, season_id) - - entries = [ - self.url_result( - url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']', - item, 'item_url'), - ie=DropoutIE.ie_key() - ) for item in get_elements_by_class('js-collection-item', webpage) - ] - - seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip() or '' - current_season = self._search_regex(r'<option[^>]+selected>([^<]+)', - seasons, 'current_season', default='').strip() - return { - '_type': 'playlist', - 'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')), - 'title': join_nonempty(season_title, current_season, delim=' - '), - 'entries': entries - } + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, url, season_id), self._PAGE_SIZE), + f'{season_id}-season-{season_num}', f'{season_title} - Season {season_num}')
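The DropoutSeasonIE rewrite above swaps a one-shot scrape for lazy pagination. A hedged, runnable sketch of the OnDemandPagedList pattern, with a fake page fetcher standing in for _download_webpage (assumes the hypervideo_dl package is importable):

    import functools

    from hypervideo_dl.utils import OnDemandPagedList

    PAGE_SIZE = 24

    def fetch_page(season_id, page):
        page += 1  # the callback gets 0-based pages; the site counts from 1
        # a real fetcher would download f'{url}?page={page}' here
        yield from (f'{season_id}/episode-{(page - 1) * PAGE_SIZE + n}'
                    for n in range(PAGE_SIZE))

    entries = OnDemandPagedList(functools.partial(fetch_page, 'demo-season'), PAGE_SIZE)
    print(entries[0])  # only the first page is actually fetched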
diff --git a/hypervideo_dl/extractor/drtv.py b/hypervideo_dl/extractor/drtv.py index 128f439..6c381aa 100644 --- a/hypervideo_dl/extractor/drtv.py +++ b/hypervideo_dl/extractor/drtv.py @@ -2,28 +2,29 @@ import binascii import hashlib import re - from .common import InfoExtractor from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, mimetype2ext, str_or_none, - try_get, + traverse_obj, unified_timestamp, update_url_query, url_or_none, ) +SERIES_API = 'https://production-cdn.dr-massive.com/api/page?device=web_browser&item_detail_expand=all&lang=da&max_list_prefetch=3&path=%s' + class DRTVIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*| + (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*| (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ ) (?P<id>[\da-z_-]+) @@ -78,7 +79,7 @@ class DRTVIE(InfoExtractor): 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', 'timestamp': 1546628400, 'upload_date': '20190104', - 'duration': 3504.618, + 'duration': 3504.619, 'formats': 'mincount:20', 'release_year': 2017, 'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35', @@ -99,14 +100,16 @@ 'ext': 'mp4', 'title': 'Bonderøven 2019 (1:8)', 'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd', - 'timestamp': 1603188600, - 'upload_date': '20201020', + 'timestamp': 1654856100, + 'upload_date': '20220610', 'duration': 2576.6, 'season': 'Bonderøven 2019', 'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5', 'release_year': 2019, 'season_number': 2019, - 'series': 'Frank & Kastaniegaarden' + 'series': 'Frank & Kastaniegaarden', + 'episode_number': 1, + 'episode': 'Episode 1', }, 'params': { 'skip_download': True, @@ -138,16 +141,32 @@ 'params': { 'skip_download': True, }, + 'skip': 'this video has been removed', + }, { + 'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9', + 'info_dict': { + 'ext': 'mp4', + 'id': '14802310112', + 'timestamp': 1678786200, + 'duration': 120.043, + 'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f', + 'series': 'P4 København regionale nyheder', + 'upload_date': '20230314', + 'release_year': 0, + 'description': 'Hør seneste regionale nyheder fra P4 København.', + 'season': 'Regionale nyheder', + 'title': 'Regionale nyheder', + }, }] def _real_extract(self, url): - video_id = self._match_id(url) + raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio') - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, raw_video_id) if '>Programmet er ikke længere tilgængeligt' in webpage: raise ExtractorError( - 'Video %s is not available' % video_id, expected=True) + 'Video %s is not available' % raw_video_id, expected=True) video_id = self._search_regex( (r'data-(?:material-identifier|episode-slug)="([^"]+)"', @@ -168,20 +187,27 @@ class DRTVIE(InfoExtractor): programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id) else: programcard_url = _PROGRAMCARD_BASE - page = self._parse_json( - self._search_regex( - r'data\s*=\s*({.+?})\s*(?:;|</script)' +class DRTVSeasonIE(InfoExtractor): + IE_NAME = 'drtv:season' + _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/saeson/(?P<display_id>[\w-]+)_(?P<id>\d+)' + _GEO_COUNTRIES = ['DK'] + _TESTS = [{ 'url': 'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_9008', + 'info_dict': { + 'id': '9008', + 'display_id': 'frank-and-kastaniegaarden', + 'title': 'Frank & Kastaniegaarden', + 'series': 'Frank & Kastaniegaarden', + }, + 'playlist_mincount': 8 + }, { + 'url':
'https://www.dr.dk/drtv/saeson/frank-and-kastaniegaarden_8761', + 'info_dict': { + 'id': '8761', + 'display_id': 'frank-and-kastaniegaarden', + 'title': 'Frank & Kastaniegaarden', + 'series': 'Frank & Kastaniegaarden', + }, + 'playlist_mincount': 19 + }] + + def _real_extract(self, url): + display_id, season_id = self._match_valid_url(url).group('display_id', 'id') + data = self._download_json(SERIES_API % f'/saeson/{display_id}_{season_id}', display_id) + + entries = [{ + '_type': 'url', + 'url': f'https://www.dr.dk/drtv{episode["path"]}', + 'ie_key': DRTVIE.ie_key(), + 'title': episode.get('title'), + 'episode': episode.get('episodeName'), + 'description': episode.get('shortDescription'), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')), + 'episode_number': episode.get('episodeNumber'), + } for episode in traverse_obj(data, ('entries', 0, 'item', 'episodes', 'items'))] + + return { + '_type': 'playlist', + 'id': season_id, + 'display_id': display_id, + 'title': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'entries': entries, + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')) + } + + +class DRTVSeriesIE(InfoExtractor): + IE_NAME = 'drtv:series' + _VALID_URL = r'https?://(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/serie/(?P<display_id>[\w-]+)_(?P<id>\d+)' + _GEO_COUNTRIES = ['DK'] + _TESTS = [{ + 'url': 'https://www.dr.dk/drtv/serie/frank-and-kastaniegaarden_6954', + 'info_dict': { + 'id': '6954', + 'display_id': 'frank-and-kastaniegaarden', + 'title': 'Frank & Kastaniegaarden', + 'series': 'Frank & Kastaniegaarden', + }, + 'playlist_mincount': 15 + }] + + def _real_extract(self, url): + display_id, series_id = self._match_valid_url(url).group('display_id', 'id') + data = self._download_json(SERIES_API % f'/serie/{display_id}_{series_id}', display_id) + + entries = [{ + '_type': 'url', + 'url': f'https://www.dr.dk/drtv{season.get("path")}', + 'ie_key': DRTVSeasonIE.ie_key(), + 'title': season.get('title'), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'season_number': traverse_obj(data, ('entries', 0, 'item', 'seasonNumber')) + } for season in traverse_obj(data, ('entries', 0, 'item', 'show', 'seasons', 'items'))] + + return { + '_type': 'playlist', + 'id': series_id, + 'display_id': display_id, + 'title': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'series': traverse_obj(data, ('entries', 0, 'item', 'title')), + 'entries': entries + } diff --git a/hypervideo_dl/extractor/dumpert.py b/hypervideo_dl/extractor/dumpert.py index 010c2d0..0cf8426 100644 --- a/hypervideo_dl/extractor/dumpert.py +++ b/hypervideo_dl/extractor/dumpert.py @@ -1,12 +1,17 @@ from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, qualities, ) class DumpertIE(InfoExtractor): - _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)' + _VALID_URL = r'''(?x) + (?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl(?: + /(?:mediabase|embed|item)/| + (?:/toppers|/latest|/?)\?selectedId= + )(?P<id>[0-9]+[/_][0-9a-zA-Z]+)''' _TESTS = [{ 'url': 'https://www.dumpert.nl/item/6646981_951bc60f', 'md5': '1b9318d7d5054e7dcb9dc7654f21d643', 'info_dict': { 'id': '6646981/951bc60f', 'ext': 'mp4', 'title': 'Ik heb nieuws voor je', 'description': 'Niet schrikken hoor', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 9, + 'view_count': int, + 'like_count':
int, } }, { 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7', @@ -26,6 +34,28 @@ class DumpertIE(InfoExtractor): }, { 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7', 'only_matching': True, + }, { + 'url': 'https://www.dumpert.nl/item/100031688_b317a185', + 'info_dict': { + 'id': '100031688/b317a185', + 'ext': 'mp4', + 'title': 'Epic schijnbeweging', + 'description': '
<p>Die zag je niet eh</p>
', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'duration': 12, + 'view_count': int, + 'like_count': int, + }, + 'params': {'skip_download': 'm3u8'} + }, { + 'url': 'https://www.dumpert.nl/toppers?selectedId=100031688_b317a185', + 'only_matching': True, + }, { + 'url': 'https://www.dumpert.nl/latest?selectedId=100031688_b317a185', + 'only_matching': True, + }, { + 'url': 'https://www.dumpert.nl/?selectedId=100031688_b317a185', + 'only_matching': True, }] def _real_extract(self, url): @@ -36,18 +66,23 @@ class DumpertIE(InfoExtractor): title = item['title'] media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO') - quality = qualities(['flv', 'mobile', 'tablet', '720p']) + quality = qualities(['flv', 'mobile', 'tablet', '720p', '1080p']) formats = [] for variant in media.get('variants', []): uri = variant.get('uri') if not uri: continue version = variant.get('version') - formats.append({ - 'url': uri, - 'format_id': version, - 'quality': quality(version), - }) + preference = quality(version) + if determine_ext(uri) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + uri, video_id, 'mp4', m3u8_id=version, quality=preference)) + else: + formats.append({ + 'url': uri, + 'format_id': version, + 'quality': preference, + }) thumbnails = [] stills = item.get('stills') or {} diff --git a/hypervideo_dl/extractor/eagleplatform.py b/hypervideo_dl/extractor/eagleplatform.py index 9ebd24d..739d179 100644 --- a/hypervideo_dl/extractor/eagleplatform.py +++ b/hypervideo_dl/extractor/eagleplatform.py @@ -2,7 +2,7 @@ import functools import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, int_or_none, @@ -111,8 +111,8 @@ class EaglePlatformIE(InfoExtractor): response = super(EaglePlatformIE, self)._download_json( url_or_request, video_id, *args, **kwargs) except ExtractorError as ee: - if isinstance(ee.cause, compat_HTTPError): - response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) + if isinstance(ee.cause, HTTPError): + response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id) self._handle_error(response) raise return response diff --git a/hypervideo_dl/extractor/ebay.py b/hypervideo_dl/extractor/ebay.py new file mode 100644 index 0000000..d0eb9fc --- /dev/null +++ b/hypervideo_dl/extractor/ebay.py @@ -0,0 +1,36 @@ +from .common import InfoExtractor +from ..utils import remove_end + + +class EbayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ebay\.com/itm/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.ebay.com/itm/194509326719', + 'info_dict': { + 'id': '194509326719', + 'ext': 'mp4', + 'title': 'WiFi internal antenna adhesive for wifi 2.4GHz wifi 5 wifi 6 wifi 6E full bands', + }, + 'params': {'skip_download': 'm3u8'} + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_json = self._search_json(r'"video":', webpage, 'video json', video_id) + + formats = [] + for key, url in video_json['playlistMap'].items(): + if key == 'HLS': + formats.extend(self._extract_m3u8_formats(url, video_id, fatal=False)) + elif key == 'DASH': + formats.extend(self._extract_mpd_formats(url, video_id, fatal=False)) + else: + self.report_warning(f'Unsupported format {key}', video_id) + + return { + 'id': video_id, + 'title': remove_end(self._html_extract_title(webpage), ' | eBay'), + 'formats': formats + }
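A migration repeated throughout this patch (dplay, eagleplatform, filmon, fox, funimation): the old compat_HTTPError exposed .code and .read(), while the new networking HTTPError exposes .status and a .response object. A hedged sketch of the new error-handling shape (assumes the hypervideo_dl package is importable):

    from hypervideo_dl.networking.exceptions import HTTPError
    from hypervideo_dl.utils import ExtractorError

    def describe_http_failure(e: ExtractorError):
        # e.cause may be a networking HTTPError wrapping the server response
        if isinstance(e.cause, HTTPError) and e.cause.status == 403:
            # the response body often carries a JSON error payload
            return e.cause.response.read().decode('utf-8', 'ignore')
        return None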
diff --git a/hypervideo_dl/extractor/eitb.py b/hypervideo_dl/extractor/eitb.py index bd027da..66afbb6 100644 --- a/hypervideo_dl/extractor/eitb.py +++ b/hypervideo_dl/extractor/eitb.py @@ -1,10 +1,6 @@ from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - parse_iso8601, - sanitized_Request, -) +from ..networking import Request +from ..utils import float_or_none, int_or_none, parse_iso8601 class EitbIE(InfoExtractor): @@ -54,7 +50,7 @@ class EitbIE(InfoExtractor): hls_url = media.get('HLS_SURL') if hls_url: - request = sanitized_Request( + request = Request( 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/', headers={'Referer': url}) token_data = self._download_json( diff --git a/hypervideo_dl/extractor/elevensports.py b/hypervideo_dl/extractor/elevensports.py new file mode 100644 index 0000000..99c52b3 --- /dev/null +++ b/hypervideo_dl/extractor/elevensports.py @@ -0,0 +1,59 @@ +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + traverse_obj, + url_or_none, +) + + +class ElevenSportsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?elevensports\.com/view/event/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://elevensports.com/view/event/clf46yr3kenn80jgrqsjmwefk', + 'md5': 'c0958d9ff90e4503a75544358758921d', + 'info_dict': { + 'id': 'clf46yr3kenn80jgrqsjmwefk', + 'title': 'Cleveland SC vs Lionsbridge FC', + 'ext': 'mp4', + 'description': 'md5:03b5238d6549f4ea1fddadf69b5e0b58', + 'upload_date': '20230323', + 'timestamp': 1679612400, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'params': {'skip_download': 'm3u8'} + }, { + 'url': 'https://elevensports.com/view/event/clhpyd53b06160jez74qhgkmf', + 'md5': 'c0958d9ff90e4503a75544358758921d', + 'info_dict': { + 'id': 'clhpyd53b06160jez74qhgkmf', + 'title': 'AJNLF vs ARRAF', + 'ext': 'mp4', + 'description': 'md5:c8c5e75c78f37c6d15cd6c475e43a8c1', + 'upload_date': '20230521', + 'timestamp': 1684684800, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + }, + 'params': {'skip_download': 'm3u8'} + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + event_id = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['event']['mclsEventId'] + event_data = self._download_json( + f'https://mcls-api.mycujoo.tv/bff/events/v1beta1/{event_id}', video_id, + headers={'Authorization': 'Bearer FBVKACGN37JQC5SFA0OVK8KKSIOP153G'}) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + event_data['streams'][0]['full_url'], video_id, 'mp4', m3u8_id='hls') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(event_data, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('start_time', {parse_iso8601}), + 'thumbnail': ('thumbnail_url', {url_or_none}), + }), + } diff --git a/hypervideo_dl/extractor/embedly.py b/hypervideo_dl/extractor/embedly.py index 483d018..458aaa0 100644 --- a/hypervideo_dl/extractor/embedly.py +++ b/hypervideo_dl/extractor/embedly.py @@ -1,24 +1,109 @@ import re import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from .youtube import YoutubeTabIE +from ..utils import parse_qs, smuggle_url, traverse_obj class EmbedlyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' + _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)' _TESTS = [{ 'url':
'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'info_dict': { + 'id': 'UUGLim4T2loE5rwCMdpCIPVg', + 'modified_date': '20221225', + 'view_count': int, + 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'uploader': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel': 'TraciJHines', + 'availability': 'public', + 'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'description': '', + 'tags': [], + 'title': 'Uploads from TraciJHines', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'params': {'noplaylist': True}, + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'age_limit': 0, + 'categories': ['Entertainment'], + 'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel': 'TraciJHines', + 'uploader_id': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg', + 'uploader_url': 'http://www.youtube.com/user/TraciJHines', + 'upload_date': '20150211', + 'duration': 282, + 'availability': 'public', + 'channel_follower_count': int, + 'tags': 'count:39', + 'view_count': int, + 'comment_count': int, + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'like_count': int, + 'uploader': 'TraciJHines', + 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364', + 'chapters': list, + + }, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + 'url': 'http://www.permacultureetc.com/2022/12/comment-greffer-facilement-les-arbres-fruitiers.html', + 'info_dict': { + 'id': 'pfUK_ADTvgY', + 'ext': 'mp4', + 'title': 'Comment greffer facilement les arbres fruitiers ? 
(mois par mois)', + 'description': 'md5:d3a876995e522f138aabb48e040bfb4c', + 'view_count': int, + 'upload_date': '20221210', + 'comment_count': int, + 'live_status': 'not_live', + 'channel_id': 'UCsM4_jihNFYe4CtSkXvDR-Q', + 'channel_follower_count': int, + 'tags': ['permaculture', 'jardinage', 'dekarz', 'autonomie', 'greffe', 'fruitiers', 'arbres', 'jardin forêt', 'forêt comestible', 'damien'], + 'playable_in_embed': True, + 'uploader': 'permaculture agroécologie etc...', + 'channel': 'permaculture agroécologie etc...', + 'thumbnail': 'https://i.ytimg.com/vi/pfUK_ADTvgY/sddefault.jpg', + 'duration': 1526, + 'channel_url': 'https://www.youtube.com/channel/UCsM4_jihNFYe4CtSkXvDR-Q', + 'age_limit': 0, + 'uploader_id': 'permacultureetc', + 'like_count': int, + 'uploader_url': 'http://www.youtube.com/user/permacultureetc', + 'categories': ['Education'], + 'availability': 'public', + }, + }] + @classmethod - def _extract_embed_urls(cls, url, webpage): - # Bypass suitable check + def _extract_from_webpage(cls, url, webpage): + # Bypass "ie=cls" and suitable check for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage): - yield mobj.group('url') + yield cls.url_result(mobj.group('url')) for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage): - yield urllib.parse.unquote(mobj.group('url')) + yield cls.url_result(urllib.parse.unquote(mobj.group('url'))) def _real_extract(self, url): - return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) + qs = parse_qs(url) + src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '') + if src and YoutubeTabIE.suitable(src): + return self.url_result(src, YoutubeTabIE) + return self.url_result(smuggle_url( + urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))), + {'http_headers': {'Referer': url}})) diff --git a/hypervideo_dl/extractor/eporner.py b/hypervideo_dl/extractor/eporner.py index a233797..aee2dee 100644 --- a/hypervideo_dl/extractor/eporner.py +++ b/hypervideo_dl/extractor/eporner.py @@ -52,7 +52,7 @@ class EpornerIE(InfoExtractor): webpage, urlh = self._download_webpage_handle(url, display_id) - video_id = self._match_id(urlh.geturl()) + video_id = self._match_id(urlh.url) hash = self._search_regex( r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash') diff --git a/hypervideo_dl/extractor/espn.py b/hypervideo_dl/extractor/espn.py index f4b0134..7ed824c 100644 --- a/hypervideo_dl/extractor/espn.py +++ b/hypervideo_dl/extractor/espn.py @@ -240,7 +240,7 @@ class FiveThirtyEightIE(InfoExtractor): class ESPNCricInfoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/(?:cricket-)?videos?/[^#$&?/]+-(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135', 'info_dict': { @@ -252,6 +252,17 @@ class ESPNCricInfoIE(InfoExtractor): 'duration': 96, }, 'params': {'skip_download': True} + }, { + 'url': 'https://www.espncricinfo.com/cricket-videos/daryl-mitchell-mitchell-santner-is-one-of-the-best-white-ball-spinners-india-vs-new-zealand-1356225', + 'info_dict': { + 'id': '1356225', + 'ext': 'mp4', + 'description': '"Santner has done it for a long time for New Zealand - we\'re lucky to have him"', + 'upload_date': '20230128', + 'title': 'Mitchell: \'Santner is one of the best white-ball spinners at the moment\'', + 'duration': 87, + }, + 'params': {'skip_download': 'm3u8'}, }]
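The embedly rewrite above stops treating the widget URL's parameter as a bare video ID: it forwards playlist URLs to YoutubeTabIE and otherwise unquotes the src/url parameter and smuggles the referer along. A standalone sketch of that forwarding with a made-up widget URL (assumes the hypervideo_dl package is importable):

    import urllib.parse

    from hypervideo_dl.utils import parse_qs, smuggle_url, traverse_obj

    url = ('https://cdn.embedly.com/widgets/media.html?'
          'src=https%3A%2F%2Fplayer.vimeo.com%2Fvideo%2F1234567')
    qs = parse_qs(url)
    # prefer 'src', fall back to 'url', then pass the referer to the delegated extractor
    target = urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0)))
    print(smuggle_url(target, {'http_headers': {'Referer': url}}))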
def _real_extract(self, url): diff --git a/hypervideo_dl/extractor/ettutv.py b/hypervideo_dl/extractor/ettutv.py new file mode 100644 index 0000000..133b525 --- /dev/null +++ b/hypervideo_dl/extractor/ettutv.py @@ -0,0 +1,60 @@ +from .common import InfoExtractor +from ..utils import bool_or_none, traverse_obj, unified_timestamp, url_or_none + + +class EttuTvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ettu\.tv/[^?#]+/playerpage/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'https://www.ettu.tv/en-int/playerpage/1573849', + 'md5': '5874b7639a2aa866d1f6c3a4037c7c09', + 'info_dict': { + 'id': '1573849', + 'title': 'Ni Xia Lian - Shao Jieni', + 'description': 'ITTF Europe Top 16 Cup', + 'timestamp': 1677348600, + 'upload_date': '20230225', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.ettu.tv/en-int/playerpage/1573753', + 'md5': '1fc094bf96cf2d5ec0f434d3a6dec9aa', + 'info_dict': { + 'id': '1573753', + 'title': 'Qiu Dang - Jorgic Darko', + 'description': 'ITTF Europe Top 16 Cup', + 'timestamp': 1677423600, + 'upload_date': '20230226', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'ext': 'mp4', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_settings = self._download_json( + f'https://www.ettu.tv/api/v3/contents/{video_id}/player-settings', video_id, query={ + 'language': 'en', + 'showTitle': 'true', + 'device': 'desktop', + }) + + stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'') + + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + stream_response['data']['stream'], video_id, 'mp4') + + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + **traverse_obj(player_settings, { + 'title': 'title', + 'description': ('metaInformation', 'competition'), + 'thumbnail': ('image', {url_or_none}), + 'timestamp': ('date', {unified_timestamp}), + 'is_live': ('isLivestream', {bool_or_none}), + }) + } diff --git a/hypervideo_dl/extractor/europa.py b/hypervideo_dl/extractor/europa.py index c2b4937..f3da95f 100644 --- a/hypervideo_dl/extractor/europa.py +++ b/hypervideo_dl/extractor/europa.py @@ -3,8 +3,10 @@ from ..utils import ( int_or_none, orderedSet, parse_duration, + parse_iso8601, parse_qs, qualities, + traverse_obj, unified_strdate, xpath_text ) @@ -87,3 +89,85 @@ class EuropaIE(InfoExtractor): 'view_count': view_count, 'formats': formats } + + +class EuroParlWebstreamIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://multimedia\.europarl\.europa\.eu/[^/#?]+/ + (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+) + ''' + _TESTS = [{ + 'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY', + 'info_dict': { + 'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d', + 'ext': 'mp4', + 'title': 'Plenary session', + 'release_timestamp': 1663139069, + 'release_date': '20220914', + }, + 'params': { + 'skip_download': True, + } + }, { + # live webstream + 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA', + 'info_dict': { + 'ext': 'mp4', + 'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715', + 'release_timestamp': 1668502800, + 'title': 'Euroscola 2022-11-15 19:21', + 'release_date': '20221115', + 'live_status': 'is_live', + }, + 'skip': 'not live anymore' + }, { + 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT', + 'info_dict': { + 'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7', + 'ext': 'mp4', +
'release_date': '20230301', + 'title': 'Committee on Culture and Education', + 'release_timestamp': 1677666641, + } }, { + # live stream + 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI', + 'info_dict': { + 'id': 'e4255f56-10aa-4b3c-6530-08db56d5b0d9', + 'ext': 'mp4', + 'release_date': '20230524', + 'title': r're:Committee on Environment, Public Health and Food Safety \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}', + 'release_timestamp': 1684911541, + 'live_status': 'is_live', + }, + 'skip': 'Not live anymore' + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + webpage_nextjs = self._search_nextjs_data(webpage, display_id)['props']['pageProps'] + + json_info = self._download_json( + 'https://acs-api.europarl.connectedviews.eu/api/FullMeeting', display_id, + query={ + 'api-version': 1.0, + 'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968', + 'externalReference': display_id + }) + + formats, subtitles = [], {} + for hls_url in traverse_obj(json_info, ((('meetingVideo'), ('meetingVideos', ...)), 'hlsUrl')): + fmt, subs = self._extract_m3u8_formats_and_subtitles(hls_url, display_id) + formats.extend(fmt) + self._merge_subtitles(subs, target=subtitles) + + return { + 'id': json_info['id'], + 'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False), + 'formats': formats, + 'subtitles': subtitles, + 'release_timestamp': parse_iso8601(json_info.get('startDateTime')), + 'is_live': traverse_obj(webpage_nextjs, ('mediaItem', 'mediaSubType')) == 'Live' + } diff --git a/hypervideo_dl/extractor/eurosport.py b/hypervideo_dl/extractor/eurosport.py index 654e112..6c426bb 100644 --- a/hypervideo_dl/extractor/eurosport.py +++ b/hypervideo_dl/extractor/eurosport.py @@ -3,7 +3,7 @@ from ..utils import traverse_obj class EurosportIE(InfoExtractor): - _VALID_URL = r'https?://www\.eurosport\.com/\w+/[\w-]+/\d+/[\w-]+_(?P<id>vid\d+)' + _VALID_URL = r'https?://www\.eurosport\.com/\w+/(?:[\w-]+/[\d-]+/)?[\w-]+_(?P<id>vid\d+)' _TESTS = [{ 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml', 'info_dict': { @@ -44,6 +44,32 @@ class EurosportIE(InfoExtractor): 'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71', 'upload_date': '20220727', } + }, { + 'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml', + 'info_dict': { + 'id': '3096477', + 'ext': 'mp4', + 'title': 'md5:82edc17370124c7a19b3cf518517583b', + 'duration': 84.0, + 'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb', + 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg', + 'timestamp': 1681292028, + 'upload_date': '20230412', + 'display_id': 'vid1896254', + } + }, { + 'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml', + 'info_dict': { + 'id': '3149108', + 'ext': 'mp4', + 'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final', + 'description': 'md5:89ef142fe0170a66abab77fac2955d8e', + 'display_id': 'vid1914115', + 'timestamp': 1684403618,
'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg', + 'duration': 105.0, + 'upload_date': '20230518', + } }] _TOKEN = None diff --git a/hypervideo_dl/extractor/extractors.py b/hypervideo_dl/extractor/extractors.py index 610e02f..baa69d2 100644 --- a/hypervideo_dl/extractor/extractors.py +++ b/hypervideo_dl/extractor/extractors.py @@ -1,10 +1,10 @@ import contextlib import os -from ..utils import load_plugins +from ..plugins import load_plugins # NB: Must be before other imports so that plugins can be correctly injected -_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {}) +_PLUGIN_CLASSES = load_plugins('extractor', 'IE') _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): @@ -24,3 +24,5 @@ if not _LAZY_LOADER: globals().update(_PLUGIN_CLASSES) _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() + +from .common import _PLUGIN_OVERRIDES # noqa: F401 diff --git a/hypervideo_dl/extractor/facebook.py b/hypervideo_dl/extractor/facebook.py index a58d9c8..021c3cf 100644 --- a/hypervideo_dl/extractor/facebook.py +++ b/hypervideo_dl/extractor/facebook.py @@ -8,6 +8,8 @@ from ..compat import ( compat_str, compat_urllib_parse_unquote, ) +from ..networking import Request +from ..networking.exceptions import network_exceptions from ..utils import ( ExtractorError, clean_html, @@ -19,11 +21,10 @@ from ..utils import ( int_or_none, js_to_json, merge_dicts, - network_exceptions, parse_count, parse_qs, qualities, - sanitized_Request, + str_or_none, traverse_obj, try_get, url_or_none, @@ -90,16 +91,16 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '274175099429670', 'ext': 'mp4', - 'title': 'Asif Nawab Butt', - 'description': 'Asif Nawab Butt', + 'title': 'Asif', + 'description': '', 'uploader': 'Asif Nawab Butt', 'upload_date': '20140506', 'timestamp': 1399398998, 'thumbnail': r're:^https?://.*', + 'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl', + 'duration': 131.03, + 'concurrent_view_count': int, }, - 'expected_warnings': [ - 'title' - ] }, { 'note': 'Video with DASH manifest', 'url': 'https://www.facebook.com/video.php?v=957955867617029', @@ -151,7 +152,7 @@ class FacebookIE(InfoExtractor): # have 1080P, but only up to 720p in swf params # data.video.story.attachments[].media 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', - 'md5': '3f3798adb2b73423263e59376f1f5eb7', + 'md5': 'ca63897a90c9452efee5f8c40d080e25', 'info_dict': { 'id': '10155529876156509', 'ext': 'mp4', @@ -162,6 +163,9 @@ class FacebookIE(InfoExtractor): 'uploader': 'CNN', 'thumbnail': r're:^https?://.*', 'view_count': int, + 'uploader_id': '100059479812265', + 'concurrent_view_count': int, + 'duration': 44.478, }, }, { # bigPipe.onPageletArrive ... 
onPageletArrive pagelet_group_mall @@ -170,12 +174,16 @@ class FacebookIE(InfoExtractor): 'info_dict': { 'id': '1417995061575415', 'ext': 'mp4', - 'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео', + 'title': 'Довгоочікуване відео | By Yaroslav - Facebook', 'description': 'Довгоочікуване відео', - 'timestamp': 1486648771, + 'timestamp': 1486648217, 'upload_date': '20170209', 'uploader': 'Yaroslav Korpan', - 'uploader_id': '100000948048708', + 'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl', + 'concurrent_view_count': int, + 'thumbnail': r're:^https?://.*', + 'view_count': int, + 'duration': 11736.446, }, 'params': { 'skip_download': True, @@ -192,9 +200,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'La Guía Del Varón', 'thumbnail': r're:^https?://.*', }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Requires logging in', }, { # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', @@ -208,9 +214,7 @@ class FacebookIE(InfoExtractor): 'uploader': 'Elisabeth Ahtn', 'uploader_id': '100013949973717', }, - 'params': { - 'skip_download': True, - }, + 'skip': 'Requires logging in', }, { 'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'only_matching': True, @@ -252,7 +256,11 @@ class FacebookIE(InfoExtractor): 'timestamp': 1527084179, 'upload_date': '20180523', 'uploader': 'ESL One Dota 2', - 'uploader_id': '234218833769558', + 'uploader_id': '100066514874195', + 'duration': 4524.212, + 'view_count': int, + 'thumbnail': r're:^https?://.*', + 'concurrent_view_count': int, }, 'params': { 'skip_download': True, @@ -262,8 +270,17 @@ class FacebookIE(InfoExtractor): 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'info_dict': { 'id': '106560053808006', + 'ext': 'mp4', + 'title': 'Josef', + 'thumbnail': r're:^https?://.*', + 'concurrent_view_count': int, + 'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl', + 'timestamp': 1549275572, + 'duration': 3.413, + 'uploader': 'Josef Novak', + 'description': '', + 'upload_date': '20190204', }, - 'playlist_count': 2, }, { # data.video.story.attachments[].media 'url': 'https://www.facebook.com/watch/?v=647537299265662', @@ -276,6 +293,7 @@ class FacebookIE(InfoExtractor): 'id': '10157667649866271', }, 'playlist_count': 3, + 'skip': 'Requires logging in', }, { # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330', @@ -319,7 +337,7 @@ class FacebookIE(InfoExtractor): } def _perform_login(self, username, password): - login_page_req = sanitized_Request(self._LOGIN_URL) + login_page_req = Request(self._LOGIN_URL) self._set_cookie('facebook.com', 'locale', 'en_US') login_page = self._download_webpage(login_page_req, None, note='Downloading login page', @@ -340,8 +358,8 @@ class FacebookIE(InfoExtractor): 'timezone': '-60', 'trynum': '1', } - request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') + request = Request(self._LOGIN_URL, urlencode_postdata(login_form)) + request.headers['Content-Type'] = 'application/x-www-form-urlencoded' try: login_results = self._download_webpage(request, None, note='Logging in', errnote='unable to fetch login page') @@ -367,8 +385,8 @@ class 
FacebookIE(InfoExtractor): 'h': h, 'name_action_selected': 'dont_save', } - check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) - check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') + check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) + check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded' check_response = self._download_webpage(check_req, None, note='Confirming login') if re.search(r'id="checkpointSubmitButton"', check_response) is not None: @@ -390,7 +408,10 @@ class FacebookIE(InfoExtractor): k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) - uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} + uploader_data = ( + get_first(media, ('owner', {dict})) + or get_first(post, (..., 'video', lambda k, v: k == 'owner' and v['name'])) + or get_first(post, ('node', 'actors', ..., {dict})) or {}) page_title = title or self._html_search_regex(( r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>', @@ -415,16 +436,17 @@ class FacebookIE(InfoExtractor): # in https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/ if thumbnail and not re.search(r'\.(?:jpg|png)', thumbnail): thumbnail = None - view_count = parse_count(self._search_regex( - r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', - default=None)) info_dict = { 'description': description, 'uploader': uploader, 'uploader_id': uploader_data.get('id'), 'timestamp': timestamp, 'thumbnail': thumbnail, - 'view_count': view_count, + 'view_count': parse_count(self._search_regex( + (r'\bviewCount\s*:\s*["\']([\d,.]+)', r'video_view_count["\']\s*:\s*(\d+)',), + webpage, 'view count', default=None)), + 'concurrent_view_count': get_first(post, ( + ('video', (..., ..., 'attachments', ..., 'media')), 'liveViewerCount', {int_or_none})), } info_json_ld = self._search_json_ld(webpage, video_id, default={}) @@ -459,7 +481,8 @@ class FacebookIE(InfoExtractor): dash_manifest = video.get('dash_manifest') if dash_manifest: formats.extend(self._parse_mpd_formats( - compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)))) + compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)), + mpd_url=video.get('dash_manifest_url'))) def process_formats(info): # Downloads with browser's User-Agent are rate limited. 
Working around @@ -493,6 +516,13 @@ class FacebookIE(InfoExtractor): entries = [] def parse_graphql_video(video): + v_id = video.get('videoId') or video.get('id') or video_id + reel_info = traverse_obj( + video, ('creation_story', 'short_form_video_context', 'playback_video', {dict})) + if reel_info: + video = video['creation_story'] + video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) + video.update(reel_info) formats = [] q = qualities(['sd', 'hd']) for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), @@ -509,15 +539,15 @@ class FacebookIE(InfoExtractor): 'url': playable_url, }) extract_dash_manifest(video, formats) - v_id = video.get('videoId') or video.get('id') or video_id info = { 'id': v_id, 'formats': formats, 'thumbnail': traverse_obj( video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')), - 'uploader_id': try_get(video, lambda x: x['owner']['id']), - 'timestamp': int_or_none(video.get('publish_time')), - 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), + 'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})), + 'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none), + 'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000) + or float_or_none(video.get('length_in_second'))), } process_formats(info) description = try_get(video, lambda x: x['savable_description']['text']) @@ -778,18 +808,18 @@ class FacebookReelIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.facebook.com/reel/1195289147628387', - 'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831', + 'md5': 'f13dd37f2633595982db5ed8765474d3', 'info_dict': { 'id': '1195289147628387', 'ext': 'mp4', - 'title': 'md5:9f5b142921b2dc57004fa13f76005f87', - 'description': 'md5:24ea7ef062215d295bdde64e778f5474', - 'uploader': 'Beast Camp Training', - 'uploader_id': '1738535909799870', - 'duration': 9.536, - 'thumbnail': r're:^https?://.*', + 'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e', + 'description': 'md5:22f03309b216ac84720183961441d8db', + 'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1', + 'uploader_id': '100040874179269', + 'duration': 9.579, + 'timestamp': 1637502609, 'upload_date': '20211121', - 'timestamp': 1637502604, + 'thumbnail': r're:^https?://.*', } }] diff --git a/hypervideo_dl/extractor/fc2.py b/hypervideo_dl/extractor/fc2.py index dd5e088..ba19b6c 100644 --- a/hypervideo_dl/extractor/fc2.py +++ b/hypervideo_dl/extractor/fc2.py @@ -3,11 +3,11 @@ import re from .common import InfoExtractor from ..compat import compat_parse_qs from ..dependencies import websockets +from ..networking import Request from ..utils import ( ExtractorError, WebSocketsWrapper, js_to_json, - sanitized_Request, traverse_obj, update_url_query, urlencode_postdata, @@ -57,7 +57,7 @@ class FC2IE(InfoExtractor): } login_data = urlencode_postdata(login_form_strs) - request = sanitized_Request( + request = Request( 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') @@ -66,7 +66,7 @@ class FC2IE(InfoExtractor): return False # this is also needed - login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done') + login_redir = Request('http://id.fc2.com/?mode=redirect&login=done') self._download_webpage( login_redir, None, note='Login redirect', errnote='Login redirect failed') diff --git a/hypervideo_dl/extractor/fifa.py b/hypervideo_dl/extractor/fifa.py 
index dc00edc..8b4db3a 100644 --- a/hypervideo_dl/extractor/fifa.py +++ b/hypervideo_dl/extractor/fifa.py @@ -17,8 +17,10 @@ class FifaIE(InfoExtractor): 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', 'ext': 'mp4', 'categories': ['FIFA Tournaments'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'thumbnail': 'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero', 'duration': 8165, + 'release_timestamp': 1152403200, + 'release_date': '20060709', }, 'params': {'skip_download': 'm3u8'}, }, { @@ -54,7 +56,7 @@ class FifaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) preconnect_link = self._search_regex( - r'<link[^>]+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + r'<link[^>]+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') video_details = self._download_json( f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) @@ -62,22 +64,9 @@ class FifaIE(InfoExtractor): preplay_parameters = self._download_json( f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] - cid = preplay_parameters['contentId'] content_data = self._download_json( - f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ - 'v': preplay_parameters['preplayAPIVersion'], - 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], - 'rn': preplay_parameters['randomNumber'], - 'exp': preplay_parameters['tokenExpirationDate'], - 'ct': preplay_parameters['contentType'], - 'cid': cid, - 'mbtracks': preplay_parameters['tracksAssetNumber'], - 'ad': preplay_parameters['adConfiguration'], - 'ad.preroll': int(preplay_parameters['adPreroll']), - 'ad.cmsid': preplay_parameters['adCMSSourceId'], - 'ad.vid': preplay_parameters['adSourceVideoID'], - 'sig': preplay_parameters['signature'], - }) + 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), + video_id, 'Downloading Content Data') formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) diff --git a/hypervideo_dl/extractor/filmon.py b/hypervideo_dl/extractor/filmon.py index 9a93cb9..0cd18f4 100644 --- a/hypervideo_dl/extractor/filmon.py +++ b/hypervideo_dl/extractor/filmon.py @@ -1,8 +1,6 @@ from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_HTTPError, -) +from ..compat import compat_str +from ..networking.exceptions import HTTPError from ..utils import ( qualities, strip_or_none, @@ -40,8 +38,8 @@ class FilmOnIE(InfoExtractor): 'https://www.filmon.com/api/vod/movie?id=%s' % video_id, video_id)['response'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason'] + if isinstance(e.cause, HTTPError): + errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason'] raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) raise @@ -124,8 +122,8 @@ class FilmOnChannelIE(InfoExtractor): channel_data = self._download_json( 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message'] + if isinstance(e.cause, HTTPError): + errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message'] raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) raise
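The fifa change above collapses a dozen hand-built query parameters into fields the API now returns preformatted, so the whole preplay URL becomes a single str.format(**dict) call. A sketch of the idiom with dummy values:

    preplay_parameters = {
        'contentId': 'abc123',
        'queryStr': 'v=2&tc=1&rn=42&exp=1700000000',  # invented sample values
        'signature': 'deadbeef',
    }
    content_url = ('https://content.uplynk.com/preplay/{contentId}/multiple.json'
                   '?{queryStr}&sig={signature}'.format(**preplay_parameters))
    print(content_url)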
diff --git a/hypervideo_dl/extractor/fox.py b/hypervideo_dl/extractor/fox.py index 15c0c48..e00e977 100644 --- a/hypervideo_dl/extractor/fox.py +++ b/hypervideo_dl/extractor/fox.py @@ -3,10 +3,10 @@ import uuid from .common import InfoExtractor from ..compat import ( - compat_HTTPError, compat_str, compat_urllib_parse_unquote, ) +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, int_or_none, @@ -20,7 +20,7 @@ from ..utils import ( class FOXIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[\da-fA-F]+)' + _VALID_URL = r'https?://(?:www\.)?fox(?:sports)?\.com/(?:watch|replay)/(?P<id>[\da-fA-F]+)' _TESTS = [{ # clip 'url': 'https://www.fox.com/watch/4b765a60490325103ea69888fb2bd4e8/', @@ -50,6 +50,10 @@ class FOXIE(InfoExtractor): # sports event, geo-restricted 'url': 'https://www.fox.com/watch/b057484dade738d1f373b3e46216fa2c/', 'only_matching': True, + }, { + # fox sports replay, geo-restricted + 'url': 'https://www.foxsports.com/replay/561f3e071347a24e5e877abc56b22e89', + 'only_matching': True, }] _GEO_BYPASS = False _HOME_PAGE_URL = 'https://www.fox.com/' @@ -68,9 +72,9 @@ class FOXIE(InfoExtractor): 'https://api3.fox.com/v2.0/' + path, video_id, data=data, headers=headers) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: entitlement_issues = self._parse_json( - e.cause.read().decode(), video_id)['entitlementIssues'] + e.cause.response.read().decode(), video_id)['entitlementIssues'] for e in entitlement_issues: if e.get('errorCode') == 1005: raise ExtractorError( @@ -123,8 +127,8 @@ class FOXIE(InfoExtractor): try: m3u8_url = self._download_json(release_url, video_id)['playURL'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - error = self._parse_json(e.cause.read().decode(), video_id) + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + error = self._parse_json(e.cause.response.read().decode(), video_id) if error.get('exception') == 'GeoLocationBlocked': self.raise_geo_restricted(countries=['US']) raise ExtractorError(error['description'], expected=True) diff --git a/hypervideo_dl/extractor/foxnews.py b/hypervideo_dl/extractor/foxnews.py index 52172aa..6aa6361 100644 --- a/hypervideo_dl/extractor/foxnews.py +++ b/hypervideo_dl/extractor/foxnews.py @@ -7,8 +7,37 @@ from .common import InfoExtractor class FoxNewsIE(AMPIE): IE_NAME = 'foxnews' IE_DESC = 'Fox News and Fox Business Video' - _VALID_URL = r'https?://(?P<host>video\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' + _VALID_URL = r'https?://video\.(?:insider\.)?fox(?:news|business)\.com/v/(?:video-embed\.html\?video_id=)?(?P<id>\d+)' _TESTS = [ + { + 'url': 'https://video.foxnews.com/v/6320653836112', + 'info_dict': { + 'id': '6320653836112', + 'ext': 'mp4', + 'title': 'Tucker Carlson joins \'Gutfeld!\' to discuss his new documentary', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 404, + 'upload_date': '20230217', + 'description': 'md5:858a8a36f59e9ca897d758855bcdfa02', + 'timestamp': 1676611344.0, + }, + 'params': {'skip_download': 'm3u8'}, + }, + { + # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words + 'url':
'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', + 'info_dict': { + 'id': '5099377331001', + 'ext': 'mp4', + 'title': '82416_censoring', + 'description': '82416_censoring', + 'upload_date': '20160826', + 'timestamp': 1472169708.0, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 521, + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'http://video.foxnews.com/v/3937480/frozen-in-time/#sp=show-clips', 'md5': '32aaded6ba3ef0d1c04e238d01031e5e', @@ -22,6 +51,7 @@ class FoxNewsIE(AMPIE): 'upload_date': '20110503', 'thumbnail': r're:^https?://.*\.jpg$', }, + 'skip': '404 page', }, { 'url': 'http://video.foxnews.com/v/3922535568001/rep-luis-gutierrez-on-if-obamas-immigration-plan-is-legal/#sp=show-clips', @@ -36,10 +66,7 @@ class FoxNewsIE(AMPIE): 'upload_date': '20141204', 'thumbnail': r're:^https?://.*\.jpg$', }, - 'params': { - # m3u8 download - 'skip_download': True, - }, + 'skip': 'm3u8 HTTP error 400 in web browser', }, { 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', @@ -49,11 +76,6 @@ 'url': 'http://video.foxbusiness.com/v/4442309889001', 'only_matching': True, }, - { - # From http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words - 'url': 'http://video.insider.foxnews.com/v/video-embed.html?video_id=5099377331001&autoplay=true&share_url=http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words&share_title=Student%20Group:%20Saying%20%27Politically%20Correct,%27%20%27Trash%27%20and%20%27Lame%27%20Is%20Offensive&share=true', - 'only_matching': True, - }, ] @classmethod @@ -67,10 +89,10 @@ class FoxNewsIE(AMPIE): yield f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}' def _real_extract(self, url): - host, video_id = self._match_valid_url(url).groups() + video_id = self._match_id(url) info = self._extract_feed_info( - 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) + f'https://api.foxnews.com/v3/video-player/{video_id}?callback=uid_{video_id}') info['id'] = video_id return info @@ -78,6 +100,19 @@ class FoxNewsVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxnews\.com/video/(?P<id>\d+)' _TESTS = [{ + 'url': 'https://www.foxnews.com/video/6328632286112', + 'info_dict': { + 'id': '6328632286112', + 'ext': 'mp4', + 'title': 'Review: 2023 Toyota Prius Prime', + 'duration': 155, + 'thumbnail': r're:^https://.+\.jpg$', + 'timestamp': 1685720177.0, + 'upload_date': '20230602', + 'description': 'md5:b69aafb125b41c1402e9744f53d6edc4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://www.foxnews.com/video/6313058664112', 'info_dict': { 'id': '6313058664112', @@ -89,8 +124,7 @@ 'title': 'Gutfeld! 
- Thursday, September 29', 'timestamp': 1664527538, }, - 'expected_warnings': ['Ignoring subtitle tracks'], - 'params': {'skip_download': 'm3u8'}, + 'skip': '404 page', }] def _real_extract(self, url): @@ -104,19 +138,22 @@ class FoxNewsArticleIE(InfoExtractor): _TESTS = [{ # data-video-id - 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', - 'md5': '83d44e1aff1433e7a29a7b537d1700b5', + 'url': 'https://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': 'd2dd6ce809cedeefa96460e964821437', 'info_dict': { 'id': '5116295019001', 'ext': 'mp4', 'title': 'Trump and Clinton asked to defend positions on Iraq War', - 'description': 'Veterans react on \'The Kelly File\'', + 'description': 'Veterans and Fox News host Dana Perino react on \'The Kelly File\' to NBC\'s presidential forum', 'timestamp': 1473301045, 'upload_date': '20160908', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 426, }, + 'params': {'skip_download': 'm3u8'}, }, { # iframe embed - 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', + 'url': 'https://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', 'info_dict': { 'id': '5748266721001', 'ext': 'flv', @@ -127,9 +164,7 @@ 'timestamp': 1520594670, 'upload_date': '20180309', }, - 'params': { - 'skip_download': True, - }, + 'skip': '404 page', }, { 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', 'only_matching': True, diff --git a/hypervideo_dl/extractor/foxsports.py b/hypervideo_dl/extractor/foxsports.py index f9d7fe5..8e89ccf 100644 --- a/hypervideo_dl/extractor/foxsports.py +++ b/hypervideo_dl/extractor/foxsports.py @@ -1,31 +1,52 @@ from .common import InfoExtractor +from .uplynk import UplynkPreplayIE +from ..networking import HEADRequest +from ..utils import float_or_none, make_archive_id, smuggle_url class FoxSportsIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)' - - _TEST = { - 'url': 'http://www.foxsports.com/tennessee/video/432609859715', - 'md5': 'b49050e955bebe32c301972e4012ac17', + _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.foxsports.com/watch/play-612168c6700004b', 'info_dict': { - 'id': '432609859715', + 'id': 'b72f5bd8658140baa5791bb676433733', 'ext': 'mp4', - 'title': 'Courtney Lee on going up 2-0 in series vs. 
Blazers', - 'description': 'Courtney Lee talks about Memphis being focused.', - # TODO: fix timestamp - 'upload_date': '19700101', # '20150423', - # 'timestamp': 1429761109, - 'uploader': 'NEWA-FNG-FOXSPORTS', + 'display_id': 'play-612168c6700004b', + 'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a', + 'description': 'md5:371bc43609708ae2b9e1a939229762af', + 'uploader_id': '06b4a36349624051a9ba52ac3a91d268', + 'upload_date': '20221205', + 'timestamp': 1670262586, + 'duration': 31.7317, + 'thumbnail': r're:^https?://.*\.jpg$', + 'extra_param_to_segment_url': str, }, 'params': { - # m3u8 download - 'skip_download': True, + 'skip_download': 'm3u8', }, - 'add_ie': ['ThePlatform'], - } + }] def _real_extract(self, url): video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) + data = self._download_json( + f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}', + video_id, note='Downloading API JSON', headers={ + 'x-api-key': 'cf289e299efdfa39fb6316f259d1de93', + }) + preplay_url = self._request_webpage( + HEADRequest(data['url']), video_id, 'Fetching preplay URL').url - return self.url_result( - 'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed') + return { + '_type': 'url_transparent', + 'ie_key': UplynkPreplayIE.ie_key(), + 'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}), + 'display_id': video_id, + 'title': data.get('name') or json_ld.get('title'), + 'description': data.get('description') or json_ld.get('description'), + 'duration': float_or_none(data.get('durationInSeconds')), + 'timestamp': json_ld.get('timestamp'), + 'thumbnails': json_ld.get('thumbnails'), + '_old_archive_ids': [make_archive_id(self, video_id)], + } diff --git a/hypervideo_dl/extractor/freesound.py b/hypervideo_dl/extractor/freesound.py index 8b5f227..fcde044 100644 --- a/hypervideo_dl/extractor/freesound.py +++ b/hypervideo_dl/extractor/freesound.py @@ -52,6 +52,7 @@ class FreesoundIE(InfoExtractor): tags_str = get_element_by_class('tags', webpage) tags = re.findall(r'<a[^>]+>([^<]+)', tags_str) if tags_str else None + audio_url = re.sub(r'^https?://freesound\.org(https?://)', r'\1', audio_url) audio_urls = [audio_url] LQ_FORMAT = '-lq.mp3' diff --git a/hypervideo_dl/extractor/fujitv.py b/hypervideo_dl/extractor/fujitv.py index 668bb27..77e826e 100644 --- a/hypervideo_dl/extractor/fujitv.py +++ b/hypervideo_dl/extractor/fujitv.py @@ -1,5 +1,5 @@ -from ..utils import HEADRequest from .common import InfoExtractor +from ..networking import HEADRequest class FujiTVFODPlus7IE(InfoExtractor): diff --git a/hypervideo_dl/extractor/funimation.py b/hypervideo_dl/extractor/funimation.py index 18363c1..41de85c 100644 --- a/hypervideo_dl/extractor/funimation.py +++ b/hypervideo_dl/extractor/funimation.py @@ -3,7 +3,7 @@ import re import string from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, determine_ext, @@ -46,8 +46,8 @@ class FunimationBaseIE(InfoExtractor): })) FunimationBaseIE._TOKEN = data['token'] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read().decode(), None)['error'] + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + error = self._parse_json(e.cause.response.read().decode(), None)['error'] raise ExtractorError(error, expected=True) raise
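The funimation tweak in the next hunk replaces a list comprehension over random.choice with random.choices(..., k=8); both produce the same 8-character alphanumeric pinst_id, the new form just does it in one call:

    import random
    import string

    alphabet = string.digits + string.ascii_letters
    pinst_id = ''.join(random.choices(alphabet, k=8))
    assert len(pinst_id) == 8 and all(c in alphabet for c in pinst_id)
    print(pinst_id)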
diff --git a/hypervideo_dl/extractor/fujitv.py b/hypervideo_dl/extractor/fujitv.py
index 668bb27..77e826e 100644
--- a/hypervideo_dl/extractor/fujitv.py
+++ b/hypervideo_dl/extractor/fujitv.py
@@ -1,5 +1,5 @@
-from ..utils import HEADRequest
 from .common import InfoExtractor
+from ..networking import HEADRequest
 
 
 class FujiTVFODPlus7IE(InfoExtractor):
diff --git a/hypervideo_dl/extractor/funimation.py b/hypervideo_dl/extractor/funimation.py
index 18363c1..41de85c 100644
--- a/hypervideo_dl/extractor/funimation.py
+++ b/hypervideo_dl/extractor/funimation.py
@@ -3,7 +3,7 @@ import re
 import string
 
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
     ExtractorError,
     determine_ext,
@@ -46,8 +46,8 @@ class FunimationBaseIE(InfoExtractor):
             }))
             FunimationBaseIE._TOKEN = data['token']
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
-                error = self._parse_json(e.cause.read().decode(), None)['error']
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+                error = self._parse_json(e.cause.response.read().decode(), None)['error']
                 raise ExtractorError(error, expected=True)
             raise
@@ -210,7 +210,7 @@ class FunimationIE(FunimationBaseIE):
         page = self._download_json(
             'https://www.funimation.com/api/showexperience/%s/' % experience_id,
             display_id, headers=headers, expected_status=403, query={
-                'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]),
+                'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)),
             }, note=f'Downloading {format_name} JSON')
         sources = page.get('items') or []
         if not sources:
diff --git a/hypervideo_dl/extractor/funker530.py b/hypervideo_dl/extractor/funker530.py
new file mode 100644
index 0000000..ba5ab7d
--- /dev/null
+++ b/hypervideo_dl/extractor/funker530.py
@@ -0,0 +1,79 @@
+from .common import InfoExtractor
+from .rumble import RumbleEmbedIE
+from .youtube import YoutubeIE
+from ..utils import ExtractorError, clean_html, get_element_by_class, strip_or_none
+
+
+class Funker530IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?funker530\.com/video/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://funker530.com/video/azov-patrol-caught-in-open-under-automatic-grenade-launcher-fire/',
+        'md5': '085f50fea27523a388bbc22e123e09c8',
+        'info_dict': {
+            'id': 'v2qbmu4',
+            'ext': 'mp4',
+            'title': 'Azov Patrol Caught In Open Under Automatic Grenade Launcher Fire',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Funker530',
+            'channel': 'Funker530',
+            'channel_url': 'https://rumble.com/c/c-1199543',
+            'width': 1280,
+            'height': 720,
+            'fps': 25,
+            'duration': 27,
+            'upload_date': '20230608',
+            'timestamp': 1686241321,
+            'live_status': 'not_live',
+            'description': 'md5:bea2e1f458095414e04b5ac189c2f980',
+        }
+    }, {
+        'url': 'https://funker530.com/video/my-friends-joined-the-russians-civdiv/',
+        'md5': 'a42c2933391210662e93e867d7124b70',
+        'info_dict': {
+            'id': 'k-pk4bOvoac',
+            'ext': 'mp4',
+            'view_count': int,
+            'channel': 'Civ Div',
+            'comment_count': int,
+            'channel_follower_count': int,
+            'thumbnail': 'https://i.ytimg.com/vi/k-pk4bOvoac/maxresdefault.jpg',
+            'uploader_id': '@CivDiv',
+            'duration': 357,
+            'channel_url': 'https://www.youtube.com/channel/UCgsCiwJ88up-YyMHo7hL5-A',
+            'tags': [],
+            'uploader_url': 'https://www.youtube.com/@CivDiv',
+            'channel_id': 'UCgsCiwJ88up-YyMHo7hL5-A',
+            'like_count': int,
+            'description': 'md5:aef75ec3f59c07a0e39400f609b24429',
+            'live_status': 'not_live',
+            'age_limit': 0,
+            'uploader': 'Civ Div',
+            'categories': ['People & Blogs'],
+            'title': 'My “Friends” joined the Russians.',
+            'availability': 'public',
+            'upload_date': '20230608',
+            'playable_in_embed': True,
+            'heatmap': 'count:100',
+        }
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        info = {}
+        rumble_url = list(RumbleEmbedIE._extract_embed_urls(url, webpage))
+        if rumble_url:
+            info = {'url': rumble_url[0], 'ie_key': RumbleEmbedIE.ie_key()}
+        else:
+            youtube_url = list(YoutubeIE._extract_embed_urls(url, webpage))
+            if youtube_url:
+                info = {'url': youtube_url[0], 'ie_key': YoutubeIE.ie_key()}
+        if not info:
+            raise ExtractorError('No videos found on webpage', expected=True)
+
+        return {
+            **info,
+            '_type': 'url_transparent',
+            'description': strip_or_none(self._search_regex(
+                r'(?s)(.+)About the Author', clean_html(get_element_by_class('video-desc-paragraph', webpage)),
+                'description', default=None))
+        }
diff --git a/hypervideo_dl/extractor/gamejolt.py b/hypervideo_dl/extractor/gamejolt.py
index 440b832..8ec046b 100644
--- a/hypervideo_dl/extractor/gamejolt.py
+++ b/hypervideo_dl/extractor/gamejolt.py
@@ -48,7
+48,7 @@ class GameJoltBaseIE(InfoExtractor): post_hash_id, note='Downloading comments list page %d' % page) if not comments_data.get('comments'): break - for comment in traverse_obj(comments_data, (('comments', 'childComments'), ...), expected_type=dict, default=[]): + for comment in traverse_obj(comments_data, (('comments', 'childComments'), ...), expected_type=dict): yield { 'id': comment['id'], 'text': self._parse_content_as_text( diff --git a/hypervideo_dl/extractor/gdcvault.py b/hypervideo_dl/extractor/gdcvault.py index 2878bbd..4265feb 100644 --- a/hypervideo_dl/extractor/gdcvault.py +++ b/hypervideo_dl/extractor/gdcvault.py @@ -2,13 +2,8 @@ import re from .common import InfoExtractor from .kaltura import KalturaIE -from ..utils import ( - HEADRequest, - remove_start, - sanitized_Request, - smuggle_url, - urlencode_postdata, -) +from ..networking import HEADRequest, Request +from ..utils import remove_start, smuggle_url, urlencode_postdata class GDCVaultIE(InfoExtractor): @@ -138,8 +133,8 @@ class GDCVaultIE(InfoExtractor): 'password': password, } - request = sanitized_Request(login_url, urlencode_postdata(login_form)) - request.add_header('Content-Type', 'application/x-www-form-urlencoded') + request = Request(login_url, urlencode_postdata(login_form)) + request.headers['Content-Type'] = 'application/x-www-form-urlencoded' self._download_webpage(request, display_id, 'Logging in') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') self._download_webpage(logout_url, display_id, 'Logging out') @@ -163,7 +158,7 @@ class GDCVaultIE(InfoExtractor): video_url = 'http://www.gdcvault.com' + direct_url # resolve the url so that we can detect the correct extension video_url = self._request_webpage( - HEADRequest(video_url), video_id).geturl() + HEADRequest(video_url), video_id).url return { 'id': video_id, diff --git a/hypervideo_dl/extractor/generic.py b/hypervideo_dl/extractor/generic.py index f28a77e..77b6fb3 100644 --- a/hypervideo_dl/extractor/generic.py +++ b/hypervideo_dl/extractor/generic.py @@ -14,7 +14,9 @@ from ..utils import ( ExtractorError, UnsupportedError, determine_ext, + determine_protocol, dict_get, + extract_basic_auth, format_field, int_or_none, is_html, @@ -31,7 +33,9 @@ from ..utils import ( unescapeHTML, unified_timestamp, unsmuggle_url, + update_url_query, url_or_none, + urljoin, variadic, xpath_attr, xpath_text, @@ -864,21 +868,7 @@ class GenericIE(InfoExtractor): }, }, { - # JWPlayer config passed as variable - 'url': 'http://www.txxx.com/videos/3326530/ariele/', - 'info_dict': { - 'id': '3326530_hq', - 'ext': 'mp4', - 'title': 'ARIELE | Tube Cup', - 'uploader': 'www.txxx.com', - 'age_limit': 18, - }, - 'params': { - 'skip_download': True, - } - }, - { - # Video.js embed, multiple formats + # Youtube embed, formerly: Video.js embed, multiple formats 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html', 'info_dict': { 'id': 'yygqldloqIk', @@ -905,6 +895,7 @@ class GenericIE(InfoExtractor): 'params': { 'skip_download': True, }, + 'skip': '404 Not Found', }, # rtl.nl embed { @@ -1547,19 +1538,6 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['WashingtonPost'], }, - { - # Mediaset embed - 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml', - 'info_dict': { - 'id': '720642', - 'ext': 'mp4', - 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"', - }, - 'params': { - 
'skip_download': True, - }, - 'add_ie': ['Mediaset'], - }, { # JOJ.sk embeds 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok', @@ -1864,11 +1842,6 @@ class GenericIE(InfoExtractor): 'title': 'I AM BIO Podcast | BIO', }, 'playlist_mincount': 52, - }, - { - # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed) - 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', - 'only_matching': True, }, { # WimTv embed player 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', @@ -1885,11 +1858,13 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'description': 'Kelis - 4th Of July', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Untested major version'], }, { # KVS Player 'url': 'https://www.kvs-demo.com/embed/105/', @@ -1898,35 +1873,12 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July / Embed Player', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, }, { - # KVS Player - 'url': 'https://thisvid.com/videos/french-boy-pantsed/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player - 'url': 'https://thisvid.com/embed/2400174/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player 'url': 'https://youix.com/video/leningrad-zoj/', 'md5': '94f96ba95706dc3880812b27b7d8a2b8', 'info_dict': { @@ -1934,8 +1886,8 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://youix.com/embed/18485', @@ -1945,19 +1897,20 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Ленинград - ЗОЖ', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', 'md5': '94166bdb26b4cb1fb9214319a629fc51', 'info_dict': { 'id': '21217', - 'display_id': '40-nochey-40-nights-2016', + 'display_id': '40-nochey-2016', 'ext': 'mp4', 'title': '40 ночей (2016) - BogMedia.org', + 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', 'thumbnail': 
'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', - } + }, }, { # KVS Player (for sites that serve kt_player.js via non-https urls) @@ -1967,9 +1920,9 @@ class GenericIE(InfoExtractor): 'id': '389508', 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', 'ext': 'mp4', - 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', - 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg', - } + 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', + 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', + }, }, { # Reddit-hosted video that will redirect and be processed by RedditIE @@ -2172,7 +2125,79 @@ class GenericIE(InfoExtractor): 'age_limit': 0, 'direct': True, } - } + }, + { + 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.', + 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'info_dict': { + 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'ext': 'mp4', + 'title': 'čauky lidi 70 finall', + 'description': 'čauky lidi 70 finall', + 'thumbnail': 'h', + 'upload_date': '20220606', + 'timestamp': 1654513791, + 'duration': 318.0, + 'direct': True, + 'age_limit': 0, + }, + }, + { + 'note': 'JW Player embed with unicode-escape sequences in URL', + 'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics', + 'info_dict': { + 'id': 'm', + 'ext': 'mp4', + 'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi', + 'description': 'Mahler\'s ', + 'uploader': 'www.medici.tv', + 'age_limit': 0, + 'thumbnail': r're:^https?://.+\.jpg', + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', + 'md5': 'e2f0a4c329f7986280b7328e24036d60', + 'info_dict': { + 'id': '284002', + 'display_id': 'just-out-of-the-shower-joi', + 'ext': 'mp4', + 'title': 'Just Out Of The Shower JOI - Shooshtime', + 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg', + 'height': 720, + 'age_limit': 18, + }, + }, + { + 'note': 'Live HLS direct link', + 'url': 'https://d18j67ugtrocuq.cloudfront.net/out/v1/2767aec339144787926bd0322f72c6e9/index.m3u8', + 'info_dict': { + 'id': 'index', + 'title': r're:index', + 'ext': 'mp4', + 'live_status': 'is_live', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, + { + 'note': 'Video.js VOD HLS', + 'url': 'https://gist.githubusercontent.com/bashonly/2aae0862c50f4a4b84f220c315767208/raw/e3380d413749dabbe804c9c2d8fd9a45142475c7/videojs_hls_test.html', + 'info_dict': { + 'id': 'videojs_hls_test', + 'title': 'video', + 'ext': 'mp4', + 'age_limit': 0, + 'duration': 1800, + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, ] def report_following_redirect(self, new_url): @@ -2189,12 +2214,41 @@ class GenericIE(InfoExtractor): self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}') - def _fragment_query(self, url): - if self._configuration_arg('fragment_query'): - query_string = urllib.parse.urlparse(url).query - if query_string: - return {'extra_param_to_segment_url': query_string} - return {} + def 
_extra_manifest_info(self, info, manifest_url): + fragment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0] + if fragment_query is not None: + info['extra_param_to_segment_url'] = ( + urllib.parse.urlparse(fragment_query).query or fragment_query + or urllib.parse.urlparse(manifest_url).query or None) + + hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None + info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), { + 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}), + }) or None + + variant_query = self._configuration_arg('variant_query', [None], casesense=True)[0] + if variant_query is not None: + query = urllib.parse.parse_qs( + urllib.parse.urlparse(variant_query).query or variant_query + or urllib.parse.urlparse(manifest_url).query) + for fmt in self._downloader._get_formats(info): + fmt['url'] = update_url_query(fmt['url'], query) + + # Attempt to detect live HLS or set VOD duration + m3u8_format = next((f for f in self._downloader._get_formats(info) + if determine_protocol(f) == 'm3u8_native'), None) + if m3u8_format: + is_live = self._configuration_arg('is_live', [None])[0] + if is_live is not None: + info['live_status'] = 'not_live' if is_live == 'false' else 'is_live' + return + headers = m3u8_format.get('http_headers') or info.get('http_headers') + duration = self._extract_m3u8_vod_duration( + m3u8_format['url'], info.get('id'), note='Checking m3u8 live status', + errnote='Failed to download m3u8 media playlist', headers=headers) + if not duration: + info['live_status'] = 'is_live' + info['duration'] = info.get('duration') or duration def _extract_rss(self, url, video_id, doc): NS_MAP = { @@ -2238,43 +2292,87 @@ class GenericIE(InfoExtractor): 'entries': entries, } - def _kvs_getrealurl(self, video_url, license_code): + @classmethod + def _kvs_get_real_url(cls, video_url, license_code): if not video_url.startswith('function/0/'): return video_url # not obfuscated - url_path, _, url_query = video_url.partition('?') - urlparts = url_path.split('/')[2:] - license = self._kvs_getlicensetoken(license_code) - newmagic = urlparts[5][:32] + parsed = urllib.parse.urlparse(video_url[len('function/0/'):]) + license = cls._kvs_get_license_token(license_code) + urlparts = parsed.path.split('/') - for o in range(len(newmagic) - 1, -1, -1): - new = '' - l = (o + sum(int(n) for n in license[o:])) % 32 + HASH_LENGTH = 32 + hash = urlparts[3][:HASH_LENGTH] + indices = list(range(HASH_LENGTH)) - for i in range(0, len(newmagic)): - if i == o: - new += newmagic[l] - elif i == l: - new += newmagic[o] - else: - new += newmagic[i] - newmagic = new + # Swap indices of hash according to the destination calculated from the license token + accum = 0 + for src in reversed(range(HASH_LENGTH)): + accum += license[src] + dest = (src + accum) % HASH_LENGTH + indices[src], indices[dest] = indices[dest], indices[src] + + urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:] + return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts))) - urlparts[5] = newmagic + urlparts[5][32:] - return '/'.join(urlparts) + '?' 
+ url_query
 
+    @staticmethod
+    def _kvs_get_license_token(license):
+        license = license.replace('$', '')
+        license_values = [int(char) for char in license]
 
-    def _kvs_getlicensetoken(self, license):
-        modlicense = license.replace('$', '').replace('0', '1')
-        center = int(len(modlicense) / 2)
+        modlicense = license.replace('0', '1')
+        center = len(modlicense) // 2
         fronthalf = int(modlicense[:center + 1])
         backhalf = int(modlicense[center:])
+        modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1]
+
+        return [
+            (license_values[index + offset] + current) % 10
+            for index, current in enumerate(map(int, modlicense))
+            for offset in range(4)
+        ]
+
+    def _extract_kvs(self, url, webpage, video_id):
+        flashvars = self._search_json(
+            r'(?s:<script\b[^>]*>.*?var\s+flashvars\s*=)',
+            webpage, 'flashvars', video_id, transform_source=js_to_json)
+
+        # extract the part after the last / as the display_id from the
+        # canonical URL.
+        display_id = self._search_regex(
+            r'(?:<link href="https?://[^"]+/([^/?#&"]+)/?" rel="canonical"\s*/?>'
+            r'|<link rel="canonical" href="https?://[^"]+/([^/?#&"]+)/?"\s*/?>)',
+            webpage, 'display_id', fatal=False)
+        title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
+
+        thumbnail = flashvars['preview_url']
+        if thumbnail.startswith('//'):
+            protocol, _, _ = url.partition('/')
+            thumbnail = protocol + thumbnail
+
+        url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys()))
+        formats = []
+        for key in url_keys:
+            if '/get_file/' not in flashvars[key]:
+                continue
+            format_id = flashvars.get(f'{key}_text', key)
+            formats.append({
+                'url': urljoin(url, self._kvs_get_real_url(flashvars[key], flashvars['license_code'])),
+                'format_id': format_id,
+                'ext': 'mp4',
+                **(parse_resolution(format_id) or parse_resolution(flashvars[key])),
+                'http_headers': {'Referer': url},
+            })
+            if not formats[-1].get('height'):
+                formats[-1]['quality'] = 1
-        modlicense = str(4 * abs(fronthalf - backhalf))
-        retval = ''
-        for o in range(0, center + 1):
-            for i in range(1, 5):
-                retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
-        return retval
+        return {
+            'id': flashvars['video_id'],
+            'display_id': display_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
 
     def _real_extract(self, url):
         if url.startswith('//'):
@@ -2330,13 +2428,12 @@
         # It may probably better to solve this by checking Content-Type for application/octet-stream
         # after a HEAD request, but not sure if we can rely on this.
full_response = self._request_webpage(url, video_id, headers={ - 'Accept-Encoding': '*', + 'Accept-Encoding': 'identity', **smuggled_data.get('http_headers', {}) }) - new_url = full_response.geturl() - if new_url == urllib.parse.urlparse(url)._replace(scheme='https').geturl(): - url = new_url - elif url != new_url: + new_url = full_response.url + url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl() + if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url) if force_videoid: new_url = smuggle_url(new_url, {'force_videoid': force_videoid}) @@ -2355,14 +2452,13 @@ class GenericIE(InfoExtractor): self.report_detected('direct video link') headers = smuggled_data.get('http_headers', {}) format_id = str(m.group('format_id')) + ext = determine_ext(url) subtitles = {} - if format_id.endswith('mpegurl'): + if format_id.endswith('mpegurl') or ext == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) - info_dict.update(self._fragment_query(url)) - elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): + elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd': formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) - info_dict.update(self._fragment_query(url)) - elif format_id == 'f4m': + elif format_id == 'f4m' or ext == 'f4m': formats = self._extract_f4m_formats(url, video_id, headers=headers) else: formats = [{ @@ -2374,8 +2470,9 @@ class GenericIE(InfoExtractor): info_dict.update({ 'formats': formats, 'subtitles': subtitles, - 'http_headers': headers, + 'http_headers': headers or None, }) + self._extra_manifest_info(info_dict, url) return info_dict if not self.get_param('test', False) and not is_intentional: @@ -2388,7 +2485,7 @@ class GenericIE(InfoExtractor): if first_bytes.startswith(b'#EXTM3U'): self.report_detected('M3U playlist') info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') - info_dict.update(self._fragment_query(url)) + self._extra_manifest_info(info_dict, url) return info_dict # Maybe it's a direct link to a video? 
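The redirect handling above now keeps the originally requested URL but adopts the redirect target's scheme, so a plain http-to-https upgrade is no longer reported (and followed) as a redirect. The core of that normalization, runnable standalone:

import urllib.parse

# Keep the requested URL, but adopt the scheme the server redirected to
url = 'http://example.com/video'
new_url = 'https://example.com/video'
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
assert url == new_url  # only the scheme differed, so no redirect is reported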
@@ -2432,14 +2529,14 @@
             return self.playlist_result(
                 self._parse_xspf(
                     doc, video_id, xspf_url=url,
-                    xspf_base_url=full_response.geturl()),
+                    xspf_base_url=full_response.url),
                 video_id)
         elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
             info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
                 doc,
-                mpd_base_url=full_response.geturl().rpartition('/')[0],
+                mpd_base_url=full_response.url.rpartition('/')[0],
                 mpd_url=url)
-            info_dict.update(self._fragment_query(url))
+            self._extra_manifest_info(info_dict, url)
             self.report_detected('DASH manifest')
             return info_dict
         elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2465,7 +2562,7 @@
         self._downloader.write_debug('Looking for embeds')
         embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
         if len(embeds) == 1:
-            return {**info_dict, **embeds[0]}
+            return merge_dicts(embeds[0], info_dict)
         elif embeds:
             return self.playlist_result(embeds, **info_dict)
         raise UnsupportedError(url)
@@ -2475,7 +2572,7 @@
         info_dict = types.MappingProxyType(info_dict)  # Prevents accidental mutation
         video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
         url, smuggled_data = unsmuggle_url(url, {})
-        actual_url = urlh.geturl() if urlh else url
+        actual_url = urlh.url if urlh else url
 
         # Sometimes embedded video player is hidden behind percent encoding
         # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
@@ -2528,8 +2625,7 @@
             varname = mobj.group(1)
             sources = variadic(self._parse_json(
                 mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
-            formats = []
-            subtitles = {}
+            formats, subtitles, src = [], {}, None
             for source in sources:
                 src = source.get('src')
                 if not src or not isinstance(src, str):
@@ -2552,8 +2648,6 @@
                         m3u8_id='hls', fatal=False)
                     formats.extend(fmts)
                     self._merge_subtitles(subs, target=subtitles)
-            for fmt in formats:
-                fmt.update(self._fragment_query(src))
 
             if not formats:
                 formats.append({
@@ -2569,11 +2663,11 @@
             for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
                 sub = self._parse_json(
                     sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
-                src = str_or_none(sub.get('src'))
-                if not src:
+                sub_src = str_or_none(sub.get('src'))
+                if not sub_src:
                     continue
                 subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
-                    'url': urllib.parse.urljoin(url, src),
+                    'url': urllib.parse.urljoin(url, sub_src),
                     'name': sub.get('label'),
                     'http_headers': {
                         'Referer': actual_url,
@@ -2581,7 +2675,21 @@
                 })
             if formats or subtitles:
                 self.report_detected('video.js embed')
-                return [{'formats': formats, 'subtitles': subtitles}]
+                info_dict = {'formats': formats, 'subtitles': subtitles}
+                if formats:
+                    self._extra_manifest_info(info_dict, src)
+                return [info_dict]
+
+        # Look for generic KVS player (before json-ld bc of some urls that break otherwise)
+        found = self._search_regex((
+            r'<script\b[^>]+?\bsrc\s*=\s*(["\'])https?://(?:(?!\1)[^?#])+/kt_player\.js\?v=(?P<ver>\d+(?:\.\d+)+)\1[^>]*>',
+            r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:(?!\2)[^?#])+/kt_player\.swf\?v=(?P<ver>\d+(?:\.\d+)+)\2\s*,',
+        ), webpage, 'KVS player', group='ver', default=False)
+        if found:
+            self.report_detected('KVS Player')
+            if found.split('.')[0] not in ('4', '5', '6'):
+                self.report_warning(f'Untested major version ({found}) in player engine - download may fail.')
+            return [self._extract_kvs(url, webpage, video_id)]
 
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(webpage, video_id, default={})
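For reference, the deobfuscation that _extract_kvs() relies on (the _kvs_get_real_url()/_kvs_get_license_token() helpers added further up) can be exercised standalone. This sketch restates the same logic outside the extractor; the license code and obfuscated URL below are made up for illustration:

import urllib.parse

HASH_LENGTH = 32

def kvs_license_token(license_code):
    # Derive a digit sequence from the license code, as in _kvs_get_license_token()
    license_code = license_code.replace('$', '')
    digits = [int(char) for char in license_code]
    modlicense = license_code.replace('0', '1')
    center = len(modlicense) // 2
    fronthalf = int(modlicense[:center + 1])
    backhalf = int(modlicense[center:])
    modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1]
    return [
        (digits[index + offset] + current) % 10
        for index, current in enumerate(map(int, modlicense))
        for offset in range(4)
    ]

def kvs_real_url(video_url, license_code):
    if not video_url.startswith('function/0/'):
        return video_url  # not obfuscated
    parsed = urllib.parse.urlparse(video_url[len('function/0/'):])
    token = kvs_license_token(license_code)
    urlparts = parsed.path.split('/')
    hash, tail = urlparts[3][:HASH_LENGTH], urlparts[3][HASH_LENGTH:]
    indices = list(range(HASH_LENGTH))
    # Swap indices of the hash according to the destination derived from the token
    accum = 0
    for src in reversed(range(HASH_LENGTH)):
        accum += token[src]
        dest = (src + accum) % HASH_LENGTH
        indices[src], indices[dest] = indices[dest], indices[src]
    urlparts[3] = ''.join(hash[index] for index in indices) + tail
    return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts)))

print(kvs_real_url(
    'function/0/https://example.com/get_file/7/0123456789abcdef0123456789abcdef/42/42.mp4?rnd=1',
    '$2849958284823017'))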
@@ -2625,52 +2733,6 @@
                     ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
         if found:
             self.report_detected('JW Player embed')
-        if not found:
-            # Look for generic KVS player
-            found = re.search(r'<script [^>]*?src="https?://(?:\S+?/)+kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
-            if found:
-                self.report_detected('KVS Player')
-                if found.group('maj_ver') not in ['4', '5']:
-                    self.report_warning('Untested major version (%s) in player engine--Download may fail.' % found.group('ver'))
-                flashvars = re.search(r'(?ms)<script.*?var\s+flashvars\s*=\s*(\{.*?\});.*?</script>', webpage)
-                flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json)
-
-                # extract the part after the last / as the display_id from the
-                # canonical URL.
-                display_id = self._search_regex(
-                    r'(?:<link href="https?://[^"]+/([^/?#&"]+)/?" rel="canonical"\s*/?>'
-                    r'|<link rel="canonical" href="https?://[^"]+/([^/?#&"]+)/?"\s*/?>)',
-                    webpage, 'display_id', fatal=False
-                )
-                title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)</(?:h1|title)>', webpage, 'title')
-
-                thumbnail = flashvars['preview_url']
-                if thumbnail.startswith('//'):
-                    protocol, _, _ = url.partition('/')
-                    thumbnail = protocol + thumbnail
-
-                url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys()))
-                formats = []
-                for key in url_keys:
-                    if '/get_file/' not in flashvars[key]:
-                        continue
-                    format_id = flashvars.get(f'{key}_text', key)
-                    formats.append({
-                        'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']),
-                        'format_id': format_id,
-                        'ext': 'mp4',
-                        **(parse_resolution(format_id) or parse_resolution(flashvars[key]))
-                    })
-                    if not formats[-1].get('height'):
-                        formats[-1]['quality'] = 1
-
-                return [{
-                    'id': flashvars['video_id'],
-                    'display_id': display_id,
-                    'title': title,
-                    'thumbnail': thumbnail,
-                    'formats': formats,
-                }]
         if not found:
             # Broaden the search a little bit
             found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
@@ -2751,6 +2813,7 @@
 
         entries = []
         for video_url in orderedSet(found):
+            video_url = video_url.encode().decode('unicode-escape')
             video_url = unescapeHTML(video_url)
             video_url = video_url.replace('\\/', '/')
             video_url = urllib.parse.urljoin(url, video_url)
@@ -2790,10 +2853,10 @@
                 return [self._extract_xspf_playlist(video_url, video_id)]
             elif ext == 'm3u8':
                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
-                entry_info_dict.update(self._fragment_query(video_url))
+                self._extra_manifest_info(entry_info_dict, video_url)
             elif ext == 'mpd':
                 entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
-                entry_info_dict.update(self._fragment_query(video_url))
+                self._extra_manifest_info(entry_info_dict, video_url)
             elif ext == 'f4m':
                 entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
             elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
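The _fragment_query replacement above is driven by the generic extractor's arguments (fragment_query, variant_query, is_live, hls_key). A sketch of plumbing them through the Python API, assuming the package exposes YoutubeDL at the top level like upstream; names of the sample URL and values are illustrative:

from hypervideo_dl import YoutubeDL

ydl_opts = {
    'extractor_args': {
        'generic': {
            'fragment_query': ['rnd=123'],  # query to append to each fragment URL
            'is_live': ['false'],           # skip the m3u8 live-status probe, treat as VOD
        },
    },
}
with YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info('https://example.com/stream/index.m3u8', download=False)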
diff --git a/hypervideo_dl/extractor/genius.py b/hypervideo_dl/extractor/genius.py
index 62f5a28..57c25e7 100644
--- a/hypervideo_dl/extractor/genius.py
+++ b/hypervideo_dl/extractor/genius.py
@@ -10,7 +10,7 @@ from ..utils import (
 
 
 class GeniusIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
+    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
     _TESTS = [{
         'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
         'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
@@ -41,19 +41,37 @@ class GeniusIE(InfoExtractor):
             'timestamp': 1631209167,
             'thumbnail': r're:^https?://.*\.jpg$',
         },
+    }, {
+        'url': 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens',
+        'md5': 'f98a4e03b16b0a2821bd6e52fb3cc9d7',
+        'info_dict': {
+            'id': '6321509903112',
+            'ext': 'mp4',
+            'title': 'Cordae & Anderson .Paak Breaks Down The Meaning Of “Two Tens”',
+            'description': 'md5:1255f0e1161d07342ce56a8464ac339d',
+            'tags': ['song id: 5457554'],
+            'uploader_id': '4863540648001',
+            'duration': 361.813,
+            'upload_date': '20230301',
+            'timestamp': 1677703908,
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
     }]
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
+        display_id, is_article = self._match_valid_url(url).group('id', 'article')
         webpage = self._download_webpage(url, display_id)
 
         metadata = self._search_json(
-            r'
-    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?'
+    _VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
     _TESTS = [{
         'url': 'https://genius.com/Lil-baby-heyy-lyrics',
         'playlist_mincount': 2,
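The widened GeniusIE pattern now admits both video and article permalinks, with the article group selecting the alternate metadata path in _real_extract; a standalone check against the test URLs above:

import re

_VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
m = re.match(_VALID_URL, 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens')
assert m.group('article') == 'a' and m.group('id').startswith('cordae')
m = re.match(_VALID_URL, 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly')
assert m.group('article') is None  # video permalinks leave the group unset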
diff --git a/hypervideo_dl/extractor/globalplayer.py b/hypervideo_dl/extractor/globalplayer.py
new file mode 100644
index 0000000..e0c0d58
--- /dev/null
+++ b/hypervideo_dl/extractor/globalplayer.py
@@ -0,0 +1,254 @@
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    join_nonempty,
+    parse_duration,
+    str_or_none,
+    traverse_obj,
+    unified_strdate,
+    unified_timestamp,
+    urlhandle_detect_ext,
+)
+
+
+class GlobalPlayerBaseIE(InfoExtractor):
+    def _get_page_props(self, url, video_id):
+        webpage = self._download_webpage(url, video_id)
+        return self._search_nextjs_data(webpage, video_id)['props']['pageProps']
+
+    def _request_ext(self, url, video_id):
+        return urlhandle_detect_ext(self._request_webpage(  # Server rejects HEAD requests
+            url, video_id, note='Determining source extension'))
+
+    def _extract_audio(self, episode, series):
+        return {
+            'vcodec': 'none',
+            **traverse_obj(series, {
+                'series': 'title',
+                'series_id': 'id',
+                'thumbnail': 'imageUrl',
+                'uploader': 'itunesAuthor',  # podcasts only
+            }),
+            **traverse_obj(episode, {
+                'id': 'id',
+                'description': ('description', {clean_html}),
+                'duration': ('duration', {parse_duration}),
+                'thumbnail': 'imageUrl',
+                'url': 'streamUrl',
+                'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
+                'title': 'title',
+            }, get_all=False)
+        }
+
+
+class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
+        'info_dict': {
+            'id': '2mx1E',
+            'ext': 'aac',
+            'display_id': 'smoothchill-uk',
+            'title': 're:^Smooth Chill.+$',
+            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
+            'description': 'Music To Chill To',
+            'live_status': 'is_live',
+        },
+    }, {
+        # national station
+        'url': 'https://www.globalplayer.com/live/heart/uk/',
+        'info_dict': {
+            'id': '2mwx4',
+            'ext': 'aac',
+            'description': 'turn up the feel good!',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            'live_status': 'is_live',
+            'title': 're:^Heart UK.+$',
+            'display_id': 'heart-uk',
+        },
+    }, {
+        # regional variation
+        'url': 'https://www.globalplayer.com/live/heart/london/',
+        'info_dict': {
+            'id': 'AMqg',
+            'ext': 'aac',
+            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
+            'title': 're:^Heart London.+$',
+            'live_status': 'is_live',
+            'display_id': 'heart-london',
+            'description': 'turn up the feel good!',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['station']
+        stream_url = station['streamUrl']
+
+        return {
+            'id': station['id'],
+            'display_id': join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix'),
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+            **traverse_obj(station, {
+                'title': (('name', 'brandName'), {str_or_none}),
+                'description': 'tagline',
+                'thumbnail': 'brandLogo',
+            }, get_all=False),
+        }
+
+
+class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
+    _TESTS = [{
+        # "live playlist"
+        'url': 'https://www.globalplayer.com/playlists/8bLk/',
+        'info_dict': {
+            'id': '8bLk',
+            'ext': 'aac',
+            'live_status': 'is_live',
+            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
+            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
+            'title': 're:^Classic FM Hall of Fame.+$'
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        station = self._get_page_props(url, video_id)['playlistData']
+        stream_url = station['streamUrl']
+
+        return {
+            'id': video_id,
+            'url': stream_url,
+            'ext': self._request_ext(stream_url, video_id),
+            'vcodec': 'none',
+            'is_live': True,
+            **traverse_obj(station, {
+                'title': 'title',
+                'description': 'description',
+                'thumbnail': 'image',
+            }),
+        }
+
+
+class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': '42KuaM',
+            'title': 'Filthy Ritual',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'categories': ['Society & Culture', 'True Crime'],
+            'uploader': 'Global',
+            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
+        'playlist_mincount': 3,
+        'info_dict': {
+            'id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        series = props['podcastInfo'] if podcast else props['catchupInfo']
+
+        return {
+            '_type': 'playlist',
+            'id': video_id,
+            'entries': [self._extract_audio(ep, series) for ep in traverse_obj(
+                series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))],
+            'categories': traverse_obj(series, ('categories', ..., 'name')) or None,
+            **traverse_obj(series, {
+                'description': 'description',
+                'thumbnail': 'imageUrl',
+                'title': 'title',
+                'uploader': 'itunesAuthor',  # podcasts only
+            }),
+        }
+
+
+class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
+    _TESTS = [{
+        # podcast
+        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
+        'info_dict': {
+            'id': '7DrfNnE',
+            'ext': 'mp3',
+            'title': 'Filthy Ritual - Trailer',
+            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
+            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
+            'duration': 225.0,
+            'timestamp': 1681254900,
+            'series': 'Filthy Ritual',
+            'series_id': '42KuaM',
+            'upload_date': '20230411',
+            'uploader': 'Global',
+        },
+    }, {
+        # radio catchup
+        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
+        'info_dict': {
+            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
+            'ext': 'm4a',
+            'timestamp': 1682056800,
+            'series': 'Nick Ferrari',
+            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
+            'upload_date': '20230421',
+            'series_id': '46vyD7z',
+            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
+            'title': 'Nick Ferrari',
+            'duration': 10800.0,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
+        props = self._get_page_props(url, video_id)
+        episode = props['podcastEpisode'] if podcast else props['catchupEpisode']
+
+        return self._extract_audio(
+            episode, traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {})
+
+
+class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
+    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
+    _TESTS = [{
+        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
+        'info_dict': {
+            'id': '2JsSZ7Gm2uP',
+            'ext': 'mp4',
+            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
+            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
+            'upload_date': '20230420',
+            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._get_page_props(url, video_id)['videoData']
+
+        return {
+            'id': video_id,
+            **traverse_obj(meta, {
+                'url': 'url',
+                'thumbnail': ('image', 'url'),
+                'title': 'title',
+                'upload_date': ('publish_date', {unified_strdate}),
+                'description': 'description',
+            }),
+        }
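The GlobalPlayer extractors all start from _search_nextjs_data(), i.e. the JSON state that Next.js pages embed in a __NEXT_DATA__ script tag. A simplified standalone equivalent of that lookup (the real helper in common.py is more robust):

import json
import re

def next_data(webpage):
    # Next.js serializes page state as JSON inside <script id="__NEXT_DATA__">
    m = re.search(
        r'<script[^>]+id=["\']__NEXT_DATA__["\'][^>]*>(.+?)</script>',
        webpage, re.DOTALL)
    return json.loads(m.group(1))['props']['pageProps'] if m else None

html = '<script id="__NEXT_DATA__" type="application/json">{"props": {"pageProps": {"station": {"id": "2mx1E"}}}}</script>'
assert next_data(html)['station']['id'] == '2mx1E'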
diff --git a/hypervideo_dl/extractor/globo.py b/hypervideo_dl/extractor/globo.py
index a7be2cb..df98f09 100644
--- a/hypervideo_dl/extractor/globo.py
+++ b/hypervideo_dl/extractor/globo.py
@@ -8,8 +8,8 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
 )
+from ..networking import HEADRequest
 from ..utils import (
-    HEADRequest,
     ExtractorError,
     float_or_none,
     orderedSet,
diff --git a/hypervideo_dl/extractor/gmanetwork.py b/hypervideo_dl/extractor/gmanetwork.py
new file mode 100644
index 0000000..62fff4e
--- /dev/null
+++ b/hypervideo_dl/extractor/gmanetwork.py
@@ -0,0 +1,83 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from .youtube import YoutubeIE
+
+
+class GMANetworkVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www)\.gmanetwork\.com/(?:\w+/){3}(?P<id>\d+)/(?P<display_id>[\w-]+)/video'
+    _TESTS = [{
+        'url': 'https://www.gmanetwork.com/fullepisodes/home/running_man_philippines/168677/running-man-philippines-catch-the-thief-full-chapter-2/video?section=home',
+        'info_dict': {
+            'id': '28BqW0AXPe0',
+            'ext': 'mp4',
+            'upload_date': '20220919',
+            'uploader_url': 'http://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
+            'like_count': int,
+            'view_count': int,
+            'uploader': 'YoüLOL',
+            'channel_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
+            'duration': 5313,
+            'comment_count': int,
+            'tags': 'count:22',
+            'uploader_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
+            'title': 'Running Man Philippines: Catch the Thief (FULL CHAPTER 2)',
+            'channel_url': 'https://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
+            'thumbnail': 'https://i.ytimg.com/vi/28BqW0AXPe0/maxresdefault.jpg',
+            'release_timestamp': 1663594212,
+            'age_limit': 0,
+            'channel_follower_count': int,
+            'categories': ['Entertainment'],
+            'description': 'md5:811bdcea74f9c48051824e494756e926',
+            'live_status': 'not_live',
+            'playable_in_embed': True,
+            'channel': 'YoüLOL',
+            'availability': 'public',
+            'release_date': '20220919',
+        }
+    }, {
+        'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home',
+        'info_dict': {
+            'id': 'yiDOExw2aSA',
+            'ext': 'mp4',
+            'live_status': 'not_live',
+            'channel': 'GMANetwork',
+            'like_count': int,
+            'channel_follower_count': int,
+            'description': 'md5:6d00cd658394fa1a5071200d3ed4be05',
+            'duration': 1419,
+            'age_limit': 0,
+            'comment_count': int,
+            'upload_date': '20181003',
+            'thumbnail': 'https://i.ytimg.com/vi_webp/yiDOExw2aSA/maxresdefault.webp',
+            'availability': 'public',
+            'playable_in_embed': True,
+            'channel_id': 'UCKL5hAuzgFQsyrsQKgU0Qng',
+            'title': 'More Than Words: Full Episode 80 (Finale)',
+            'uploader_id': 'GMANETWORK',
+            'categories': ['Entertainment'],
+            'uploader': 'GMANetwork',
+            'channel_url': 'https://www.youtube.com/channel/UCKL5hAuzgFQsyrsQKgU0Qng',
+            'tags': 'count:29',
+            'view_count': int,
+            'uploader_url': 'http://www.youtube.com/user/GMANETWORK',
+        }
+    }]
+
+    def _real_extract(self, url):
+        content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+        webpage = self._download_webpage(url, display_id)
+        # webpage route
+        youtube_id = self._search_regex(
+            r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<youtube_id>[\w-]+)', webpage, 'youtube_id', fatal=False)
+        if youtube_id:
+            return self.url_result(youtube_id, YoutubeIE, youtube_id)
+
+        # api call route
+        # more info at https://aphrodite.gmanetwork.com/fullepisodes/assets/fullepisodes/js/dist/fullepisodes_video.js?v=1.1.11
+        network_url = self._search_regex(
+            r'NETWORK_URL\s*=\s*[\'"](?P<url>[^\'"]+)', webpage, 'network_url')
+        json_data = self._download_json(f'{network_url}api/data/content/video/{content_id}', display_id)
+        if json_data.get('video_file'):
+            return self.url_result(json_data['video_file'], YoutubeIE, json_data['video_file'])
+        else:
+            return self.url_result(json_data['dailymotion_file'], DailymotionIE, json_data['dailymotion_file'])
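GMANetworkVideoIE first tries the webpage route by scraping the player's YOUTUBE_VIDEO variable; the regex group name above was reconstructed, so treat it as an assumption. A standalone check against a made-up page snippet:

import re

webpage = "var YOUTUBE_VIDEO = '28BqW0AXPe0';"
youtube_id = re.search(
    r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<youtube_id>[\w-]+)', webpage).group('youtube_id')
assert youtube_id == '28BqW0AXPe0'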
diff --git a/hypervideo_dl/extractor/googledrive.py b/hypervideo_dl/extractor/googledrive.py
index e027ea7..2fdec20 100644
--- a/hypervideo_dl/extractor/googledrive.py
+++ b/hypervideo_dl/extractor/googledrive.py
@@ -3,9 +3,11 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_parse_qs
 from ..utils import (
-    determine_ext,
     ExtractorError,
+    determine_ext,
+    extract_attributes,
     get_element_by_class,
+    get_element_html_by_id,
     int_or_none,
     lowercase_escape,
     try_get,
@@ -34,6 +36,7 @@ class GoogleDriveIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Big Buck Bunny.mp4',
             'duration': 45,
+            'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
         }
     }, {
         # video can't be watched anonymously due to view count limit reached,
@@ -163,15 +166,13 @@ class GoogleDriveIE(InfoExtractor):
         video_id = self._match_id(url)
         video_info = compat_parse_qs(self._download_webpage(
             'https://drive.google.com/get_video_info',
-            video_id, query={'docid': video_id}))
+            video_id, 'Downloading video webpage', query={'docid': video_id}))
 
         def get_value(key):
             return try_get(video_info, lambda x: x[key][0])
 
         reason = get_value('reason')
         title = get_value('title')
-        if not title and reason:
-            raise 
ExtractorError(reason, expected=True) formats = [] fmt_stream_map = (get_value('fmt_stream_map') or '').split(',') @@ -209,20 +210,25 @@ class GoogleDriveIE(InfoExtractor): 'export': 'download', }) - def request_source_file(source_url, kind): + def request_source_file(source_url, kind, data=None): return self._request_webpage( source_url, video_id, note='Requesting %s file' % kind, - errnote='Unable to request %s file' % kind, fatal=False) + errnote='Unable to request %s file' % kind, fatal=False, data=data) urlh = request_source_file(source_url, 'source') if urlh: def add_source_format(urlh): + nonlocal title + if not title: + title = self._search_regex( + r'\bfilename="([^"]+)"', urlh.headers.get('Content-Disposition'), + 'title', default=None) formats.append({ # Use redirect URLs as download URLs in order to calculate # correct cookies in _calc_cookies. # Using original URLs may result in redirect loop due to # google.com's cookies mistakenly used for googleusercontent.com # redirect URLs (see #23919). - 'url': urlh.geturl(), + 'url': urlh.url, 'ext': determine_ext(title, 'mp4').lower(), 'format_id': 'source', 'quality': 1, @@ -234,14 +240,10 @@ class GoogleDriveIE(InfoExtractor): urlh, url, video_id, note='Downloading confirmation page', errnote='Unable to confirm download', fatal=False) if confirmation_webpage: - confirm = self._search_regex( - r'confirm=([^&"\']+)', confirmation_webpage, - 'confirmation code', default=None) - if confirm: - confirmed_source_url = update_url_query(source_url, { - 'confirm': confirm, - }) - urlh = request_source_file(confirmed_source_url, 'confirmed source') + confirmed_source_url = extract_attributes( + get_element_html_by_id('download-form', confirmation_webpage) or '').get('action') + if confirmed_source_url: + urlh = request_source_file(confirmed_source_url, 'confirmed source', data=b'') if urlh and urlh.headers.get('Content-Disposition'): add_source_format(urlh) else: @@ -251,7 +253,10 @@ class GoogleDriveIE(InfoExtractor): or 'unable to extract confirmation code') if not formats and reason: - self.raise_no_formats(reason, expected=True) + if title: + self.raise_no_formats(reason, expected=True) + else: + raise ExtractorError(reason, expected=True) hl = get_value('hl') subtitles_id = None diff --git a/hypervideo_dl/extractor/goplay.py b/hypervideo_dl/extractor/goplay.py index 2882b49..960d7d7 100644 --- a/hypervideo_dl/extractor/goplay.py +++ b/hypervideo_dl/extractor/goplay.py @@ -76,11 +76,11 @@ class GoPlayIE(InfoExtractor): } api = self._download_json( - f'https://api.viervijfzes.be/content/{video_id}', - video_id, headers={'Authorization': self._id_token}) + f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', + video_id, headers={'Authorization': 'Bearer %s' % self._id_token}) formats, subs = self._extract_m3u8_formats_and_subtitles( - api['video']['S'], video_id, ext='mp4', m3u8_id='HLS') + api['manifestUrls']['hls'], video_id, ext='mp4', m3u8_id='HLS') info_dict.update({ 'id': video_id, diff --git a/hypervideo_dl/extractor/gronkh.py b/hypervideo_dl/extractor/gronkh.py index b9370e3..1ae0a68 100644 --- a/hypervideo_dl/extractor/gronkh.py +++ b/hypervideo_dl/extractor/gronkh.py @@ -3,6 +3,7 @@ import functools from .common import InfoExtractor from ..utils import ( OnDemandPagedList, + float_or_none, traverse_obj, unified_strdate, ) @@ -19,7 +20,9 @@ class GronkhIE(InfoExtractor): 'title': 'H.O.R.D.E. 
- DAS ZWEiTE ZEiTALTER 🎲 Session 1',
         'view_count': int,
         'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
-        'upload_date': '20221111'
+        'upload_date': '20221111',
+        'chapters': 'count:3',
+        'duration': 31463,
     },
     'params': {'skip_download': True}
 }, {
@@ -30,7 +33,8 @@
         'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
         'view_count': int,
         'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
-        'upload_date': '20211001'
+        'upload_date': '20211001',
+        'duration': 32058,
     },
     'params': {'skip_download': True}
 }, {
@@ -56,6 +60,12 @@
         'upload_date': unified_strdate(data_json.get('created_at')),
         'formats': formats,
         'subtitles': subtitles,
+        'duration': float_or_none(data_json.get('source_length')),
+        'chapters': traverse_obj(data_json, (
+            'chapters', lambda _, v: float_or_none(v['offset']) is not None, {
+                'title': 'title',
+                'start_time': ('offset', {float_or_none}),
+            })) or None,
     }
diff --git a/hypervideo_dl/extractor/hidive.py b/hypervideo_dl/extractor/hidive.py
index 3a53f2c..df6868d 100644
--- a/hypervideo_dl/extractor/hidive.py
+++ b/hypervideo_dl/extractor/hidive.py
@@ -1,5 +1,3 @@
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
@@ -39,15 +37,28 @@ class HiDiveIE(InfoExtractor):
         form = self._search_regex(
             r'(?s)<form[^>]+action="/account/login"[^>]*>(.+?)</form>',
             webpage, 'login form', default=None)
-        if not form:  # logged in
+        if not form:
             return
         data = self._hidden_inputs(form)
         data.update({
             'Email': username,
             'Password': password,
         })
-        self._download_webpage(
+        login_webpage = self._download_webpage(
             self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
+        # If the user has multiple profiles on their account, select one. For now pick the first profile.
+        profile_id = self._search_regex(
+            r'