45 files changed, 1922 insertions, 766 deletions
diff --git a/.gitignore b/.gitignore
index fb09c3d6d..31fdc484b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ cookies
 *.3gp
 *.ape
+*.ass
 *.avi
 *.desktop
 *.flac
@@ -97,6 +98,7 @@ yt-dlp.zip
 *.iml
 .vscode
 *.sublime-*
+*.code-workspace
 # Lazy extractors
 */extractor/lazy_extractors.py
 # Plugins
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index fd93e7df3..8d62c04fb 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -146,7 +146,7 @@ chio0hai
 cntrl-s
 Deer-Spangle
 DEvmIb
-Grabien
+Grabien/MaximVol
 j54vc1bk
 mpeter50
 mrpapersonic
@@ -160,7 +160,7 @@ PilzAdam
 zmousm
 iw0nderhow
 unit193
-TwoThousandHedgehogs
+TwoThousandHedgehogs/KathrynElrod
 Jertzukka
 cypheron
 Hyeeji
@@ -194,3 +194,23 @@ KiberInfinity
 tejing1
 Bricio
 lazypete365
+Aniruddh-J
+blackgear
+CplPwnies
+cyberfox1691
+FestplattenSchnitzel
+hatienl0i261299
+iphoting
+jakeogh
+lukasfink1
+lyz-code
+marieell
+mdpauley
+Mipsters
+mxmehl
+ofkz
+P-reducible
+pycabbage
+regarten
+Ronnnny
+schn0sch
diff --git a/Changelog.md b/Changelog.md
index 0a76f65be..fe6f8a0ac 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -11,6 +11,139 @@
 -->
 
+### 2022.03.08.1
+
+* [cleanup] Refactor `__init__.py`
+* [build] Fix bug
+
+### 2022.03.08
+
+* Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
+* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1)
+* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
+* Add pre-processor stage `after_filter`
+* Better error message when no `--live-from-start` format
+* Create necessary directories for `--print-to-file`
+* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
+* Fix `-all` for `--sub-langs`
+* Fix doubling of `video_id` in `ExtractorError`
+* Fix for when stdout/stderr encoding is `None`
+* Handle negative duration from extractor
+* Implement `--add-header` without modifying `std_headers`
+* Obey `--abort-on-error` for "ffmpeg not installed"
+* Set `webpage_url_...` from `webpage_url` and not input URL
+* Tolerate failure to `--write-link` due to unknown URL
+* [aria2c] Add `--http-accept-gzip=true`
+* [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev)
+* [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
+* [devscripts] Improve `prepare_manpage`
+* [downloader] Do not use aria2c for non-native `m3u8`
+* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [extractor] Allow `http_headers` to be specified for `thumbnails`
+* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
+* [FFmpegConcat] Abort on `--simulate`
+* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
+* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
+* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [generic] Pass referer to extracted formats
+* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
+* [options] Better ambiguous option resolution
+* [options] Rename `--clean-infojson` to `--clean-info-json`
+* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
+* [Sponsorblock] minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
+* [utils] Better traceback for `ExtractorError`
+* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
+* [utils] Improve file locking
+* [utils] OnDemandPagedList: Do not download pages after error
+* [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
+* [utils] Validate `DateRange` input
+* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
+* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
+* [cleanup, docs] Misc cleanup
+* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
+* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
+* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
+* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
+* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
+* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
+* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
+* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
+* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
+* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
+* [ard] Fix valid URL
+* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
+* [bandcamp] Detect acodec
+* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
+* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
+* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
+* [bigo] Fix extractor to not to use `form_params`
+* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
+* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
+* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
+* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
+* [cspan] Support of C-Span congress videos by [Grabien](https://github.com/Grabien)
+* [dropbox] fix regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
+* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
+* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
+* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
+* [lbry] Fix `--ignore-no-formats-error`
+* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
+* [mildom] Fix linter
+* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
+* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
+* [niconico:tag] Add support for searching tags
+* [nrk] Add fallback API
+* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
+* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
+* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
+* [spiegel] Fix `_VALID_URL`
+* [ThumbnailsConvertor] Support `webp`
+* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
+* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
+* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
+* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
+* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitch] Fix field name of `view_count`
+* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
+* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
+* [youtube:tab] Add `approximate_date` extractor-arg
+* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reject webpage data if redirected to home page
+* [youtube] De-prioritize potentially damaged formats
+* [youtube] Differentiate descriptive audio by language code
+* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
+* [youtube] Fix automatic captions
+* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [youtube] Further de-prioritize 3gp format
+* [youtube] Label original auto-subs
+* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
+* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
+* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
+* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
+
+
 ### 2022.02.04
 
 * [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
diff --git a/MANIFEST.in b/MANIFEST.in
index 38d83a9a5..bc2f056c0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,5 +5,6 @@ include README.md
 include completions/*/*
 include supportedsites.md
 include yt-dlp.1
+include requirements.txt
 recursive-include devscripts *
 recursive-include test *
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 10b0aec3e..b763d2d9a 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -75,7 +75,11 @@ def filter_options(readme):
     section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
     options = '# OPTIONS\n'
     for line in section.split('\n')[1:]:
-        mobj = re.fullmatch(r'\s{4}(?P<opt>-(?:,\s|[^\s])+)(?:\s(?P<meta>([^\s]|\s(?!\s))+))?(\s{2,}(?P<desc>.+))?', line)
+        mobj = re.fullmatch(r'''(?x)
+                \s{4}(?P<opt>-(?:,\s|[^\s])+)
+                (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
+                (\s{2,}(?P<desc>.+))?
+            ''', line)
         if not mobj:
             options += f'{line.lstrip()}\n'
             continue
diff --git a/pyinst.py b/pyinst.py
--- a/pyinst.py
+++ b/pyinst.py
@@ -74,7 +74,7 @@ def version_to_list(version):
 
 
 def dependency_options():
-    dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
+    dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
     excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
 
     yield from (f'--hidden-import={module}' for module in dependencies)
diff --git a/requirements.txt b/requirements.txt
index 6a982fa36..7818aca78 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 mutagen
 pycryptodome
 websockets
+brotli; platform_python_implementation=='CPython'
+brotlicffi; platform_python_implementation!='CPython'
\ No newline at end of file
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ DESCRIPTION = 'A youtube-dl fork with additional features and patches'
 
 LONG_DESCRIPTION = '\n\n'.join((
     'Official repository: <https://github.com/yt-dlp/yt-dlp>',
     '**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
-    open('README.md', 'r', encoding='utf-8').read()))
+    open('README.md', encoding='utf-8').read()))
 
 REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']
diff --git a/supportedsites.md b/supportedsites.md
index 7166dc53a..46ad1328d 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -24,6 +24,8 @@
  - **abcnews:video**
  - **abcotvs**: ABC Owned Television Stations
  - **abcotvs:clips**
+ - **AbemaTV**
+ - **AbemaTVTitle**
  - **AcademicEarth:Course**
  - **acast**
  - **acast:channel**
@@ -45,6 +47,8 @@
  - **AlJazeera**
  - **Allocine**
  - **AlphaPorno**
+ - **Alsace20TV**
+ - **Alsace20TVEmbed**
  - **Alura**
  - **AluraCourse**
  - **Amara**
@@ -58,6 +62,9 @@
  - **AnimeLab**
  - **AnimeLabShows**
  - **AnimeOnDemand**
+ - **ant1newsgr:article**: ant1news.gr articles
+ - **ant1newsgr:embed**: ant1news.gr embedded videos
+ - **ant1newsgr:watch**: ant1news.gr videos
  - **Anvato**
  - **aol.com**: Yahoo screen and movies
  - **APA**
@@ -75,6 +82,7 @@
  - **Arkena**
  - **arte.sky.it**
  - **ArteTV**
+ - **ArteTVCategory**
  - **ArteTVEmbed**
  - **ArteTVPlaylist**
  - **AsianCrush**
@@ -99,8 +107,8 @@
  - **bandaichannel**
  - **Bandcamp**
  - **Bandcamp:album**
+ - **Bandcamp:user**
  - **Bandcamp:weekly**
- - **BandcampMusic**
  - **bangumi.bilibili.com**: BiliBili番剧
  - **BannedVideo**
  - **bbc**: BBC
@@ -122,6 +130,7 @@
  - **bfmtv:live**
  - **BibelTV**
  - **Bigflix**
+ - **Bigo**
  - **Bild**: Bild.de
  - **BiliBili**
  - **Bilibili category extractor**
@@ -163,6 +172,7 @@
  - **BYUtv**
  - **CableAV**
  - **Callin**
+ - **Caltrans**
  - **CAM4**
  - **Camdemy**
  - **CamdemyFolder**
@@ -231,6 +241,8 @@
  - **Coub**
  - **CozyTV**
  - **cp24**
+ - **cpac**
+ - **cpac:playlist**
  - **Cracked**
  - **Crackle**
  - **CrooksAndLiars**
@@ -241,6 +253,7 @@
  - **crunchyroll:playlist**
  - **crunchyroll:playlist:beta**
  - **CSpan**: C-SPAN
+ - **CSpanCongress**
  - **CtsNews**: 華視新聞
  - **CTV**
  - **CTVNews**
@@ -262,6 +275,7 @@
  - **daum.net:clip**
  - **daum.net:playlist**
  - **daum.net:user**
+ - **daystar:clip**
  - **DBTV**
  - **DctpTv**
  - **DeezerAlbum**
@@ -353,6 +367,7 @@
  - **faz.net**
  - **fc2**
  - **fc2:embed**
+ - **fc2:live**
  - **Fczenit**
  - **Filmmodu**
  - **filmon**
@@ -372,6 +387,7 @@
  - **foxnews**: Fox News and Fox Business Video
  - **foxnews:article**
  - **FoxSports**
+ - **fptplay**: fptplay.vn
  - **FranceCulture**
  - **FranceInter**
  - **FranceTV**
@@ -410,6 +426,7 @@
  - **gem.cbc.ca:playlist**
  - **generic**: Generic downloader that works on some sites
  - **Gettr**
+ - **GettrStreaming**
  - **Gfycat**
  - **GiantBomb**
  - **Giga**
@@ -622,8 +639,9 @@
  - **MiaoPai**
  - **microsoftstream**: Microsoft Stream
  - **mildom**: Record ongoing live by specific user in Mildom
+ - **mildom:clip**: Clip in Mildom
  - **mildom:user:vod**: Download all VODs from specific user in Mildom
- - **mildom:vod**: Download a VOD in Mildom
+ - **mildom:vod**: VOD in Mildom
  - **minds**
  - **minds:channel**
  - **minds:group**
@@ -666,6 +684,8 @@
  - **mtvservices:embedded**
  - **MTVUutisetArticle**
  - **MuenchenTV**: münchen.tv
+ - **Murrtube**
+ - **MurrtubeUser**: Murrtube user profile
  - **MuseScore**
  - **MusicdexAlbum**
  - **MusicdexArtist**
@@ -734,9 +754,13 @@
  - **NextTV**: 壹電視
  - **Nexx**
  - **NexxEmbed**
+ - **NFB**
  - **NFHSNetwork**
  - **nfl.com** (Currently broken)
  - **nfl.com:article** (Currently broken)
+ - **NhkForSchoolBangumi**
+ - **NhkForSchoolProgramList**
+ - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
  - **NhkVod**
  - **NhkVodProgram**
  - **nhl.com**
@@ -746,7 +770,10 @@
  - **nickelodeonru**
  - **nicknight**
  - **niconico**: ニコニコ動画
- - **NiconicoPlaylist**
+ - **niconico:history**: NicoNico user history. Requires cookies.
+ - **niconico:playlist**
+ - **niconico:series**
+ - **niconico:tag**: NicoNico video tag URLs
  - **NiconicoUser**
  - **nicovideo:search**: Nico video search; "nicosearch:" prefix
  - **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
@@ -845,6 +872,7 @@
  - **PatreonUser**
  - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
  - **PearVideo**
+ - **PeekVids**
  - **peer.tv**
  - **PeerTube**
  - **PeerTube:Playlist**
@@ -857,6 +885,7 @@
  - **PhilharmonieDeParis**: Philharmonie de Paris
  - **phoenix.de**
  - **Photobucket**
+ - **Piapro**
  - **Picarto**
  - **PicartoVod**
  - **Piksel**
@@ -876,6 +905,7 @@
  - **PlaysTV**
  - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
  - **Playvid**
+ - **PlayVids**
  - **Playwire**
  - **pluralsight**
  - **pluralsight:course**
@@ -980,6 +1010,9 @@
  - **RICE**
  - **RMCDecouverte**
  - **RockstarGames**
+ - **Rokfin**
+ - **rokfin:channel**
+ - **rokfin:stack**
  - **RoosterTeeth**
  - **RoosterTeethSeries**
  - **RottenTomatoes**
@@ -1019,6 +1052,7 @@
  - **RUTV**: RUTV.RU
  - **Ruutu**
  - **Ruv**
+ - **ruv.is:spila**
  - **safari**: safaribooksonline.com online video
  - **safari:api**
  - **safari:course**: safaribooksonline.com online courses
@@ -1158,6 +1192,7 @@
  - **TeleBruxelles**
  - **Telecinco**: telecinco.es, cuatro.com and mediaset.es
  - **Telegraaf**
+ - **telegram:embed**
  - **TeleMB**
  - **Telemundo**
  - **TeleQuebec**
@@ -1319,6 +1354,8 @@
  - **video.google:search**: Google Video search; "gvsearch:" prefix
  - **video.sky.it**
  - **video.sky.it:live**
+ - **VideocampusSachsen**
+ - **VideocampusSachsenEmbed**
  - **VideoDetective**
  - **videofy.me**
  - **videomore**
@@ -1361,6 +1398,7 @@
  - **vlive**
  - **vlive:channel**
  - **vlive:post**
+ - **vm.tiktok**
  - **Vodlocker**
  - **VODPl**
  - **VODPlatform**
@@ -1395,7 +1433,7 @@
  - **WatchBox**
  - **WatchIndianPorn**: Watch Indian Porn
  - **WDR**
- - **wdr:mobile**
+ - **wdr:mobile** (Currently broken)
  - **WDRElefant**
  - **WDRPage**
  - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
@@ -1430,6 +1468,7 @@
  - **xiami:song**: 虾米音乐
  - **ximalaya**: 喜马拉雅FM
  - **ximalaya:album**: 喜马拉雅FM 专辑
+ - **xinpianchang**: xinpianchang.com
  - **XMinus**
  - **XNXX**
  - **Xstream**
@@ -1488,7 +1527,7 @@
  - **ZenYandex**
  - **ZenYandexChannel**
  - **Zhihu**
- - **zingmp3**: mp3.zing.vn
+ - **zingmp3**: zingmp3.vn
  - **zingmp3:album**
  - **zoom**
  - **Zype**
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 816c40329..150764629 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -32,6 +32,7 @@ from string import ascii_letters
 
 from .compat import (
     compat_basestring,
+    compat_brotli,
     compat_get_terminal_size,
     compat_kwargs,
     compat_numeric_types,
@@ -233,6 +234,8 @@ class YoutubeDL(object):
                        See "Sorting Formats" for more details.
     format_sort_force: Force the given format_sort. see "Sorting Formats"
                        for more details.
+    prefer_free_formats: Whether to prefer video formats with free containers
+                       over non-free ones of same quality.
     allow_multiple_video_streams: Allow multiple video streams to be merged
                        into a single file
     allow_multiple_audio_streams: Allow multiple audio streams to be merged
@@ -514,17 +517,6 @@ class YoutubeDL(object):
         'storyboards': {'mhtml'},
     }
 
-    params = None
-    _ies = {}
-    _pps = {k: [] for k in POSTPROCESS_WHEN}
-    _printed_messages = set()
-    _first_webpage_request = True
-    _download_retcode = None
-    _num_downloads = None
-    _playlist_level = 0
-    _playlist_urls = set()
-    _screen_file = None
-
     def __init__(self, params=None, auto_init=True):
         """Create a FileDownloader object with the given options.
         @param auto_init    Whether to load the default extractors and print header (if verbose).
         """
         if params is None:
             params = {}
+        self.params = params
         self._ies = {}
         self._ies_instances = {}
         self._pps = {k: [] for k in POSTPROCESS_WHEN}
@@ -543,15 +536,21 @@
         self._download_retcode = 0
         self._num_downloads = 0
         self._num_videos = 0
-        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
-        self._err_file = sys.stderr
-        self.params = params
+        self._playlist_level = 0
+        self._playlist_urls = set()
         self.cache = Cache(self)
 
         windows_enable_vt_mode()
+        self._out_files = {
+            'error': sys.stderr,
+            'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
+            'console': None if compat_os_name == 'nt' else next(
+                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
+        }
+        self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
         self._allow_colors = {
-            'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
-            'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
+            type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
+            for type_ in ('screen', 'error')
         }
 
         if sys.version_info < (3, 6):
@@ -616,7 +615,7 @@
         sp_kwargs = dict(
             stdin=subprocess.PIPE,
             stdout=slave,
-            stderr=self._err_file)
+            stderr=self._out_files['error'])
         try:
             self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
         except OSError:
@@ -784,14 +783,24 @@
             self._printed_messages.add(message)
         write_string(message, out=out, encoding=self.params.get('encoding'))
 
-    def to_stdout(self, message, skip_eol=False, quiet=False):
+    def to_stdout(self, message, skip_eol=False, quiet=None):
         """Print message to stdout"""
+        if quiet is not None:
+            self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead')
+        self._write_string(
+            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+            self._out_files['print'])
+
+    def to_screen(self, message, skip_eol=False, quiet=None):
+        """Print message to screen if not in quiet mode"""
         if self.params.get('logger'):
             self.params['logger'].debug(message)
-        elif not quiet or self.params.get('verbose'):
-            self._write_string(
-                '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
-                self._err_file if quiet else self._screen_file)
+            return
+        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
+            return
+        self._write_string(
+            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+            self._out_files['screen'])
 
     def to_stderr(self, message, only_once=False):
         """Print message to stderr"""
@@ -799,7 +808,12 @@
         if self.params.get('logger'):
             self.params['logger'].error(message)
         else:
-            self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
+            self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
+
+    def _send_console_code(self, code):
+        if compat_os_name == 'nt' or not self._out_files['console']:
+            return
+        self._write_string(code, self._out_files['console'])
 
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
             return
@@ -810,26 +824,18 @@
             # c_wchar_p() might not be necessary if `message` is
             # already of type unicode()
             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
-        elif 'TERM' in os.environ:
-            self._write_string('\033]0;%s\007' % message, self._screen_file)
+        else:
+            self._send_console_code(f'\033]0;{message}\007')
 
     def save_console_title(self):
-        if not self.params.get('consoletitle', False):
-            return
-        if self.params.get('simulate'):
+        if not self.params.get('consoletitle') or self.params.get('simulate'):
             return
-        if compat_os_name != 'nt' and 'TERM' in os.environ:
-            # Save the title on stack
-            self._write_string('\033[22;0t', self._screen_file)
+        self._send_console_code('\033[22;0t')  # Save the title on stack
 
     def restore_console_title(self):
-        if not self.params.get('consoletitle', False):
-            return
-        if self.params.get('simulate'):
+        if not self.params.get('consoletitle') or self.params.get('simulate'):
             return
-        if compat_os_name != 'nt' and 'TERM' in os.environ:
-            # Restore the title from stack
-            self._write_string('\033[23;0t', self._screen_file)
+        self._send_console_code('\033[23;0t')  # Restore the title from stack
 
     def __enter__(self):
         self.save_console_title()
@@ -875,11 +881,6 @@
             raise DownloadError(message, exc_info)
         self._download_retcode = 1
 
-    def to_screen(self, message, skip_eol=False):
-        """Print message to stdout if not in quiet mode"""
-        self.to_stdout(
-            message, skip_eol, quiet=self.params.get('quiet', False))
-
     class Styles(Enum):
         HEADERS = 'yellow'
         EMPHASIS = 'light blue'
@@ -903,11 +904,11 @@
 
     def _format_screen(self, *args, **kwargs):
         return self._format_text(
-            self._screen_file, self._allow_colors['screen'], *args, **kwargs)
+            self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
 
     def _format_err(self, *args, **kwargs):
         return self._format_text(
-            self._err_file, self._allow_colors['err'], *args, **kwargs)
+            self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
 
     def report_warning(self, message, only_once=False):
         '''
@@ -2773,7 +2774,7 @@
         if info_dict.get('requested_formats') is not None:
             # For RTMP URLs, also include the playpath
             info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
-        elif 'url' in info_dict:
+        elif info_dict.get('url'):
             info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
 
         if (self.params.get('forcejson')
@@ -3600,7 +3601,7 @@
         encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
             locale.getpreferredencoding(),
             sys.getfilesystemencoding(),
-            get_encoding(self._screen_file), get_encoding(self._err_file),
+            get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
             self.get_encoding())
 
         logger = self.params.get('logger')
@@ -3674,6 +3675,7 @@
         from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
 
         lib_str = join_nonempty(
+            compat_brotli and compat_brotli.__name__,
             compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
             SECRETSTORAGE_AVAILABLE and 'secretstorage',
             has_mutagen and 'mutagen',
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 524130807..a0489fcfa 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -11,9 +11,7 @@ import random
 import re
 import sys
 
-from .options import (
-    parseOpts,
-)
+from .options import parseOpts
 from .compat import (
     compat_getpass,
     compat_os_name,
@@ -26,12 +24,12 @@ from .utils import (
     decodeOption,
     DownloadCancelled,
     DownloadError,
-    error_to_compat_str,
     expand_path,
-    GeoUtils,
     float_or_none,
+    GeoUtils,
     int_or_none,
     match_filter_func,
+    NO_DEFAULT,
     parse_duration,
     preferredencoding,
     read_batch_urls,
@@ -60,59 +58,38 @@ from .postprocessor import (
 from .YoutubeDL import YoutubeDL
 
 
-def _real_main(argv=None):
-    # Compatibility fixes for Windows
-    if sys.platform == 'win32':
-        # https://github.com/ytdl-org/youtube-dl/issues/820
-        codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
-
-    workaround_optparse_bug9161()
-
-    setproctitle('yt-dlp')
-
-    parser, opts, args = parseOpts(argv)
-    warnings, deprecation_warnings = [], []
-
-    if opts.user_agent is not None:
-        opts.headers.setdefault('User-Agent', opts.user_agent)
-    if opts.referer is not None:
-        opts.headers.setdefault('Referer', opts.referer)
-
-    # Dump user agent
-    if opts.dump_user_agent:
-        ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
-        write_string(f'{ua}\n', out=sys.stdout)
-        sys.exit(0)
-
+def get_urls(urls, batchfile, verbose):
     # Batch file verification
     batch_urls = []
-    if opts.batchfile is not None:
+    if batchfile is not None:
         try:
-            if opts.batchfile == '-':
+            if batchfile == '-':
                 write_string('Reading URLs from stdin - EOF (%s) to end:\n' % (
                     'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'))
                 batchfd = sys.stdin
             else:
                 batchfd = io.open(
-                    expand_path(opts.batchfile),
+                    expand_path(batchfile),
                     'r', encoding='utf-8', errors='ignore')
             batch_urls = read_batch_urls(batchfd)
-            if opts.verbose:
+            if verbose:
                 write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
         except IOError:
-            sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
-    all_urls = batch_urls + [url.strip() for url in args]  # batch_urls are already striped in read_batch_urls
+            sys.exit('ERROR: batch file %s could not be read' % batchfile)
     _enc = preferredencoding()
-    all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
numeric_limit - if opts.sleep_interval is not None: - if opts.sleep_interval < 0: - parser.error('sleep interval must be positive or 0') - if opts.max_sleep_interval is not None: - if opts.max_sleep_interval < 0: - parser.error('max sleep interval must be positive or 0') - if opts.sleep_interval is None: - parser.error('min sleep interval must be specified, use --min-sleep-interval') - if opts.max_sleep_interval < opts.sleep_interval: - parser.error('max sleep interval must be greater than or equal to min sleep interval') else: - opts.max_sleep_interval = opts.sleep_interval - if opts.sleep_interval_subtitles is not None: - if opts.sleep_interval_subtitles < 0: - parser.error('subtitles sleep interval must be positive or 0') - if opts.sleep_interval_requests is not None: - if opts.sleep_interval_requests < 0: - parser.error('requests sleep interval must be positive or 0') - if opts.ap_mso and opts.ap_mso not in MSO_INFO: - parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers') - if opts.overwrites: # --yes-overwrites implies --no-continue - opts.continue_dl = False - if opts.concurrent_fragment_downloads <= 0: - parser.error('Concurrent fragments must be positive') - if opts.wait_for_video is not None: - min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) - if min_wait is None or (max_wait is None and '-' in opts.wait_for_video): - parser.error('Invalid time range to wait') - elif max_wait is not None and max_wait < min_wait: - parser.error('Minimum time range to wait must not be longer than the maximum') - opts.wait_for_video = (min_wait, max_wait) + return False + return True - def parse_retries(retries, name=''): - if retries in ('inf', 'infinite'): - parsed_retries = float('inf') - else: - try: - parsed_retries = int(retries) - except (TypeError, ValueError): - parser.error('invalid %sretry count specified' % name) - return parsed_retries - if opts.retries is not None: - opts.retries = parse_retries(opts.retries) - if opts.file_access_retries is not None: - opts.file_access_retries = parse_retries(opts.file_access_retries, 'file access ') - if opts.fragment_retries is not None: - opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ') - if opts.extractor_retries is not None: - opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ') - if opts.buffersize is not None: - numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) - if numeric_buffersize is None: - parser.error('invalid buffer size specified') - opts.buffersize = numeric_buffersize - if opts.http_chunk_size is not None: - numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size) - if not numeric_chunksize: - parser.error('invalid http chunk size specified') - opts.http_chunk_size = numeric_chunksize - if opts.playliststart <= 0: - raise parser.error('Playlist start must be positive') - if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: - raise parser.error('Playlist end must be greater than playlist start') - if opts.extractaudio: - opts.audioformat = opts.audioformat.lower() - if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): - parser.error('invalid audio format specified') - if opts.audioquality: - opts.audioquality = opts.audioquality.strip('k').strip('K') - audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan - if audioquality is None or audioquality < 0: - parser.error('invalid audio 
quality specified') - if opts.recodevideo is not None: - opts.recodevideo = opts.recodevideo.replace(' ', '') - if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo): - parser.error('invalid video remux format specified') - if opts.remuxvideo is not None: - opts.remuxvideo = opts.remuxvideo.replace(' ', '') - if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo): - parser.error('invalid video remux format specified') - if opts.convertsubtitles is not None: - if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: - parser.error('invalid subtitle format specified') - if opts.convertthumbnails is not None: - if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: - parser.error('invalid thumbnail format specified') - if opts.cookiesfrombrowser is not None: - mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser) - if mobj is None: - parser.error(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') - browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') - browser_name = browser_name.lower() - if browser_name not in SUPPORTED_BROWSERS: - parser.error(f'unsupported browser specified for cookies: "{browser_name}". ' - f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') - if keyring is not None: - keyring = keyring.upper() - if keyring not in SUPPORTED_KEYRINGS: - parser.error(f'unsupported keyring specified for cookies: "{keyring}". ' - f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') - opts.cookiesfrombrowser = (browser_name, profile, keyring) - geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country - if geo_bypass_code is not None: - try: - GeoUtils.random_ipv4(geo_bypass_code) - except Exception: - parser.error('unsupported geo-bypass country or ip-block') - - if opts.date is not None: - date = DateRange.day(opts.date) - else: - date = DateRange(opts.dateafter, opts.datebefore) - - compat_opts = opts.compat_opts - - def report_conflict(arg1, arg2): - warnings.append(f'{arg2} is ignored since {arg1} was given') +def set_compat_opts(opts): def _unused_compat_opt(name): - if name not in compat_opts: + if name not in opts.compat_opts: return False - compat_opts.discard(name) - compat_opts.update(['*%s' % name]) + opts.compat_opts.discard(name) + opts.compat_opts.update(['*%s' % name]) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): attr = getattr(opts, opt_name) - if compat_name in compat_opts: + if compat_name in opts.compat_opts: if attr is None: setattr(opts, opt_name, not default) return True @@ -316,36 +134,140 @@ def _real_main(argv=None): set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') - if 'no-attach-info-json' in compat_opts: + if 'no-attach-info-json' in opts.compat_opts: if opts.embed_infojson: _unused_compat_opt('no-attach-info-json') else: opts.embed_infojson = False - if 'format-sort' in compat_opts: + if 'format-sort' in opts.compat_opts: opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and 
_audio_multistreams_set is False: _unused_compat_opt('multistreams') - outtmpl_default = opts.outtmpl.get('default') - if outtmpl_default == '': - outtmpl_default, opts.skip_download = None, True - del opts.outtmpl['default'] - if opts.useid: - if outtmpl_default is None: - outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s' - else: - report_conflict('--output', '--id') - if 'filename' in compat_opts: - if outtmpl_default is None: - outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s' + if 'filename' in opts.compat_opts: + if opts.outtmpl.get('default') is None: + opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'}) else: _unused_compat_opt('filename') + +def validate_options(opts): + def validate(cndn, name, value=None, msg=None): + if cndn: + return True + raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value)) + + def validate_in(name, value, items, msg=None): + return validate(value is None or value in items, name, value, msg) + + def validate_regex(name, value, regex): + return validate(value is None or re.match(regex, value), name, value) + + def validate_positive(name, value, strict=False): + return validate(value is None or value > 0 or (not strict and value == 0), + name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0')) + + def validate_minmax(min_val, max_val, min_name, max_name=None): + if max_val is None or min_val is None or max_val >= min_val: + return + if not max_name: + min_name, max_name = f'min {min_name}', f'max {min_name}' + raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"') + + # Usernames and passwords + validate(not opts.usenetrc or (opts.username is None and opts.password is None), + '.netrc', msg='using {name} conflicts with giving username/password') + validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing') + validate(opts.ap_password is None or opts.ap_username is not None, + 'TV Provider account username', msg='{name} missing') + validate_in('TV Provider', opts.ap_mso, MSO_INFO, + 'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers') + + # Numbers + validate_positive('autonumber start', opts.autonumber_start) + validate_positive('autonumber size', opts.autonumber_size, True) + validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True) + validate_positive('playlist start', opts.playliststart, True) + if opts.playlistend != -1: + validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end') + + # Time ranges + validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles) + validate_positive('requests sleep interval', opts.sleep_interval_requests) + validate_positive('sleep interval', opts.sleep_interval) + validate_positive('max sleep interval', opts.max_sleep_interval) + if opts.sleep_interval is None: + validate( + opts.max_sleep_interval is None, 'min sleep interval', + msg='{name} must be specified; use --min-sleep-interval') + elif opts.max_sleep_interval is None: + opts.max_sleep_interval = opts.sleep_interval + else: + validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') + + if opts.wait_for_video is not None: + min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), + 'time range to wait for video', 
opts.wait_for_video) + validate_minmax(min_wait, max_wait, 'time range to wait for video') + opts.wait_for_video = (min_wait, max_wait) + + # Format sort + for f in opts.format_sort: + validate_regex('format sorting', f, InfoExtractor.FormatSort.regex) + + # Postprocessor formats + validate_in('audio format', opts.audioformat, ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS)) + validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS) + validate_in('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS) + if opts.recodevideo is not None: + opts.recodevideo = opts.recodevideo.replace(' ', '') + validate_regex('video recode format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE) + if opts.remuxvideo is not None: + opts.remuxvideo = opts.remuxvideo.replace(' ', '') + validate_regex('video remux format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE) + if opts.audioquality: + opts.audioquality = opts.audioquality.strip('k').strip('K') + # int_or_none prevents inf, nan + validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0)) + + # Retries + def parse_retries(name, value): + if value is None: + return None + elif value in ('inf', 'infinite'): + return float('inf') + try: + return int(value) + except (TypeError, ValueError): + validate(False, f'{name} retry count', value) + + opts.retries = parse_retries('download', opts.retries) + opts.fragment_retries = parse_retries('fragment', opts.fragment_retries) + opts.extractor_retries = parse_retries('extractor', opts.extractor_retries) + opts.file_access_retries = parse_retries('file access', opts.file_access_retries) + + # Bytes + def parse_bytes(name, value): + if value is None: + return None + numeric_limit = FileDownloader.parse_bytes(value) + validate(numeric_limit is not None, 'rate limit', value) + return numeric_limit + + opts.ratelimit = parse_bytes('rate limit', opts.ratelimit) + opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit) + opts.min_filesize = parse_bytes('min filesize', opts.min_filesize) + opts.max_filesize = parse_bytes('max filesize', opts.max_filesize) + opts.buffersize = parse_bytes('buffer size', opts.buffersize) + opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size) + + # Output templates def validate_outtmpl(tmpl, msg): err = YoutubeDL.validate_outtmpl(tmpl) if err: - parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err))) + raise ValueError(f'invalid {msg} "{tmpl}": {err}') for k, tmpl in opts.outtmpl.items(): validate_outtmpl(tmpl, f'{k} output template') @@ -354,32 +276,62 @@ def _real_main(argv=None): validate_outtmpl(tmpl, f'{type_} print template') for type_, tmpl_list in opts.print_to_file.items(): for tmpl, file in tmpl_list: - validate_outtmpl(tmpl, f'{type_} print-to-file template') - validate_outtmpl(file, f'{type_} print-to-file filename') + validate_outtmpl(tmpl, f'{type_} print to file template') + validate_outtmpl(file, f'{type_} print to file filename') validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') for k, tmpl in opts.progress_template.items(): k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' validate_outtmpl(tmpl, f'{k} template') - if opts.extractaudio and not opts.keepvideo and opts.format is None: - opts.format = 'bestaudio/best' - - if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: - parser.error('Cannot 
download a video and extract audio into the same' - ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' - ' template'.format(outtmpl_default)) + outtmpl_default = opts.outtmpl.get('default') + if outtmpl_default == '': + opts.skip_download = None + del opts.outtmpl['default'] + if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: + raise ValueError( + 'Cannot download a video and extract audio into the same file! ' + f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template') + + # Remove chapters + remove_chapters_patterns, opts.remove_ranges = [], [] + for regex in opts.remove_chapters or []: + if regex.startswith('*'): + dur = list(map(parse_duration, regex[1:].split('-'))) + if len(dur) == 2 and all(t is not None for t in dur): + opts.remove_ranges.append(tuple(dur)) + continue + raise ValueError(f'invalid --remove-chapters time range "{regex}". Must be of the form *start-end') + try: + remove_chapters_patterns.append(re.compile(regex)) + except re.error as err: + raise ValueError(f'invalid --remove-chapters regex "{regex}" - {err}') + opts.remove_chapters = remove_chapters_patterns - for f in opts.format_sort: - if re.match(InfoExtractor.FormatSort.regex, f) is None: - parser.error('invalid format sort string "%s" specified' % f) + # Cookies from browser + if opts.cookiesfrombrowser: + mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser) + if mobj is None: + raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') + browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') + browser_name = browser_name.lower() + if browser_name not in SUPPORTED_BROWSERS: + raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". ' + f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}') + if keyring is not None: + keyring = keyring.upper() + if keyring not in SUPPORTED_KEYRINGS: + raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". 
' + f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}') + opts.cookiesfrombrowser = (browser_name, profile, keyring) + # MetadataParser def metadataparser_actions(f): if isinstance(f, str): cmd = '--parse-metadata %s' % compat_shlex_quote(f) try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: - parser.error(f'{cmd} is invalid; {err}') + raise ValueError(f'{cmd} is invalid; {err}') else: cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) @@ -388,162 +340,218 @@ def _real_main(argv=None): try: MetadataParserPP.validate_action(*action) except Exception as err: - parser.error(f'{cmd} is invalid; {err}') + raise ValueError(f'{cmd} is invalid; {err}') yield action - if opts.parse_metadata is None: - opts.parse_metadata = [] + parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - opts.parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata))) + parse_metadata.append('title:%s' % opts.metafromtitle) + opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) - any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json - or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail - or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) + # Other options + geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country + if geo_bypass_code is not None: + try: + GeoUtils.random_ipv4(geo_bypass_code) + except Exception: + raise ValueError('unsupported geo-bypass country or ip-block') - any_printing = opts.print_json - download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive + opts.match_filter = match_filter_func(opts.match_filter) + + if opts.download_archive is not None: + opts.download_archive = expand_path(opts.download_archive) - # If JSON is not printed anywhere, but comments are requested, save it to file - printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json - if opts.getcomments and not printing_json: - opts.writeinfojson = True + if opts.user_agent is not None: + opts.headers.setdefault('User-Agent', opts.user_agent) + if opts.referer is not None: + opts.headers.setdefault('Referer', opts.referer) if opts.no_sponsorblock: - opts.sponsorblock_mark = set() - opts.sponsorblock_remove = set() - sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + opts.sponsorblock_mark = opts.sponsorblock_remove = set() - opts.remove_chapters = opts.remove_chapters or [] - - if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: - if opts.sponskrub: - if opts.remove_chapters: - report_conflict('--remove-chapters', '--sponskrub') - if opts.sponsorblock_mark: - report_conflict('--sponsorblock-mark', '--sponskrub') - if opts.sponsorblock_remove: - report_conflict('--sponsorblock-remove', '--sponskrub') - opts.sponskrub = False - if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: - report_conflict('--split-chapter', '--sponskrub-cut') - opts.sponskrub_cut = False - - if opts.remuxvideo and opts.recodevideo: - report_conflict('--recode-video', '--remux-video') - opts.remuxvideo = False - - if opts.allow_unplayable_formats: - def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): - val 
= getattr(opts, opt_name) - if (not allowed and val) or (allowed and not allowed(val)): - report_conflict('--allow-unplayable-formats', arg) - setattr(opts, opt_name, default) - - report_unplayable_conflict('extractaudio', '--extract-audio') - report_unplayable_conflict('remuxvideo', '--remux-video') - report_unplayable_conflict('recodevideo', '--recode-video') - report_unplayable_conflict('addmetadata', '--embed-metadata') - report_unplayable_conflict('addchapters', '--embed-chapters') - report_unplayable_conflict('embed_infojson', '--embed-info-json') - opts.embed_infojson = False - report_unplayable_conflict('embedsubtitles', '--embed-subs') - report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') - report_unplayable_conflict('xattrs', '--xattrs') - report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) - opts.fixup = 'never' - report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) - report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) - report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) - opts.sponskrub = False + warnings, deprecation_warnings = [], [] + + # Common mistake: -f best + if opts.format == 'best': + warnings.append('.\n '.join(( + '"-f best" selects the best pre-merged format which is often not the best option', + 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', + 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) + + # --(post-processor/downloader)-args without name + def report_args_compat(name, value, key1, key2=None): + if key1 in value and key2 not in value: + warnings.append(f'{name} arguments given without specifying name. 
The arguments will be given to all {name}s') + return True + return False + + report_args_compat('external downloader', opts.external_downloader_args, 'default') + if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): + opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') + opts.postprocessor_args.setdefault('sponskrub', []) + + def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', + val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): + if val2 is NO_DEFAULT: + val2 = getattr(opts, opt2) + if not val2: + return + + if val1 is NO_DEFAULT: + val1 = getattr(opts, opt1) + if val1: + warnings.append(f'{arg1} is ignored since {arg2} was given') + setattr(opts, opt1, default) + + # Conflicting options + report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) + report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl')) + report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) + report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') + report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') + report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') + report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut) + + # Conflicts with --allow-unplayable-formats + report_conflict('--add-metadata', 'addmetadata') + report_conflict('--embed-chapters', 'addchapters') + report_conflict('--embed-info-json', 'embed_infojson') + report_conflict('--embed-subs', 'embedsubtitles') + report_conflict('--embed-thumbnail', 'embedthumbnail') + report_conflict('--extract-audio', 'extractaudio') + report_conflict('--fixup', 'fixup', val1=(opts.fixup or '').lower() in ('', 'never', 'ignore'), default='never') + report_conflict('--recode-video', 'recodevideo') + report_conflict('--remove-chapters', 'remove_chapters', default=[]) + report_conflict('--remux-video', 'remuxvideo') + report_conflict('--sponskrub', 'sponskrub') + report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) + report_conflict('--xattrs', 'xattrs') + + # Fully deprecated options + def report_deprecation(val, old, new=None): + if not val: + return + deprecation_warnings.append( + f'{old} is deprecated and may be removed in a future version. 
Use {new} instead' if new + else f'{old} is deprecated and may not work as expected') + + report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') + report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') + # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future + # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future + # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + + # Dependent options + opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) + + if opts.exec_before_dl_cmd: + opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd + + if opts.useid: # --id is not deprecated in youtube-dl + opts.outtmpl['default'] = '%(id)s.%(ext)s' + + if opts.overwrites: # --force-overwrites implies --no-continue + opts.continue_dl = False if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + # Add chapters when adding metadata or marking sponsors opts.addchapters = True - # PostProcessors - postprocessors = list(opts.add_postprocessors) + if opts.extractaudio and not opts.keepvideo and opts.format is None: + # Do not unnecessarily download audio + opts.format = 'bestaudio/best' + + if opts.getcomments and opts.writeinfojson is None: + # If JSON is not printed anywhere, but comments are requested, save it to file + if not opts.dumpjson or opts.print_json or opts.dump_single_json: + opts.writeinfojson = True + + if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub): + # --all-sub automatically sets --write-sub if --write-auto-sub is not given + opts.writesubtitles = True + + if opts.addmetadata and opts.embed_infojson is None: + # If embedding metadata and infojson is present, embed it + opts.embed_infojson = 'if_exists' + + # Ask for passwords + if opts.username is not None and opts.password is None: + opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') + + return warnings, deprecation_warnings + + +def get_postprocessors(opts): + yield from opts.add_postprocessors + + if opts.parse_metadata: + yield { + 'key': 'MetadataParser', + 'actions': opts.parse_metadata, + 'when': 'pre_process' + } + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: - postprocessors.append({ + yield { 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - # Run this after filtering videos 'when': 'after_filter' - }) - if opts.parse_metadata: - postprocessors.append({ - 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - # Run this immediately after extraction is complete - 'when': 'pre_process' - }) + } if opts.convertsubtitles: - postprocessors.append({ + yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, - # Run this before the actual video download 'when': 'before_dl' - }) + } if opts.convertthumbnails: - postprocessors.append({ + yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, - # Run this before the actual video download 'when': 'before_dl' - }) + } if opts.extractaudio: - postprocessors.append({ + yield { 'key': 'FFmpegExtractAudio', 'preferredcodec': opts.audioformat, 'preferredquality': opts.audioquality, 'nopostoverwrites': 
opts.nopostoverwrites, - }) + } if opts.remuxvideo: - postprocessors.append({ + yield { 'key': 'FFmpegVideoRemuxer', 'preferedformat': opts.remuxvideo, - }) + } if opts.recodevideo: - postprocessors.append({ + yield { 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, - }) + } # If ModifyChapters is going to remove chapters, subtitles must already be in the container. if opts.embedsubtitles: - already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts - postprocessors.append({ + keep_subs = 'no-keep-subs' not in opts.compat_opts + yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': already_have_subtitle - }) - if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts: + 'already_have_subtitle': opts.writesubtitles and keep_subs + } + if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True - # --all-sub automatically sets --write-sub if --write-auto-sub is not given - # this was the old behaviour if only --all-sub was given. - if opts.allsubtitles and not opts.writeautomaticsub: - opts.writesubtitles = True + # ModifyChapters must run before FFmpegMetadataPP - remove_chapters_patterns, remove_ranges = [], [] - for regex in opts.remove_chapters: - if regex.startswith('*'): - dur = list(map(parse_duration, regex[1:].split('-'))) - if len(dur) == 2 and all(t is not None for t in dur): - remove_ranges.append(tuple(dur)) - continue - parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end') - try: - remove_chapters_patterns.append(re.compile(regex)) - except re.error as err: - parser.error(f'invalid --remove-chapters regex {regex!r} - {err}') if opts.remove_chapters or sponsorblock_query: - postprocessors.append({ + yield { 'key': 'ModifyChapters', - 'remove_chapters_patterns': remove_chapters_patterns, + 'remove_chapters_patterns': opts.remove_chapters, 'remove_sponsor_segments': opts.sponsorblock_remove, - 'remove_ranges': remove_ranges, + 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, 'force_keyframes': opts.force_keyframes_at_cuts - }) + } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support # metadata (3gp, webm, etc.) @@ -551,21 +559,19 @@ def _real_main(argv=None): # source and target containers. From this point the container won't change, # so metadata can be added here. 
if opts.addmetadata or opts.addchapters or opts.embed_infojson: - if opts.embed_infojson is None: - opts.embed_infojson = 'if_exists' - postprocessors.append({ + yield { 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, - }) + } # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found if opts.sponskrub is not False: - postprocessors.append({ + yield { 'key': 'SponSkrub', 'path': opts.sponskrub_path, 'args': opts.sponskrub_args, @@ -573,64 +579,57 @@ def _real_main(argv=None): 'force': opts.sponskrub_force, 'ignoreerror': opts.sponskrub is None, '_from_cli': True, - }) + } if opts.embedthumbnail: - postprocessors.append({ + yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding 'already_have_thumbnail': opts.writethumbnail - }) + } if not opts.writethumbnail: opts.writethumbnail = True opts.outtmpl['pl_thumbnail'] = '' if opts.split_chapters: - postprocessors.append({ + yield { 'key': 'FFmpegSplitChapters', 'force_keyframes': opts.force_keyframes_at_cuts, - }) + } # XAttrMetadataPP should be run after post-processors that may change file contents if opts.xattrs: - postprocessors.append({'key': 'XAttrMetadata'}) + yield {'key': 'XAttrMetadata'} if opts.concat_playlist != 'never': - postprocessors.append({ + yield { 'key': 'FFmpegConcat', 'only_multi_video': opts.concat_playlist != 'always', 'when': 'playlist', - }) + } # Exec must be the last PP of each category - if opts.exec_before_dl_cmd: - opts.exec_cmd.setdefault('before_dl', opts.exec_before_dl_cmd) for when, exec_cmd in opts.exec_cmd.items(): - postprocessors.append({ + yield { 'key': 'Exec', 'exec_cmd': exec_cmd, - # Run this only after the files have been moved to their final locations 'when': when, - }) + } - def report_args_compat(arg, name): - warnings.append('%s given without specifying name. The arguments will be given to all %s' % (arg, name)) - if 'default' in opts.external_downloader_args: - report_args_compat('--downloader-args', 'external downloaders') +def parse_options(argv=None): + """ @returns (parser, opts, urls, ydl_opts) """ + parser, opts, urls = parseOpts(argv) + urls = get_urls(urls, opts.batchfile, opts.verbose) - if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args: - report_args_compat('--post-processor-args', 'post-processors') - opts.postprocessor_args.setdefault('sponskrub', []) - opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat'] + set_compat_opts(opts) + try: + warnings, deprecation_warnings = validate_options(opts) + except ValueError as err: + parser.error(f'{err}\n') - def report_deprecation(val, old, new=None): - if not val: - return - deprecation_warnings.append( - f'{old} is deprecated and may be removed in a future version. 
Use {new} instead' if new - else f'{old} is deprecated and may not work as expected') + postprocessors = list(get_postprocessors(opts)) - report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') - report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') - report_deprecation(opts.include_ads, '--include-ads') - # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future - # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json + or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail + or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) + + any_printing = opts.print_json final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS @@ -638,11 +637,7 @@ def _real_main(argv=None): else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best') else None) - match_filter = ( - None if opts.match_filter is None - else match_filter_func(opts.match_filter)) - - ydl_opts = { + return parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, 'username': opts.username, @@ -710,7 +705,7 @@ def _real_main(argv=None): 'playlistreverse': opts.playlist_reverse, 'playlistrandom': opts.playlist_random, 'noplaylist': opts.noplaylist, - 'logtostderr': outtmpl_default == '-', + 'logtostderr': opts.outtmpl.get('default') == '-', 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, @@ -746,11 +741,11 @@ def _real_main(argv=None): 'max_filesize': opts.max_filesize, 'min_views': opts.min_views, 'max_views': opts.max_views, - 'daterange': date, + 'daterange': opts.date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, 'age_limit': opts.age_limit, - 'download_archive': download_archive_fn, + 'download_archive': opts.download_archive, 'break_on_existing': opts.break_on_existing, 'break_on_reject': opts.break_on_reject, 'break_per_url': opts.break_per_url, @@ -791,7 +786,7 @@ def _real_main(argv=None): 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, - 'match_filter': match_filter, + 'match_filter': opts.match_filter, 'no_color': opts.no_color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, @@ -806,9 +801,30 @@ def _real_main(argv=None): 'geo_bypass_ip_block': opts.geo_bypass_ip_block, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, - 'compat_opts': compat_opts, + 'compat_opts': opts.compat_opts, } + +def _real_main(argv=None): + # Compatibility fixes for Windows + if sys.platform == 'win32': + # https://github.com/ytdl-org/youtube-dl/issues/820 + codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None) + + workaround_optparse_bug9161() + + setproctitle('yt-dlp') + + parser, opts, all_urls, ydl_opts = parse_options(argv) + + # Dump user agent + if opts.dump_user_agent: + ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) + write_string(f'{ua}\n', out=sys.stdout) + + if print_extractor_information(opts, all_urls): + sys.exit(0) + with YoutubeDL(ydl_opts) as ydl: actual_use = all_urls or opts.load_info_filename @@ -851,4 +867,10 @@ def main(argv=None): sys.exit(f'\nERROR: {e}') -__all__ = ['main', 'YoutubeDL', 
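The refactor above splits `_real_main` into `validate_options`, `get_postprocessors` and a new `parse_options`, which `__all__` now exports. A minimal embedding sketch, assuming only the `(parser, opts, urls, ydl_opts)` return shape stated in the docstring; the flags and URL are placeholders:

    # Sketch: reuse yt-dlp's own CLI option handling when embedding.
    from yt_dlp import YoutubeDL, parse_options

    # Hypothetical arguments, in the same form the CLI would receive them
    parser, opts, urls, ydl_opts = parse_options(
        ['-f', 'bestaudio', 'https://example.com/watch?v=xyz'])

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)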
diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py
index 2bc6a6b7f..0a0d3b351 100644
--- a/yt_dlp/compat.py
+++ b/yt_dlp/compat.py
@@ -170,6 +170,13 @@ except ImportError:
 except ImportError:
     compat_pycrypto_AES = None

+try:
+    import brotlicffi as compat_brotli
+except ImportError:
+    try:
+        import brotli as compat_brotli
+    except ImportError:
+        compat_brotli = None

 WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None

@@ -258,6 +265,7 @@ __all__ = [
     'compat_asyncio_run',
     'compat_b64decode',
     'compat_basestring',
+    'compat_brotli',
     'compat_chr',
     'compat_collections_abc',
     'compat_cookiejar',
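The shim prefers `brotlicffi` and falls back to `brotli`, leaving the name `None` if neither is installed. A standalone sketch of the same pattern, used here to decompress a `Content-Encoding: br` body (the `data` argument is a placeholder for raw response bytes):

    try:
        import brotlicffi as brotli  # CFFI build, works on PyPy too
    except ImportError:
        try:
            import brotli  # Brotli/brotlipy bindings expose the same decompress()
        except ImportError:
            brotli = None

    def decode_brotli(data: bytes) -> bytes:
        if brotli is None:
            raise RuntimeError('no brotli implementation available')
        return brotli.decompress(data)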
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index 3a949d38a..afd2f2e38 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -159,7 +159,7 @@ class FileDownloader(object):
         return int(round(number * multiplier))

     def to_screen(self, *args, **kargs):
-        self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
+        self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)

     def to_stderr(self, message):
         self.ydl.to_stderr(message)
@@ -277,9 +277,9 @@ class FileDownloader(object):
         elif self.ydl.params.get('logger'):
             self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
         elif self.params.get('progress_with_newline'):
-            self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
+            self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
         else:
-            self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
+            self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
         self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')

     def _finish_multiline_status(self):
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index ef4205edc..b28d1ec17 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -22,6 +22,9 @@ class YoutubeLiveChatFD(FragmentFD):
     def real_download(self, filename, info_dict):
         video_id = info_dict['video_id']
         self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+            self.report_warning('Live chat download runs until the livestream ends. '
+                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

         fragment_retries = self.params.get('fragment_retries', 0)
         test = self.params.get('test', False)
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 66b12c72f..360fa4699 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -8,10 +8,6 @@ import struct
 from base64 import urlsafe_b64encode
 from binascii import unhexlify

-import typing
-if typing.TYPE_CHECKING:
-    from ..YoutubeDL import YoutubeDL
-
 from .common import InfoExtractor
 from ..aes import aes_ecb_decrypt
 from ..compat import (
@@ -36,15 +32,15 @@ from ..utils import (
 # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)

-def add_opener(self: 'YoutubeDL', handler):
+def add_opener(ydl, handler):
     ''' Add a handler for opening URLs, like _download_webpage '''
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
-    self._opener.add_handler(handler)
+    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+    ydl._opener.add_handler(handler)

-def remove_opener(self: 'YoutubeDL', handler):
+def remove_opener(ydl, handler):
     '''
     Remove handler(s) for opening URLs
     @param handler Either handler object itself or handler type.
@@ -52,8 +48,8 @@ def remove_opener(self: 'YoutubeDL', handler):
     '''
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
     # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
-    opener = self._opener
-    assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
+    opener = ydl._opener
+    assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
     if isinstance(handler, (type, tuple)):
         find_cp = lambda x: isinstance(x, handler)
     else:
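These temporary helpers just manipulate the handler list of an `urllib.request.OpenerDirector` (the YoutubeDL `_opener`). A simplified, yt-dlp-free sketch of the same idea; the real `remove_opener` also cleans the opener's internal dispatch tables, which this toy version skips:

    import urllib.request

    class LoggingHTTPHandler(urllib.request.HTTPHandler):
        def http_open(self, req):
            print('fetching', req.full_url)  # side effect for demonstration only
            return super().http_open(req)

    opener = urllib.request.build_opener(LoggingHTTPHandler())
    # Removal by type, mirroring remove_opener(ydl, handler_type) (simplified):
    opener.handlers = [h for h in opener.handlers if not isinstance(h, LoggingHTTPHandler)]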
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index bebcafa6b..f0eba8844 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1345,6 +1345,11 @@ MSO_INFO = {
         'username_field': 'username',
         'password_field': 'password',
     },
+    'Suddenlink': {
+        'name': 'Suddenlink',
+        'username_field': 'username',
+        'password_field': 'password',
+    },
 }

@@ -1636,6 +1641,52 @@ class AdobePassIE(InfoExtractor):
                     query=hidden_data)

                 post_form(mvpd_confirm_page_res, 'Confirming Login')
+            elif mso_id == 'Suddenlink':
+                # Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
+                # but they also do a dynamic redirect using javascript that has to be followed as well
+                first_bookend_page, urlh = post_form(
+                    provider_redirect_page_res, 'Pressing Continue...')
+
+                hidden_data = self._hidden_inputs(first_bookend_page)
+                hidden_data['history_val'] = 1
+
+                provider_login_redirect_page = self._download_webpage(
+                    urlh.geturl(), video_id, 'Sending First Bookend',
+                    query=hidden_data)
+
+                provider_tryauth_url = self._html_search_regex(
+                    r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+
+                provider_tryauth_page = self._download_webpage(
+                    provider_tryauth_url, video_id, 'Submitting TryAuth',
+                    query=hidden_data)
+
+                provider_login_page_res = self._download_webpage_handle(
+                    f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
+                    video_id, 'Getting Login Page',
+                    query=hidden_data)
+
+                provider_association_redirect, urlh = post_form(
+                    provider_login_page_res, 'Logging in', {
+                        mso_info['username_field']: username,
+                        mso_info['password_field']: password
+                    })
+
+                provider_refresh_redirect_url = extract_redirect_url(
+                    provider_association_redirect, url=urlh.geturl())
+
+                last_bookend_page, urlh = self._download_webpage_handle(
+                    provider_refresh_redirect_url, video_id,
+                    'Downloading Auth Association Redirect Page')
+
+                hidden_data = self._hidden_inputs(last_bookend_page)
+                hidden_data['history_val'] = 3
+
+                mvpd_confirm_page_res = self._download_webpage_handle(
+                    urlh.geturl(), video_id, 'Sending Final Bookend',
+                    query=hidden_data)
+
+                post_form(mvpd_confirm_page_res, 'Confirming Login')
             else:
                 # Some providers (e.g. DIRECTV NOW) have another meta refresh
                 # based redirect that should be followed.
diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py
index 7d70e0427..1075b461e 100644
--- a/yt_dlp/extractor/ant1newsgr.py
+++ b/yt_dlp/extractor/ant1newsgr.py
@@ -97,8 +97,8 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
         embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
         if not embed_urls:
             raise ExtractorError('no videos found for %s' % video_id, expected=True)
-        return self.url_result_or_playlist_from_matches(
-            embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(),
+        return self.playlist_from_matches(
+            embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
             video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})

diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 4ad5d6ddd..7ea339b39 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -407,8 +407,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                     (?:(?:beta|www)\.)?ardmediathek\.de/
                     (?:(?P<client>[^/]+)/)?
                     (?:player|live|video|(?P<playlist>sendung|sammlung))/
-                    (?:(?P<display_id>[^?#]+)/)?
-                    (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)'''
+                    (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
+                    (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
+                    (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''

     _TESTS = [{
         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@@ -437,6 +438,13 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
             'upload_date': '20211108',
         },
     }, {
+        'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
+        'playlist_count': 6,
+        'info_dict': {
+            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
+            'title': 'beforeigners/beforeigners/staffel-1',
+        },
+    }, {
         'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
         'only_matching': True,
     }, {
@@ -561,14 +569,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
                 break
             pageNumber = pageNumber + 1

-        return self.playlist_result(entries, playlist_title=display_id)
+        return self.playlist_result(entries, playlist_id, playlist_title=display_id)

     def _real_extract(self, url):
-        video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
-            'id', 'display_id', 'playlist', 'client')
+        video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
+            'id', 'display_id', 'playlist', 'client', 'season')
         display_id, client = display_id or video_id, client or 'ard'

         if playlist_type:
+            # TODO: Extract only specified season
             return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)

         player_page = self._download_json(
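The reworked ARD pattern leans on conditional groups: `(?(playlist)A|B)` matches `A` only when the group named `playlist` participated in the match. A toy illustration of that construct (pattern and inputs are invented for the example):

    import re

    # slug is matched non-greedily only when the optional 'playlist' group matched
    pattern = re.compile(r'(?:(?P<playlist>sendung)/)?(?P<slug>(?(playlist)[^?#]+?|[^?#]+))$')
    assert pattern.match('sendung/beforeigners').group('slug') == 'beforeigners'
    assert pattern.match('beforeigners').group('playlist') is None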
diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py
index ea98f8688..9dbaabfa0 100644
--- a/yt_dlp/extractor/ccma.py
+++ b/yt_dlp/extractor/ccma.py
@@ -1,17 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import calendar
-import datetime
-
 from .common import InfoExtractor
 from ..utils import (
     clean_html,
-    extract_timezone,
     int_or_none,
     parse_duration,
     parse_resolution,
     try_get,
+    unified_timestamp,
     url_or_none,
 )

@@ -95,14 +92,8 @@ class CCMAIE(InfoExtractor):
         duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
         tematica = try_get(informacio, lambda x: x['tematica']['text'])

-        timestamp = None
         data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
-        try:
-            timezone, data_utc = extract_timezone(data_utc)
-            timestamp = calendar.timegm((datetime.datetime.strptime(
-                data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
-        except TypeError:
-            pass
+        timestamp = unified_timestamp(data_utc)

         subtitles = {}
         subtitols = media.get('subtitols') or []
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index f86e7cb3e..354814433 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -226,6 +226,7 @@ class InfoExtractor(object):

     The following fields are optional:

+    direct:         True if a direct video file was given (must only be set by GenericIE)
     alt_title:      A secondary title of the video.
     display_id      An alternative identifier for the video, not necessarily
                     unique, but available before title. Typically, id is
@@ -274,7 +275,7 @@ class InfoExtractor(object):
                         * "url": A URL pointing to the subtitles file
                     It can optionally also have:
                         * "name": Name or description of the subtitles
-                        * http_headers: A dictionary of additional HTTP headers
+                        * "http_headers": A dictionary of additional HTTP headers
                           to add to the request.
                     "ext" will be calculated from URL if missing
     automatic_captions: Like 'subtitles'; contains automatically generated
@@ -425,8 +426,8 @@ class InfoExtractor(object):
     title, description etc.

-    Subclasses of this one should re-define the _real_initialize() and
-    _real_extract() methods and define a _VALID_URL regexp.
+    Subclasses of this should define a _VALID_URL regexp and re-define the
+    _real_extract() and (optionally) _real_initialize() methods.
     Probably, they should also be added to the list of extractors.

     Subclasses may also override suitable() if necessary, but ensure the function
@@ -661,7 +662,7 @@ class InfoExtractor(object):
         return False

     def set_downloader(self, downloader):
-        """Sets the downloader for this IE."""
+        """Sets a YoutubeDL instance as the downloader for this IE."""
         self._downloader = downloader

     def _real_initialize(self):
@@ -670,7 +671,7 @@ class InfoExtractor(object):

     def _real_extract(self, url):
         """Real extraction process. Redefine in subclasses."""
-        pass
+        raise NotImplementedError('This method must be implemented by subclasses')

     @classmethod
     def ie_key(cls):
@@ -749,7 +750,7 @@ class InfoExtractor(object):
             errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
             if fatal:
-                raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+                raise ExtractorError(errmsg, cause=err)
             else:
                 self.report_warning(errmsg)
                 return False
@@ -1661,31 +1662,31 @@ class InfoExtractor(object):
             'format_id': {'type': 'alias', 'field': 'id'},
             'preference': {'type': 'alias', 'field': 'ie_pref'},
             'language_preference': {'type': 'alias', 'field': 'lang'},
-
-            # Deprecated
-            'dimension': {'type': 'alias', 'field': 'res'},
-            'resolution': {'type': 'alias', 'field': 'res'},
-            'extension': {'type': 'alias', 'field': 'ext'},
-            'bitrate': {'type': 'alias', 'field': 'br'},
-            'total_bitrate': {'type': 'alias', 'field': 'tbr'},
-            'video_bitrate': {'type': 'alias', 'field': 'vbr'},
-            'audio_bitrate': {'type': 'alias', 'field': 'abr'},
-            'framerate': {'type': 'alias', 'field': 'fps'},
-            'protocol': {'type': 'alias', 'field': 'proto'},
             'source_preference': {'type': 'alias', 'field': 'source'},
+            'protocol': {'type': 'alias', 'field': 'proto'},
             'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
-            'filesize_estimate': {'type': 'alias', 'field': 'size'},
-            'samplerate': {'type': 'alias', 'field': 'asr'},
-            'video_ext': {'type': 'alias', 'field': 'vext'},
-            'audio_ext': {'type': 'alias', 'field': 'aext'},
-            'video_codec': {'type': 'alias', 'field': 'vcodec'},
-            'audio_codec': {'type': 'alias', 'field': 'acodec'},
-            'video': {'type': 'alias', 'field': 'hasvid'},
-            'has_video': {'type': 'alias', 'field': 'hasvid'},
-            'audio': {'type': 'alias', 'field': 'hasaud'},
-            'has_audio': {'type': 'alias', 'field': 'hasaud'},
-            'extractor': {'type': 'alias', 'field': 'ie_pref'},
-            'extractor_preference': {'type': 'alias', 'field': 'ie_pref'},
+
+            # Deprecated
+            'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
+            'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
+            'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
+            'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
+            'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
+            'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
+            'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
+            'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
+            'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
+            'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
+            'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
+            'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
+            'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
+            'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
+            'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+            'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+            'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+            'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+            'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+            'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
         }

         def __init__(self, ie, field_preference):
@@ -1785,7 +1786,7 @@ class InfoExtractor(object):
                 continue
             if self._get_field_setting(field, 'type') == 'alias':
                 alias, field = field, self._get_field_setting(field, 'field')
-                if alias not in ('format_id', 'preference', 'language_preference'):
+                if self._get_field_setting(alias, 'deprecated'):
                     self.ydl.deprecation_warning(
                         f'Format sorting alias {alias} is deprecated '
                         f'and may be removed in a future version. Please use {field} instead')
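With `_real_extract` now raising `NotImplementedError`, the docstring change above spells out the subclassing contract: define `_VALID_URL` and `_real_extract()`, and optionally `_real_initialize()`. A minimal sketch of that contract, with a hypothetical site and un-registered extractor (real extractors are also added to the extractor list):

    from yt_dlp.extractor.common import InfoExtractor

    class ExampleIE(InfoExtractor):
        _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)  # derived from _VALID_URL's <id> group
            webpage = self._download_webpage(url, video_id)
            return {
                'id': video_id,
                'title': self._og_search_title(webpage),
                'url': self._og_search_video_url(webpage),
            }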
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 5448acf01..09b795c56 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -520,6 +520,7 @@ from .foxnews import (
     FoxNewsArticleIE,
 )
 from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
 from .franceculture import FranceCultureIE
 from .franceinter import FranceInterIE
 from .francetv import (
@@ -848,6 +849,7 @@ from .microsoftvirtualacademy import (
 from .mildom import (
     MildomIE,
     MildomVodIE,
+    MildomClipIE,
     MildomUserVodIE,
 )
 from .minds import (
@@ -1150,6 +1152,11 @@ from .palcomp3 import (
     PalcoMP3VideoIE,
 )
 from .pandoratv import PandoraTVIE
+from .panopto import (
+    PanoptoIE,
+    PanoptoListIE,
+    PanoptoPlaylistIE
+)
 from .paramountplus import (
     ParamountPlusIE,
     ParamountPlusSeriesIE,
@@ -1218,6 +1225,7 @@ from .podomatic import PodomaticIE
 from .pokemon import (
     PokemonIE,
     PokemonWatchIE,
+    PokemonSoundLibraryIE,
 )
 from .pokergo import (
     PokerGoIE,
@@ -2010,6 +2018,7 @@ from .ximalaya import (
     XimalayaIE,
     XimalayaAlbumIE
 )
+from .xinpianchang import XinpianchangIE
 from .xminus import XMinusIE
 from .xnxx import XNXXIE
 from .xstream import XstreamIE
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index d39dcc058..ef57b221c 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -18,6 +18,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     get_element_by_id,
+    get_first,
     int_or_none,
     js_to_json,
     merge_dicts,
@@ -405,11 +406,9 @@ class FacebookIE(InfoExtractor):
             ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
         media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
                  if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
-        title = traverse_obj(media, (..., 'title', 'text'), get_all=False)
-        description = traverse_obj(media, (
-            ..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False)
-        uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False)
-                         or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {})
+        title = get_first(media, ('title', 'text'))
+        description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
+        uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}

         page_title = title or self._html_search_regex((
             r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
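`get_first(objs, path)` is roughly `traverse_obj(objs, (..., *path), get_all=False)`: the first non-None hit across a list of dicts. A simplified stand-in (the real helper also supports `...` wildcards and multiple paths, which this toy version only partially mirrors):

    def get_first(objs, *paths):
        # Return the first non-None value found by walking each key path
        for obj in objs:
            for path in paths:
                value = obj
                for key in (path if isinstance(path, tuple) else (path,)):
                    value = value.get(key) if isinstance(value, dict) else None
                    if value is None:
                        break
                if value is not None:
                    return value

    media = [{'title': None}, {'title': {'text': 'hello'}}]
    assert get_first(media, ('title', 'text')) == 'hello'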
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
new file mode 100644
index 000000000..a34e90bb1
--- /dev/null
+++ b/yt_dlp/extractor/fptplay.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+    join_nonempty,
+)
+
+
+class FptplayIE(InfoExtractor):
+    _VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
+    _GEO_COUNTRIES = ['VN']
+    IE_NAME = 'fptplay'
+    IE_DESC = 'fptplay.vn'
+    _TESTS = [{
+        'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+        'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
+        'info_dict': {
+            'id': '621a123016f369ebbde55945',
+            'ext': 'mp4',
+            'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
+            'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+        },
+    }, {
+        'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
+        'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
+        'info_dict': {
+            'id': '61f3aa8a6b3b1d2e73c60eb5',
+            'ext': 'mp4',
+            'title': 'Má Tôi Là Đại Gia - 3',
+            'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+        },
+    }, {
+        'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
+        webpage = self._download_webpage(url, video_id=video_id, fatal=False)
+        info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': join_nonempty(
+                self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
+            'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
+
+    def get_api_with_st_token(self, video_id, episode):
+        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+        timestamp = int(time.time()) + 10800
+
+        t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+        r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+        n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
+
+        def convert(e):
+            t = ''
+            n = 0
+            i = [0, 0, 0]
+            a = [0, 0, 0, 0]
+            s = len(e)
+            c = 0
+            for z in range(s, 0, -1):
+                if n <= 3:
+                    i[n] = e[c]
+                n += 1
+                c += 1
+                if 3 == n:
+                    a[0] = (252 & i[0]) >> 2
+                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+                    a[3] = (63 & i[2])
+                    for v in range(4):
+                        t += r[a[v]]
+                    n = 0
+            if n:
+                for o in range(n, 3):
+                    i[o] = 0
+
+                for o in range(n + 1):
+                    a[0] = (252 & i[0]) >> 2
+                    a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+                    a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+                    a[3] = (63 & i[2])
+                    t += r[a[o]]
+                    n += 1
+                while n < 3:
+                    t += ''
+                    n += 1
+            return t
+
+        st_token = convert(n).replace('+', '-').replace('/', '_').replace('=', '')
+        return f'https://api.fptplay.net{path}?{urllib.parse.urlencode({"st": st_token, "e": timestamp})}'
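The new extractor signs each stream request with an upper-cased MD5 over a static secret, an expiry timestamp and the request path, then base64-like-encodes the digest bytes. A sketch of just the digest step, mirroring the constants shown above (the `convert()` pass is omitted here):

    import hashlib
    import time

    def sign_path(path, secret='WEBv6Dkdsad90dasdjlALDDDS', ttl=10800):
        # Returns the raw hex digest and the expiry it is bound to
        expiry = int(time.time()) + ttl
        digest = hashlib.md5(f'{secret}{expiry}{path}'.encode()).hexdigest().upper()
        return digest, expiry

    digest, expiry = sign_path('/api/v6.2_w/stream/vod/xyz/0/auto_vip')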
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index 40b8cb0b4..0d29da29b 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
         entries = []
         for lesson in lessons:
             lesson_name = lesson.get('slug')
-            if not lesson_name:
-                continue
             lesson_id = lesson.get('hash') or lesson.get('statsId')
+            if not lesson_id or not lesson_name:
+                continue
             entries.append(self._extract_lesson(chapters, lesson_id, lesson))

         title = course.get('title')
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 0ddd050ff..6a8b8543b 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -146,6 +146,7 @@ from .tvp import TVPEmbedIE
 from .blogger import BloggerIE
 from .mainstreaming import MainStreamingIE
 from .gfycat import GfycatIE
+from .panopto import PanoptoBaseIE


 class GenericIE(InfoExtractor):
@@ -2498,6 +2499,15 @@ class GenericIE(InfoExtractor):
                 'id': '?vid=2295'
             },
             'playlist_count': 9
+        },
+        {
+            # Panopto embeds
+            'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
+            'info_dict': {
+                'title': 'Insert a quiz into a Panopto video',
+                'id': 'insert-a-quiz-into-a-panopto-video'
+            },
+            'playlist_count': 1
         }
     ]

@@ -3723,6 +3733,9 @@ class GenericIE(InfoExtractor):
         if gfycat_urls:
             return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())

+        panopto_urls = PanoptoBaseIE._extract_urls(webpage)
+        if panopto_urls:
+            return self.playlist_from_matches(panopto_urls, video_id, video_title)
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries:
diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
index cab3aa045..4ac70ea57 100644
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -13,12 +13,15 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
+    try_get,
+    url_or_none,
 )


 class MGTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
     IE_DESC = '芒果TV'
+    IE_NAME = 'MangoTV'

     _TESTS = [{
         'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
@@ -31,6 +34,32 @@ class MGTVIE(InfoExtractor):
             'thumbnail': r're:^https?://.*\.jpg$',
         },
     }, {
+        'url': 'https://w.mgtv.com/b/427837/15588271.html',
+        'info_dict': {
+            'id': '15588271',
+            'ext': 'mp4',
+            'title': '春日迟迟再出发 沉浸版',
+            'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'duration': 4026,
+        },
+    }, {
+        'url': 'https://w.mgtv.com/b/333652/7329822.html',
+        'info_dict': {
+            'id': '7329822',
+            'ext': 'mp4',
+            'title': '拜托,请你爱我',
+            'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'duration': 2656,
+        },
+    }, {
+        'url': 'https://w.mgtv.com/b/427837/15591647.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
+        'only_matching': True,
+    }, {
         'url': 'http://www.mgtv.com/b/301817/3826653.html',
         'only_matching': True,
     }, {
@@ -40,12 +69,14 @@ class MGTVIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
+        tk2 = base64.urlsafe_b64encode(
+            f'did={compat_str(uuid.uuid4()).encode()}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
         try:
             api_data = self._download_json(
                 'https://pcweb.api.mgtv.com/player/video', video_id, query={
                     'tk2': tk2,
                     'video_id': video_id,
+                    'type': 'pch5'
                 }, headers=self.geo_verification_headers())['data']
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
@@ -61,6 +92,7 @@ class MGTVIE(InfoExtractor):
                 'pm2': api_data['atc']['pm2'],
                 'tk2': tk2,
                 'video_id': video_id,
+                'src': 'intelmgtv',
             }, headers=self.geo_verification_headers())['data']
         stream_domain = stream_data['stream_domain'][0]

@@ -71,7 +103,7 @@ class MGTVIE(InfoExtractor):
                 continue
             format_data = self._download_json(
                 stream_domain + stream_path, video_id,
-                note='Download video info for format #%d' % idx)
+                note=f'Download video info for format #{idx}')
             format_url = format_data.get('info')
             if not format_url:
                 continue
@@ -79,7 +111,7 @@ class MGTVIE(InfoExtractor):
                 r'_(\d+)_mp4/', format_url, 'tbr', default=None))
             formats.append({
                 'format_id': compat_str(tbr or idx),
-                'url': format_url,
+                'url': url_or_none(format_url),
                 'ext': 'mp4',
                 'tbr': tbr,
                 'protocol': 'm3u8_native',
@@ -97,4 +129,25 @@ class MGTVIE(InfoExtractor):
             'description': info.get('desc'),
             'duration': int_or_none(info.get('duration')),
             'thumbnail': info.get('thumb'),
+            'subtitles': self.extract_subtitles(video_id, stream_domain),
         }
+
+    def _get_subtitles(self, video_id, domain):
+        info = self._download_json(f'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
+                                   video_id, fatal=False) or {}
+        subtitles = {}
+        for sub in try_get(info, lambda x: x['data']['title']) or []:
+            url_sub = sub.get('url')
+            if not url_sub:
+                continue
+            locale = sub.get('captionCountrySimpleName')
+            sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
+                                      note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
+            sub_url = url_or_none(sub.get('info'))
+            if not sub_url:
+                continue
+            subtitles.setdefault(locale or 'en', []).append({
+                'url': sub_url,
+                'ext': 'srt'
+            })
+        return subtitles
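The new `_get_subtitles` groups one `.srt` entry per locale, in the standard shape extractors return for the `subtitles` field: a mapping of language code to a list of `{'url', 'ext'}` dicts. Illustrative values only:

    # Shape of the subtitles mapping built above
    subtitles = {}
    for locale, url in [('en', 'https://example.com/en.srt'), ('cn', 'https://example.com/cn.srt')]:
        subtitles.setdefault(locale, []).append({'url': url, 'ext': 'srt'})
    # => {'en': [{'url': ..., 'ext': 'srt'}], 'cn': [{'url': ..., 'ext': 'srt'}]}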
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index b5a2e17f2..5f2df29c6 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -1,102 +1,42 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import base64
-from datetime import datetime
-import itertools
+import functools
 import json

 from .common import InfoExtractor
 from ..utils import (
-    update_url_query,
-    random_uuidv4,
-    try_get,
+    determine_ext,
+    dict_get,
+    ExtractorError,
     float_or_none,
-    dict_get
-)
-from ..compat import (
-    compat_str,
+    OnDemandPagedList,
+    random_uuidv4,
+    traverse_obj,
 )


 class MildomBaseIE(InfoExtractor):
     _GUEST_ID = None
-    _DISPATCHER_CONFIG = None
-
-    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
-        query = query or {}
-        if query:
-            query['__platform'] = 'web'
-        url = update_url_query(url, self._common_queries(query, init=init))
-        content = self._download_json(url, video_id, note=note)
-        if content['code'] == 0:
-            return content['body']
-        else:
-            self.raise_no_formats(
-                f'Video not found or premium content. {content["code"]} - {content["message"]}',
-                expected=True)

-    def _common_queries(self, query={}, init=False):
-        dc = self._fetch_dispatcher_config()
-        r = {
-            'timestamp': self.iso_timestamp(),
-            '__guest_id': '' if init else self.guest_id(),
-            '__location': dc['location'],
-            '__country': dc['country'],
-            '__cluster': dc['cluster'],
-            '__platform': 'web',
-            '__la': self.lang_code(),
-            '__pcv': 'v2.9.44',
-            'sfr': 'pc',
-            'accessToken': '',
-        }
-        r.update(query)
-        return r
-
-    def _fetch_dispatcher_config(self):
-        if not self._DISPATCHER_CONFIG:
-            tmp = self._download_json(
-                'https://disp.mildom.com/serverListV2', 'initialization',
-                note='Downloading dispatcher_config', data=json.dumps({
-                    'protover': 0,
-                    'data': base64.b64encode(json.dumps({
-                        'fr': 'web',
-                        'sfr': 'pc',
-                        'devi': 'Windows',
-                        'la': 'ja',
-                        'gid': None,
-                        'loc': '',
-                        'clu': '',
-                        'wh': '1919*810',
-                        'rtm': self.iso_timestamp(),
-                        'ua': self.get_param('http_headers')['User-Agent'],
-                    }).encode('utf8')).decode('utf8').replace('\n', ''),
-                }).encode('utf8'))
-            self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
-        return self._DISPATCHER_CONFIG
-
-    @staticmethod
-    def iso_timestamp():
-        'new Date().toISOString()'
-        return datetime.utcnow().isoformat()[0:-3] + 'Z'
-
-    def guest_id(self):
-        'getGuestId'
-        if self._GUEST_ID:
-            return self._GUEST_ID
-        self._GUEST_ID = try_get(
-            self, (
-                lambda x: x._call_api(
-                    'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
-                    note='Downloading guest token', init=True)['guest_id'] or None,
-                lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
-                lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
-            ), compat_str) or ''
-        return self._GUEST_ID
-
-    def lang_code(self):
-        'getCurrentLangCode'
-        return 'ja'
+    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
+        if not self._GUEST_ID:
+            self._GUEST_ID = f'pc-gp-{random_uuidv4()}'
+
+        content = self._download_json(
+            url, video_id, note=note, data=json.dumps(body).encode() if body else None,
+            headers={'Content-Type': 'application/json'} if body else {},
+            query={
+                '__guest_id': self._GUEST_ID,
+                '__platform': 'web',
+                **(query or {}),
+            })
+
+        if content['code'] != 0:
+            raise ExtractorError(
+                f'Mildom says: {content["message"]} (code {content["code"]})',
+                expected=True)
+        return content['body']


 class MildomIE(MildomBaseIE):
@@ -106,31 +46,13 @@ class MildomIE(MildomBaseIE):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        url = 'https://www.mildom.com/%s' % video_id
-
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)

         enterstudio = self._call_api(
             'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
             note='Downloading live metadata', query={'user_id': video_id})
         result_video_id = enterstudio.get('log_id', video_id)

-        title = try_get(
-            enterstudio, (
-                lambda x: self._html_search_meta('twitter:description', webpage),
-                lambda x: x['anchor_intro'],
-            ), compat_str)
-        description = try_get(
-            enterstudio, (
-                lambda x: x['intro'],
-                lambda x: x['live_intro'],
-            ), compat_str)
-        uploader = try_get(
-            enterstudio, (
-                lambda x: self._html_search_meta('twitter:title', webpage),
-                lambda x: x['loginname'],
-            ), compat_str)
-
         servers = self._call_api(
             'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
             note='Downloading live server list', query={
@@ -138,17 +60,20 @@ class MildomIE(MildomBaseIE):
                 'live_server_type': 'hls',
             })

-        stream_query = self._common_queries({
-            'streamReqId': random_uuidv4(),
-            'is_lhls': '0',
-        })
-        m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
-        formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
-            'Referer': 'https://www.mildom.com/',
-            'Origin': 'https://www.mildom.com',
-        }, note='Downloading m3u8 information')
-
-        del stream_query['streamReqId'], stream_query['timestamp']
+        playback_token = self._call_api(
+            'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
+            note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
+        playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
+        if not playback_token:
+            raise ExtractorError('Failed to obtain live playback token')
+
+        formats = self._extract_m3u8_formats(
+            f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
+            result_video_id, 'mp4', headers={
+                'Referer': 'https://www.mildom.com/',
+                'Origin': 'https://www.mildom.com',
+            })
+
         for fmt in formats:
             fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'

@@ -156,10 +81,10 @@ class MildomIE(MildomBaseIE):

         return {
             'id': result_video_id,
-            'title': title,
-            'description': description,
+            'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
+            'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
             'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
-            'uploader': uploader,
+            'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
             'uploader_id': video_id,
             'formats': formats,
             'is_live': True,
@@ -168,7 +93,7 @@ class MildomIE(MildomBaseIE):

 class MildomVodIE(MildomBaseIE):
     IE_NAME = 'mildom:vod'
-    IE_DESC = 'Download a VOD in Mildom'
+    IE_DESC = 'VOD in Mildom'
     _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
     _TESTS = [{
         'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
@@ -215,11 +140,8 @@ class MildomVodIE(MildomBaseIE):
     }]

     def _real_extract(self, url):
-        m = self._match_valid_url(url)
-        user_id, video_id = m.group('user_id'), m.group('id')
-        url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
-
-        webpage = self._download_webpage(url, video_id)
+        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

         autoplay = self._call_api(
             'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
@@ -227,20 +149,6 @@ class MildomVodIE(MildomBaseIE):
                 'v_id': video_id,
             })['playback']

-        title = try_get(
-            autoplay, (
-                lambda x: self._html_search_meta('og:description', webpage),
-                lambda x: x['title'],
-            ), compat_str)
-        description = try_get(
-            autoplay, (
-                lambda x: x['video_intro'],
-            ), compat_str)
-        uploader = try_get(
-            autoplay, (
-                lambda x: x['author_info']['login_name'],
-            ), compat_str)
-
         formats = [{
             'url': autoplay['audio_url'],
             'format_id': 'audio',
@@ -265,17 +173,81 @@ class MildomVodIE(MildomBaseIE):

         return {
             'id': video_id,
-            'title': title,
-            'description': description,
-            'timestamp': float_or_none(autoplay['publish_time'], scale=1000),
-            'duration': float_or_none(autoplay['video_length'], scale=1000),
+            'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
+            'description': traverse_obj(autoplay, 'video_intro'),
+            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
+            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
             'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
-            'uploader': uploader,
+            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
             'uploader_id': user_id,
             'formats': formats,
         }


+class MildomClipIE(MildomBaseIE):
+    IE_NAME = 'mildom:clip'
+    IE_DESC = 'Clip in Mildom'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
+        'info_dict': {
+            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
+            'title': '全然違ったよ',
+            'timestamp': 1619181890,
+            'duration': 59,
+            'thumbnail': r're:https?://.+',
+            'uploader': 'ざきんぽ',
+            'uploader_id': '10042245',
+        },
+    }, {
+        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+        'info_dict': {
+            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+            'title': 'かっこいい',
+            'timestamp': 1621094003,
+            'duration': 59,
+            'thumbnail': r're:https?://.+',
+            'uploader': '(ルーキー',
+            'uploader_id': '10111524',
+        },
+    }, {
+        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+        'info_dict': {
+            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+            'title': 'あ',
+            'timestamp': 1614769431,
+            'duration': 31,
+            'thumbnail': r're:https?://.+',
+            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
+            'uploader_id': '10660174',
+        },
+    }]
+
+    def _real_extract(self, url):
+        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
+
+        clip_detail = self._call_api(
+            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
+            note='Downloading playback metadata', query={
+                'clip_id': video_id,
+            })
+
+        return {
+            'id': video_id,
+            'title': self._html_search_meta(
+                ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
+            'timestamp': float_or_none(clip_detail.get('create_time')),
+            'duration': float_or_none(clip_detail.get('length')),
+            'thumbnail': clip_detail.get('cover'),
+            'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
+            'uploader_id': user_id,
+
+            'url': clip_detail['url'],
+            'ext': determine_ext(clip_detail.get('url'), 'mp4'),
+        }
+
+
 class MildomUserVodIE(MildomBaseIE):
     IE_NAME = 'mildom:user:vod'
     IE_DESC = 'Download all VODs from specific user in Mildom'
@@ -286,29 +258,32 @@ class MildomUserVodIE(MildomBaseIE):
             'id': '10093333',
             'title': 'Uploads from ねこばたけ',
         },
-        'playlist_mincount': 351,
+        'playlist_mincount': 732,
     }, {
         'url': 'https://www.mildom.com/profile/10882672',
         'info_dict': {
             'id': '10882672',
             'title': 'Uploads from kson組長(けいそん)',
         },
-        'playlist_mincount': 191,
+        'playlist_mincount': 201,
     }]

-    def _entries(self, user_id):
-        for page in itertools.count(1):
-            reply = self._call_api(
-                'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
-                user_id, note='Downloading page %d' % page, query={
-                    'user_id': user_id,
-                    'page': page,
-                    'limit': '30',
-                })
-            if not reply:
-                break
-            for x in reply:
-                yield self.url_result('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']))
+    def _fetch_page(self, user_id, page):
+        page += 1
+        reply = self._call_api(
+            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
+            user_id, note=f'Downloading page {page}', query={
+                'user_id': user_id,
+                'page': page,
+                'limit': '30',
+            })
+        if not reply:
+            return
+        for x in reply:
+            v_id = x.get('v_id')
+            if not v_id:
+                continue
+            yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

     def _real_extract(self, url):
         user_id = self._match_id(url)
@@ -319,4 +294,5 @@ class MildomUserVodIE(MildomBaseIE):
             query={'user_id': user_id}, note='Downloading user profile')['user_info']

         return self.playlist_result(
-            self._entries(user_id), user_id, 'Uploads from %s' % profile['loginname'])
+            OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
+            user_id, f'Uploads from {profile["loginname"]}')
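`MildomUserVodIE` moves from an eager `itertools.count` loop to `OnDemandPagedList`, so pages are only fetched when their entries are actually consumed (for example when `--playlist-items` skips most of them). A standalone sketch of that mechanism; the fetcher below is a placeholder for the `_call_api` request:

    import functools
    from yt_dlp.utils import OnDemandPagedList

    def fetch_page(user_id, page):
        # OnDemandPagedList passes a 0-based page index; yield that page's entries
        for i in range(30):
            yield {'id': f'{user_id}-{page * 30 + i}'}

    entries = OnDemandPagedList(functools.partial(fetch_page, '10093333'), 30)
    first_five = entries.getslice(0, 5)  # triggers exactly one page fetch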
+ traverse_obj(window_stores, ('v8', 'state', 'movie'), expected_type=dict), + traverse_obj(window_stores, ('v8', 'movie'), expected_type=dict), + traverse_obj(window_stores, 'movieStore', expected_type=dict), + ] + if not any(movie_stores): raise ExtractorError(f'Failed to extract {name} info') - title = movie_store.get('title') - description = movie_store.get('introduction') - thumbnail = movie_store.get('thumbnailUrl') - - uploader = traverse_obj(movie_store, ('channel', 'user', 'name'), expected_type=compat_str) - uploader_id = traverse_obj(movie_store, ('channel', 'user', 'id'), expected_type=compat_str) - - timestamp = int_or_none(traverse_obj(movie_store, ('publishedAt', 'time')), scale=1000) - - m3u8_playlists = movie_store.get('media') or {} + m3u8_playlists = get_first(movie_stores, 'media') or {} formats = [] for name, m3u8_url in m3u8_playlists.items(): if not m3u8_url: continue formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id='hls-%s' % name)) + m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id=name)) self._sort_formats(formats) return { 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, + 'title': get_first(movie_stores, 'title'), + 'description': get_first(movie_stores, 'introduction'), + 'thumbnail': get_first(movie_stores, 'thumbnailUrl'), 'formats': formats, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'timestamp': timestamp, + 'uploader': get_first(movie_stores, ('channel', 'user', 'name')), + 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')), + 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')), 'is_live': is_live, } @@ -72,7 +65,7 @@ class OpenRecIE(OpenRecBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id) + webpage = self._download_webpage(f'https://www.openrec.tv/live/{video_id}', video_id) return self._extract_movie(webpage, video_id, 'live', True) @@ -96,7 +89,7 @@ class OpenRecCaptureIE(OpenRecBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id) + webpage = self._download_webpage(f'https://www.openrec.tv/capture/{video_id}', video_id) window_stores = self._extract_pagestore(webpage, video_id) movie_store = window_stores.get('movie') @@ -104,15 +97,6 @@ class OpenRecCaptureIE(OpenRecBaseIE): capture_data = window_stores.get('capture') if not capture_data: raise ExtractorError('Cannot extract title') - title = capture_data.get('title') - thumbnail = capture_data.get('thumbnailUrl') - upload_date = unified_strdate(capture_data.get('createdAt')) - - uploader = traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str) - uploader_id = traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str) - - timestamp = traverse_obj(movie_store, 'createdAt', expected_type=compat_str) - timestamp = unified_timestamp(timestamp) formats = self._extract_m3u8_formats( capture_data.get('source'), video_id, ext='mp4') @@ -120,13 +104,13 @@ class OpenRecCaptureIE(OpenRecBaseIE): return { 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, + 'title': capture_data.get('title'), + 'thumbnail': capture_data.get('thumbnailUrl'), 'formats': formats, - 'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 
'upload_date': upload_date, + 'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)), + 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str), + 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str), + 'upload_date': unified_strdate(capture_data.get('createdAt')), } @@ -148,6 +132,6 @@ class OpenRecMovieIE(OpenRecBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('https://www.openrec.tv/movie/%s' % video_id, video_id) + webpage = self._download_webpage(f'https://www.openrec.tv/movie/{video_id}', video_id) return self._extract_movie(webpage, video_id, 'movie', False) diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py new file mode 100644 index 000000000..d458dfe50 --- /dev/null +++ b/yt_dlp/extractor/panopto.py @@ -0,0 +1,445 @@ +import re +import calendar +import json +import functools +from datetime import datetime +from random import random + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlparse, + compat_urlparse +) + +from ..utils import ( + bug_reports_message, + ExtractorError, + get_first, + int_or_none, + OnDemandPagedList, + parse_qs, + traverse_obj, +) + + +class PanoptoBaseIE(InfoExtractor): + BASE_URL_RE = r'(?P<base_url>https?://[\w.]+\.panopto.(?:com|eu)/Panopto)' + + def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs): + response = self._download_json( + base_url + path, video_id, data=json.dumps(data).encode('utf8') if data else None, + fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs) + if not response: + return + error_code = response.get('ErrorCode') + if error_code == 2: + self.raise_login_required(method='cookies') + elif error_code is not None: + msg = f'Panopto said: {response.get("ErrorMessage")}' + if fatal: + raise ExtractorError(msg, video_id=video_id, expected=True) + else: + self.report_warning(msg, video_id=video_id) + return response + + @staticmethod + def _parse_fragment(url): + return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()} + + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r'<iframe[^>]+src=["\'](?P<url>%s/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)' % PanoptoIE.BASE_URL_RE, + webpage)] + + +class PanoptoIE(PanoptoBaseIE): + _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)' + _TESTS = [ + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', + 'info_dict': { + 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb', + 'title': 'Panopto for Business - Use Cases', + 'timestamp': 1459184200, + 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb&mode=Delivery&random=[\d.]+', + 'upload_date': '20160328', + 'ext': 'mp4', + 'cast': [], + 'duration': 88.17099999999999, + 'average_rating': int, + 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e', + 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', + 'channel': 'Showcase Videos' + }, + }, + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59', + 'info_dict': { + 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59', + 'title': 'Overcoming Top 4 Challenges of 
Enterprise Video', + 'uploader': 'Panopto Support', + 'timestamp': 1449409251, + 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59&mode=Delivery&random=[\d.]+', + 'upload_date': '20151206', + 'ext': 'mp4', + 'chapters': 'count:21', + 'cast': ['Panopto Support'], + 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c', + 'average_rating': int, + 'description': 'md5:4391837802b3fc856dadf630c4b375d1', + 'duration': 1088.2659999999998, + 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546', + 'channel': 'Webcasts', + }, + }, + { + # Extra params in URL + 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true', + 'info_dict': { + 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed', + 'ext': 'mp4', + 'duration': 129.513, + 'cast': ['Kathryn Kelly'], + 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56', + 'timestamp': 1569845768, + 'tags': ['Viewer', 'Enterprise'], + 'upload_date': '20190930', + 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=5fa74e93-3d87-4694-b60e-aaa4012214ed&mode=Delivery&random=[\d.]+', + 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f', + 'title': 'Getting Started: View a Video', + 'average_rating': int, + 'uploader': 'Kathryn Kelly', + 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3', + 'channel': 'Getting Started', + } + }, + { + # Does not allow the normal Viewer.aspx. The AUDIO livestream has no URL, so it should be skipped, leaving only one stream. + 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', + 'info_dict': { + 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4', + 'ext': 'mp4', + 'cast': ['LTS CLI Script'], + 'duration': 2178.45, + 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa', + 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8', + 'average_rating': int, + 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7', + 'uploader': 'LTS CLI Script', + 'timestamp': 1572458134, + 'title': 'WW2 Vets Interview 3 Ronald Stanley George', + 'thumbnail': r're:https://unisa\.au\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4&mode=Delivery&random=[\d.]+', + 'channel': 'World War II Veteran Interviews', + 'upload_date': '20191030', + }, + }, + { + 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb', + 'only_matching': True + }, + { + 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638', + 'only_matching': True + }, + ] + + @classmethod + def suitable(cls, url): + return False if PanoptoPlaylistIE.suitable(url) else super().suitable(url) + + def _mark_watched(self, base_url, video_id, delivery_info): + duration = traverse_obj(delivery_info, ('Delivery', 'Duration'), expected_type=float) + invocation_id = delivery_info.get('InvocationId') + stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str) + if invocation_id and stream_id and duration: + timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/' + data = { + 'streamRequests': [ + { + 'ClientTimeStamp': timestamp_str, + 'ID': 0, + 'InvocationID': invocation_id, + 'PlaybackSpeed': 1, + 'SecondsListened': duration - 1, + 'SecondsRejected': 0, + 'StartPosition': 0, +
'StartReason': 2, + 'StopReason': None, + 'StreamID': stream_id, + 'TimeStamp': timestamp_str, + 'UpdatesRejected': 0 + }, + ]} + + self._download_webpage( + base_url + '/Services/Analytics.svc/AddStreamRequests', video_id, + fatal=False, data=json.dumps(data).encode('utf8'), headers={'content-type': 'application/json'}, + note='Marking watched', errnote='Unable to mark watched') + + @staticmethod + def _extract_chapters(delivery): + chapters = [] + for timestamp in delivery.get('Timestamps', []): + start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration')) + if start is None or duration is None: + continue + chapters.append({ + 'start_time': start, + 'end_time': start + duration, + 'title': timestamp.get('Caption') + }) + return chapters + + def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs): + formats = [] + subtitles = {} + for stream in streams or []: + stream_formats = [] + http_stream_url = stream.get('StreamHttpUrl') + stream_url = stream.get('StreamUrl') + + if http_stream_url: + stream_formats.append({'url': http_stream_url}) + + if stream_url: + media_type = stream.get('ViewerMediaFileTypeName') + if media_type in ('hls', ): + m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id) + stream_formats.extend(m3u8_formats) + subtitles = self._merge_subtitles(subtitles, stream_subtitles) + else: + stream_formats.append({ + 'url': stream_url + }) + for fmt in stream_formats: + fmt.update({ + 'format_note': stream.get('Tag'), + **fmt_kwargs + }) + formats.extend(stream_formats) + + return formats, subtitles + + def _real_extract(self, url): + base_url, video_id = self._match_valid_url(url).group('base_url', 'id') + delivery_info = self._call_api( + base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id, + query={ + 'deliveryId': video_id, + 'invocationId': '', + 'isLiveNotes': 'false', + 'refreshAuthCookie': 'true', + 'isActiveBroadcast': 'false', + 'isEditing': 'false', + 'isKollectiveAgentInstalled': 'false', + 'isEmbed': 'false', + 'responseType': 'json', + } + ) + + delivery = delivery_info['Delivery'] + session_start_time = int_or_none(delivery.get('SessionStartTime')) + + # The podcast stream is usually the combined stream, so we prefer it by default (see the preference sketch below).
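# A minimal sketch (illustration only) of why the PODCAST formats win by
# default: they carry no explicit 'preference', while the per-source streams
# below are extracted with preference=-10. Assume, simplifying heavily, that
# selection compares only 'preference' with a missing value treated as 0;
# yt-dlp's real format sorting weighs many more fields. The format_ids are
# invented.
formats = [
    {'format_id': 'hls-screen', 'preference': -10},           # individual stream, demoted
    {'format_id': 'hls-podcast', 'format_note': 'PODCAST'},   # combined stream, default preference
]
best = max(formats, key=lambda f: f.get('preference') or 0)
assert best['format_id'] == 'hls-podcast'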
+ podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles( + video_id, delivery.get('PodcastStreams'), format_note='PODCAST') + + streams_formats, streams_subtitles = self._extract_streams_formats_and_subtitles( + video_id, delivery.get('Streams'), preference=-10) + + formats = podcast_formats + streams_formats + subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles) + self._sort_formats(formats) + + self.mark_watched(base_url, video_id, delivery_info) + + return { + 'id': video_id, + 'title': delivery.get('SessionName'), + 'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), default=[], expected_type=lambda x: x or None), + 'timestamp': session_start_time - 11640000000 if session_start_time else None, + 'duration': delivery.get('Duration'), + 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}', + 'average_rating': delivery.get('AverageRating'), + 'chapters': self._extract_chapters(delivery) or None, + 'uploader': delivery.get('OwnerDisplayName') or None, + 'uploader_id': delivery.get('OwnerId'), + 'description': delivery.get('SessionAbstract'), + 'tags': traverse_obj(delivery, ('Tags', ..., 'Content')), + 'channel_id': delivery.get('SessionGroupPublicID'), + 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False), + 'formats': formats, + 'subtitles': subtitles + } + + +class PanoptoPlaylistIE(PanoptoBaseIE): + _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)' + _TESTS = [ + { + 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true', + 'info_dict': { + 'title': 'Featured Video Tutorials', + 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36', + 'description': '', + }, + 'playlist_mincount': 36 + }, + { + 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190', + 'info_dict': { + 'title': 'Library Website Introduction Playlist', + 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190', + 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb', + }, + 'playlist_mincount': 4 + }, + + ] + + def _entries(self, base_url, playlist_id, session_list_id): + session_list_info = self._call_api( + base_url, f'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id) + + items = session_list_info['Items'] + for item in items: + if item.get('TypeName') != 'Session': + self.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once=True) + continue + yield { + '_type': 'url', + 'id': item.get('Id'), + 'url': item.get('ViewerUri'), + 'title': item.get('Name'), + 'description': item.get('Description'), + 'duration': item.get('Duration'), + 'channel': traverse_obj(item, ('Parent', 'Name')), + 'channel_id': traverse_obj(item, ('Parent', 'Id')) + } + + def _real_extract(self, url): + base_url, playlist_id = self._match_valid_url(url).group('base_url', 'id') + + video_id = get_first(parse_qs(url), 'id') + if video_id: + if self.get_param('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id) + else: + self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video 
{video_id}') + + playlist_info = self._call_api(base_url, f'/Api/Playlists/{playlist_id}', playlist_id) + return self.playlist_result( + self._entries(base_url, playlist_id, playlist_info['SessionListId']), + playlist_id=playlist_id, playlist_title=playlist_info.get('Name'), + playlist_description=playlist_info.get('Description')) + + +class PanoptoListIE(PanoptoBaseIE): + _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx' + _PAGE_SIZE = 250 + _TESTS = [ + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22', + 'info_dict': { + 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a', + 'title': 'Showcase Videos' + }, + 'playlist_mincount': 140 + + }, + { + 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250', + 'info_dict': { + 'id': 'panopto_list', + 'title': 'panopto_list' + }, + 'playlist_mincount': 300 + }, + { + # Folder that contains 8 folders and a playlist + 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22', + 'info_dict': { + 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e', + 'title': 'Video Tutorials' + }, + 'playlist_mincount': 9 + } + + ] + + def _fetch_page(self, base_url, query_params, display_id, page): + + params = { + 'sortColumn': 1, + 'getFolderData': True, + 'includePlaylists': True, + **query_params, + 'page': page, + 'maxResults': self._PAGE_SIZE, + } + + response = self._call_api( + base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page+1}', + data={'queryParameters': params}, fatal=False) + + for result in get_first(response, 'Results', default=[]): + # This could be a video, playlist (or maybe something else) + item_id = result.get('DeliveryID') + yield { + '_type': 'url', + 'id': item_id, + 'title': result.get('SessionName'), + 'url': traverse_obj(result, 'ViewerUrl', 'EmbedUrl', get_all=False) or (base_url + f'/Pages/Viewer.aspx?id={item_id}'), + 'duration': result.get('Duration'), + 'channel': result.get('FolderName'), + 'channel_id': result.get('FolderID'), + } + + for folder in get_first(response, 'Subfolders', default=[]): + folder_id = folder.get('ID') + yield self.url_result( + base_url + f'/Pages/Sessions/List.aspx#folderID="{folder_id}"', + ie_key=PanoptoListIE.ie_key(), video_id=folder_id, title=folder.get('Name')) + + def _extract_folder_metadata(self, base_url, folder_id): + response = self._call_api( + base_url, '/Services/Data.svc/GetFolderInfo', folder_id, + data={'folderID': folder_id}, fatal=False) + return { + 'title': get_first(response, 'Name', default=[]) + } + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + base_url = mobj.group('base_url') + + query_params = self._parse_fragment(url) + folder_id, display_id = query_params.get('folderID'), 'panopto_list' + + if query_params.get('isSubscriptionsPage'): + display_id = 'subscriptions' + if not query_params.get('subscribableTypes'): + query_params['subscribableTypes'] = [0, 1, 2] + elif query_params.get('isSharedWithMe'): + display_id = 'sharedwithme' + elif folder_id: + display_id = folder_id + + query = query_params.get('query') + if query: + display_id += f': query "{query}"' + + info = { + '_type': 'playlist', + 'id': display_id, + 'title': display_id, + } + if folder_id: + info.update(self._extract_folder_metadata(base_url, folder_id)) + + info['entries'] = OnDemandPagedList( + functools.partial(self._fetch_page, base_url, 
query_params, display_id), self._PAGE_SIZE) + + return info diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index e0b2ab982..9d6b82178 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -87,6 +87,7 @@ class PeerTubeIE(InfoExtractor): maindreieck-tv\.de| mani\.tube| manicphase\.me| + media\.fsfe\.org| media\.gzevd\.de| media\.inno3\.cricket| media\.kaitaia\.life| diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index b93a02b7d..1a292b8ac 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -33,7 +33,7 @@ class PeriscopeBaseIE(InfoExtractor): return { 'id': broadcast.get('id') or video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'timestamp': parse_iso8601(broadcast.get('created_at')), 'uploader': uploader, 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index 402b574a7..b411390e2 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor from ..utils import ( @@ -138,3 +139,42 @@ class PokemonWatchIE(InfoExtractor): 'episode': video_data.get('title'), 'episode_number': int_or_none(video_data.get('episode')), }) + + +class PokemonSoundLibraryIE(InfoExtractor): + _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp' + + _TESTS = [{ + 'url': 'https://soundlibrary.pokemon.co.jp/', + 'info_dict': { + 'title': 'Pokémon Diamond and Pearl Sound Tracks', + }, + 'playlist_mincount': 149, + }] + + def _real_extract(self, url): + musicbox_webpage = self._download_webpage( + 'https://soundlibrary.pokemon.co.jp/musicbox', None, + 'Downloading list of songs') + song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)] + song_titles = song_titles[4::2] + + # the songs have no individual permalinks; instead we return all songs at once + song_entries = [{ + 'id': f'pokemon-soundlibrary-{song_id}', + 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav', + # note: the server always serves MP3 files, despite the .wav extension in the URL above + 'ext': 'mp3', + 'acodec': 'mp3', + 'vcodec': 'none', + 'title': song_title, + 'track': song_title, + 'artist': 'Nintendo / Creatures Inc.
/ GAME FREAK inc.', + 'uploader': 'Pokémon', + 'release_year': 2006, + 'release_date': '20060928', + 'track_number': song_id, + 'album': 'Pokémon Diamond and Pearl', + } for song_id, song_title in enumerate(song_titles, 1)] + + return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks') diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 79a5b2336..0fd65db4b 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -100,7 +100,7 @@ class RokfinIE(InfoExtractor): video_url, video_id, fatal=False, live=live_status == 'is_live') if not formats: - if metadata.get('premiumPlan'): + if traverse_obj(metadata, 'premiumPlan', 'premium'): self.raise_login_required('This video is only available to premium users', True, method='cookies') elif scheduled: self.raise_no_formats( @@ -129,7 +129,7 @@ class RokfinIE(InfoExtractor): 'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none), 'live_status': live_status, 'availability': self._availability( - needs_premium=bool(metadata.get('premiumPlan')), + needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')), is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False), # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None, diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 8146b3ef5..64b8a71b6 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -59,8 +59,16 @@ class SoundcloudEmbedIE(InfoExtractor): class SoundcloudBaseIE(InfoExtractor): + _NETRC_MACHINE = 'soundcloud' + _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' + _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' + _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' + _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s' + _access_token = None + _HEADERS = {} def _store_client_id(self, client_id): self._downloader.cache.store('soundcloud', 'client_id', client_id) @@ -103,14 +111,6 @@ class SoundcloudBaseIE(InfoExtractor): self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' self._login() - _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' - _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' - _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' - _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s' - _access_token = None - _HEADERS = {} - _NETRC_MACHINE = 'soundcloud' - def _login(self): username, password = self._get_login_info() if username is None: diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index daf1c7450..4bc2263f0 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -67,6 +67,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'series': 'The Witcher', 'season': 'Misc', 'episode_number': 13, + 'episode': 'Episode 13', }, }, { @@ -92,6 +93,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'series': 'Arma 3', 'season': 'Zeus Games', 'episode_number': 3, + 'episode': 'Episode 3', }, }, ] diff --git 
a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 620973a9f..56cc2dcc6 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, HEADRequest, + get_first, int_or_none, join_nonempty, LazyList, @@ -816,8 +817,7 @@ class DouyinIE(TikTokIE): render_data = self._parse_json( render_data_json, video_id, transform_source=compat_urllib_parse_unquote) - return self._parse_aweme_video_web( - traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url) + return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url) class TikTokVMIE(InfoExtractor): diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py new file mode 100644 index 000000000..9832d2398 --- /dev/null +++ b/yt_dlp/extractor/xinpianchang.py @@ -0,0 +1,95 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + update_url_query, + url_or_none, +) + + +class XinpianchangIE(InfoExtractor): + _VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)' + IE_NAME = 'xinpianchang' + IE_DESC = 'xinpianchang.com' + _TESTS = [{ + 'url': 'https://www.xinpianchang.com/a11766551', + 'info_dict': { + 'id': 'a11766551', + 'ext': 'mp4', + 'title': '北京2022冬奥会闭幕式再见短片-冰墩墩下班了', + 'description': 'md5:4a730c10639a82190fabe921c0fa4b87', + 'duration': 151, + 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/', + 'uploader': '正时文创', + 'uploader_id': 10357277, + 'categories': ['宣传片', '国家城市', '广告', '其他'], + 'keywords': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火'] + }, + }, { + 'url': 'https://www.xinpianchang.com/a11762904', + 'info_dict': { + 'id': 'a11762904', + 'ext': 'mp4', + 'title': '冬奥会决胜时刻《法国派出三只鸡?》', + 'description': 'md5:55cb139ef8f48f0c877932d1f196df8b', + 'duration': 136, + 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/', + 'uploader': '精品动画', + 'uploader_id': 10858927, + 'categories': ['动画', '三维CG'], + 'keywords': ['France Télévisions', '法国3台', '蠢萌', '冬奥会'] + }, + }, { + 'url': 'https://www.xinpianchang.com/a11779743?from=IndexPick&part=%E7%BC%96%E8%BE%91%E7%B2%BE%E9%80%89&index=2', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id=video_id) + domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage) + vid = self.find_value_with_regex(var='vid', webpage=webpage) + app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage) + api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key}) + data = self._download_json(api, video_id=video_id)['data'] + formats, subtitles = [], {} + for k, v in data.get('resource').items(): + if k in ('dash', 'hls'): + v_url = v.get('url') + if not v_url: + continue + if k == 'dash': + fmts, subs = self._extract_mpd_formats_and_subtitles(v_url, video_id=video_id) + elif k == 'hls': + fmts, subs = self._extract_m3u8_formats_and_subtitles(v_url, video_id=video_id) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + elif k == 'progressive': + formats.extend([{ + 'url': url_or_none(prog.get('url')), + 'width': int_or_none(prog.get('width')), + 'height': int_or_none(prog.get('height')), + 'ext': 'mp4', + } for prog in v if prog.get('url') or []]) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': data.get('title'), + 'description': 
data.get('description'), + 'duration': int_or_none(data.get('duration')), + 'categories': data.get('categories'), + 'keywords': data.get('keywords'), + 'thumbnail': data.get('cover'), + 'uploader': try_get(data, lambda x: x['owner']['username']), + 'uploader_id': try_get(data, lambda x: x['owner']['id']), + 'formats': formats, + 'subtitles': subtitles, + } + + def find_value_with_regex(self, var, webpage): + return self._search_regex(rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"', webpage, name=var) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index ee0277fd7..66bb8d9f0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -39,6 +39,7 @@ from ..utils import ( ExtractorError, float_or_none, format_field, + get_first, int_or_none, is_html, join_nonempty, @@ -72,10 +73,6 @@ from ..utils import ( ) -def get_first(obj, keys, **kwargs): - return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - - # any clients starting with _ cannot be explicitly requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -2081,7 +2078,93 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 0, 'channel_follower_count': int }, 'params': {'format': 'mhtml', 'skip_download': True} - } + }, { + # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) + 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', + 'info_dict': { + 'id': '2NUZ8W2llS4', + 'ext': 'mp4', + 'title': 'The NP that test your phone performance 🙂', + 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', + 'uploader': 'Leon Nguyen', + 'uploader_id': 'VNSXIII', + 'uploader_url': 'http://www.youtube.com/user/VNSXIII', + 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', + 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', + 'duration': 21, + 'view_count': int, + 'age_limit': 0, + 'categories': ['Gaming'], + 'tags': 'count:23', + 'playable_in_embed': True, + 'live_status': 'not_live', + 'upload_date': '20220103', + 'like_count': int, + 'availability': 'public', + 'channel': 'Leon Nguyen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', + 'channel_follower_count': int + } + }, { + # Date text is for a premiered video; ensure upload date is in UTC (published 1641172509) + 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', + 'info_dict': { + 'id': 'mzZzzBU6lrM', + 'ext': 'mp4', + 'title': 'I Met GeorgeNotFound In Real Life...', + 'description': 'md5:cca98a355c7184e750f711f3a1b22c84', + 'uploader': 'Quackity', + 'uploader_id': 'QuackityHQ', + 'uploader_url': 'http://www.youtube.com/user/QuackityHQ', + 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q', + 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q', + 'duration': 955, + 'view_count': int, + 'age_limit': 0, + 'categories': ['Entertainment'], + 'tags': 'count:26', + 'playable_in_embed': True, + 'live_status': 'not_live', + 'release_timestamp': 1641172509, + 'release_date': '20220103', + 'upload_date': '20220103', + 'like_count': int, + 'availability': 'public', + 'channel': 'Quackity', + 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg', + 'channel_follower_count': int + } + }, + { # continuous livestream. Microformat upload date should be preferred.
+ # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 + 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + 'info_dict': { + 'id': 'kgx4WGK0oNU', + 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'ext': 'mp4', + 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', + 'availability': 'public', + 'age_limit': 0, + 'release_timestamp': 1637975704, + 'upload_date': '20210619', + 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', + 'live_status': 'is_live', + 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', + 'uploader': '阿鲍Abao', + 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', + 'channel': 'Abao in Tokyo', + 'channel_follower_count': int, + 'release_date': '20211127', + 'tags': 'count:39', + 'categories': ['People & Blogs'], + 'like_count': int, + 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA', + 'view_count': int, + 'playable_in_embed': True, + 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + }, + 'params': {'skip_download': True} + }, ] @classmethod @@ -3008,6 +3091,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Some formats may have much smaller duration than others (possibly damaged during encoding) # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000) + if is_damaged: + self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), @@ -3027,7 +3112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'desc' if language_preference < -1 else ''), 'language_preference': language_preference, - 'preference': -10 if is_damaged else None, + # Strictly de-prioritize damaged and 3gp formats + 'preference': -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') @@ -3336,9 +3422,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # URL checking if the user doesn't care about getting the best possible thumbnail 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')), 'description': video_description, - 'upload_date': unified_strdate( - get_first(microformats, 'uploadDate') - or search_meta('uploadDate')), 'uploader': get_first(video_details, 'author'), 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None, 'uploader_url': owner_profile_url, @@ -3489,6 +3572,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for content in contents: vpir = content.get('videoPrimaryInfoRenderer') if vpir: + info['upload_date'] = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') stl = vpir.get('superTitleLink') if stl: stl = self._get_text(stl) @@ -3567,6 +3651,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'uploader_id', 'channel_url': 'uploader_url', } + + # The upload date for scheduled and current live streams / premieres in microformats + # is generally the true upload date. Although it is not in UTC, we prefer it in this case. + # Note this changes to the published date when the stream/premiere has finished.
+ # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 + if not info.get('upload_date') or info.get('is_live') or info.get('live_status') == 'is_upcoming': + info['upload_date'] = ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate')) + or info.get('upload_date')) + for to, frm in fallbacks.items(): if not info.get(to): info[to] = info.get(frm) diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index f84ba5cff..419bf30d8 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -9,7 +9,6 @@ from .common import InfoExtractor from ..utils import ( int_or_none, traverse_obj, - HEADRequest, ) @@ -106,18 +105,17 @@ class ZingMp3BaseIE(InfoExtractor): def _real_initialize(self): if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'): - self._request_webpage(HEADRequest(self._DOMAIN), None, note='Updating cookies') + self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}), + None, note='Updating cookies') def _real_extract(self, url): song_id, type_url = self._match_valid_url(url).group('id', 'type') - api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id}) - return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url) def get_api_with_signature(self, name_api, param): - sha256 = hashlib.sha256(''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest() - + param.update({'ctime': '1'}) + sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest() data = { 'apiKey': self._API_KEY, 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(), @@ -149,7 +147,7 @@ class ZingMp3IE(ZingMp3BaseIE): }, }, { 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', - 'md5': 'e9c972b693aa88301ef981c8151c4343', + 'md5': 'c7f23d971ac1a4f675456ed13c9b9612', 'info_dict': { 'id': 'ZO8ZF7C7', 'title': 'Sương Hoa Đưa Lối', @@ -158,6 +156,22 @@ class ZingMp3IE(ZingMp3BaseIE): 'duration': 207, 'track': 'Sương Hoa Đưa Lối', 'artist': 'K-ICM, RYO', + 'album': 'Sương Hoa Đưa Lối (Single)', + 'album_artist': 'K-ICM, RYO', + }, + }, { + 'url': 'https://zingmp3.vn/bai-hat/Nguoi-Yeu-Toi-Lanh-Lung-Sat-Da-Mr-Siro/ZZ6IW7OU.html', + 'md5': '3e9f7a9bd0d965573dbff8d7c68b629d', + 'info_dict': { + 'id': 'ZZ6IW7OU', + 'title': 'Người Yêu Tôi Lạnh Lùng Sắt Đá', + 'ext': 'mp3', + 'thumbnail': r're:^https?://.+\.jpg', + 'duration': 303, + 'track': 'Người Yêu Tôi Lạnh Lùng Sắt Đá', + 'artist': 'Mr. Siro', + 'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)', + 'album_artist': 'Mr. Siro', }, }, { 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false', @@ -184,6 +198,14 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): }, 'playlist_count': 9, }, { + 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html', + 'info_dict': { + '_type': 'playlist', + 'id': 'ZWZAEZZD', + 'title': 'Những Bài Hát Hay Nhất Của Mr. 
Siro', + }, + 'playlist_count': 49, + }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, }, { diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9f1f31974..9f6b45ec6 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -319,7 +319,7 @@ def create_parser(): general.add_option( '--mark-watched', action='store_true', dest='mark_watched', default=False, - help='Mark videos watched (even with --simulate). Currently only supported for YouTube') + help='Mark videos watched (even with --simulate)') general.add_option( '--no-mark-watched', action='store_false', dest='mark_watched', @@ -1178,7 +1178,7 @@ def create_parser(): help='Do not write video description (default)') filesystem.add_option( '--write-info-json', - action='store_true', dest='writeinfojson', default=False, + action='store_true', dest='writeinfojson', default=None, help='Write video metadata to a .info.json file (this may contain personal information)') filesystem.add_option( '--no-write-info-json', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 907627381..aee84cf5b 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -404,7 +404,7 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') - SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') + SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index 5452b92d8..5bc435da3 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -1,5 +1,4 @@ import re - from enum import Enum from .common import PostProcessor @@ -26,12 +25,17 @@ class MetadataParserPP(PostProcessor): ''' if not isinstance(action, cls.Actions): raise ValueError(f'{action!r} is not a valid action') - getattr(cls, action.value)(cls, *data) + getattr(cls, action.value)(cls, *data) # So this can raise an error to validate the arguments @staticmethod def field_to_template(tmpl): if re.match(r'[a-zA-Z_]+$', tmpl): return f'%({tmpl})s' + + from ..YoutubeDL import YoutubeDL + err = YoutubeDL.validate_outtmpl(tmpl) + if err: + raise err return tmpl @staticmethod diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 87463c999..c9b57c2f0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -47,6 +47,7 @@ from .compat import ( compat_HTMLParser, compat_HTTPError, compat_basestring, + compat_brotli, compat_chr, compat_cookiejar, compat_ctypes_WINFUNCTYPE, @@ -143,10 +144,16 @@ def random_user_agent(): return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS) +SUPPORTED_ENCODINGS = [ + 'gzip', 'deflate' +] +if compat_brotli: + SUPPORTED_ENCODINGS.append('br') + std_headers = { 'User-Agent': random_user_agent(), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': 'gzip, deflate', + 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS), 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate', } @@ -1022,8 +1029,8 @@ def make_HTTPS_handler(params, **kwargs): def bug_reports_message(before=';'): msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , ' - 'filling out the "Broken site" issue
template properly. ' - 'Confirm you are on the latest version using -U') + 'filling out the appropriate issue template. ' + 'Confirm you are on the latest version using yt-dlp -U') before = before.rstrip() if not before or before.endswith(('.', '!', '?')): @@ -1076,9 +1083,10 @@ class ExtractorError(YoutubeDLError): '' if expected else bug_reports_message()))) def format_traceback(self): - if self.traceback is None: - return None - return ''.join(traceback.format_tb(self.traceback)) + return join_nonempty( + self.traceback and ''.join(traceback.format_tb(self.traceback)), + self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]), + delim='\n') or None class UnsupportedError(ExtractorError): @@ -1356,6 +1364,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): except zlib.error: return zlib.decompress(data) + @staticmethod + def brotli(data): + if not data: + return data + return compat_brotli.decompress(data) + def http_request(self, req): # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not # always respected by websites, some tend to give out URLs with non percent-encoded @@ -1416,6 +1430,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) resp.msg = old_resp.msg del resp.headers['Content-encoding'] + # brotli + if resp.headers.get('Content-encoding', '') == 'br': + resp = compat_urllib_request.addinfourl( + io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code) + resp.msg = old_resp.msg + del resp.headers['Content-encoding'] # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see # https://github.com/ytdl-org/youtube-dl/issues/6457). 
if 300 <= resp.code < 400: @@ -3485,7 +3505,7 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): extra_gap += 1 if delim: table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data - table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter + table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter for row in table: for pos, text in enumerate(map(str, row)): if '\t' in text: @@ -3583,6 +3603,9 @@ def match_str(filter_str, dct, incomplete=False): def match_filter_func(filter_str): + if filter_str is None: + return None + def _match_func(info_dict, *args, **kwargs): if match_str(filter_str, info_dict, *args, **kwargs): return None @@ -5195,6 +5218,10 @@ def traverse_dict(dictn, keys, casesense=True): return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True) +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) + + def variadic(x, allowed_types=(str, bytes, dict)): return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,) @@ -5461,5 +5488,5 @@ has_websockets = bool(compat_websockets) def merge_headers(*dicts): - """Merge dicts of network headers case insensitively, prioritizing the latter ones""" - return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} + """Merge dicts of http headers case insensitively, prioritizing the latter ones""" + return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 01e1b2345..d5df2af90 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,5 +1,5 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2022.02.04' +__version__ = '2022.03.08.1' -RELEASE_GIT_HEAD = 'c1653e9ef' +RELEASE_GIT_HEAD = 'c0c2c57d3'
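# A runnable sketch of the header-casing change in merge_headers() above:
# str.capitalize() lowercases everything after the first character, mangling
# hyphenated header names, while str.title() capitalizes each word.
# merge_headers is copied verbatim from the diff; the sample header values
# are illustrative.
import itertools

def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}

assert 'user-agent'.capitalize() == 'User-agent'  # old behaviour: second word lowercased
assert 'user-agent'.title() == 'User-Agent'       # new behaviour: canonical HTTP casing
assert merge_headers({'user-agent': 'a'}, {'User-Agent': 'b'}) == {'User-Agent': 'b'}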