-rw-r--r--  .gitignore                                |   2
-rw-r--r--  CONTRIBUTORS                              |  24
-rw-r--r--  Changelog.md                              | 133
-rw-r--r--  MANIFEST.in                               |   1
-rw-r--r--  devscripts/prepare_manpage.py             |   6
-rw-r--r--  pyinst.py                                 |   2
-rw-r--r--  requirements.txt                          |   2
-rw-r--r--  setup.py                                  |   2
-rw-r--r--  supportedsites.md                         |  49
-rw-r--r--  yt_dlp/YoutubeDL.py                       |  94
-rw-r--r--  yt_dlp/__init__.py                        | 802
-rw-r--r--  yt_dlp/compat.py                          |   8
-rw-r--r--  yt_dlp/downloader/common.py               |   6
-rw-r--r--  yt_dlp/downloader/youtube_live_chat.py    |   3
-rw-r--r--  yt_dlp/extractor/abematv.py               |  16
-rw-r--r--  yt_dlp/extractor/adobepass.py             |  51
-rw-r--r--  yt_dlp/extractor/ant1newsgr.py            |   4
-rw-r--r--  yt_dlp/extractor/ard.py                   |  19
-rw-r--r--  yt_dlp/extractor/ccma.py                  |  13
-rw-r--r--  yt_dlp/extractor/common.py                |  61
-rw-r--r--  yt_dlp/extractor/extractors.py            |   9
-rw-r--r--  yt_dlp/extractor/facebook.py              |   9
-rw-r--r--  yt_dlp/extractor/fptplay.py               | 102
-rw-r--r--  yt_dlp/extractor/frontendmasters.py       |   4
-rw-r--r--  yt_dlp/extractor/generic.py               |  13
-rw-r--r--  yt_dlp/extractor/mgtv.py                  |  59
-rw-r--r--  yt_dlp/extractor/mildom.py                | 294
-rw-r--r--  yt_dlp/extractor/nrk.py                   |  11
-rw-r--r--  yt_dlp/extractor/openrec.py               |  68
-rw-r--r--  yt_dlp/extractor/panopto.py               | 445
-rw-r--r--  yt_dlp/extractor/peertube.py              |   1
-rw-r--r--  yt_dlp/extractor/periscope.py             |   2
-rw-r--r--  yt_dlp/extractor/pokemon.py               |  40
-rw-r--r--  yt_dlp/extractor/rokfin.py                |   4
-rw-r--r--  yt_dlp/extractor/soundcloud.py            |  16
-rw-r--r--  yt_dlp/extractor/sovietscloset.py         |   2
-rw-r--r--  yt_dlp/extractor/tiktok.py                |   4
-rw-r--r--  yt_dlp/extractor/xinpianchang.py          |  95
-rw-r--r--  yt_dlp/extractor/youtube.py               | 113
-rw-r--r--  yt_dlp/extractor/zingmp3.py               |  36
-rw-r--r--  yt_dlp/options.py                         |   4
-rw-r--r--  yt_dlp/postprocessor/ffmpeg.py            |   2
-rw-r--r--  yt_dlp/postprocessor/metadataparser.py    |   8
-rw-r--r--  yt_dlp/utils.py                           |  45
-rw-r--r--  yt_dlp/version.py                         |   4
45 files changed, 1922 insertions(+), 766 deletions(-)
diff --git a/.gitignore b/.gitignore
index fb09c3d6d..31fdc484b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ cookies
*.3gp
*.ape
+*.ass
*.avi
*.desktop
*.flac
@@ -97,6 +98,7 @@ yt-dlp.zip
*.iml
.vscode
*.sublime-*
+*.code-workspace
# Lazy extractors
*/extractor/lazy_extractors.py
# Plugins
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index fd93e7df3..8d62c04fb 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -146,7 +146,7 @@ chio0hai
cntrl-s
Deer-Spangle
DEvmIb
-Grabien
+Grabien/MaximVol
j54vc1bk
mpeter50
mrpapersonic
@@ -160,7 +160,7 @@ PilzAdam
zmousm
iw0nderhow
unit193
-TwoThousandHedgehogs
+TwoThousandHedgehogs/KathrynElrod
Jertzukka
cypheron
Hyeeji
@@ -194,3 +194,23 @@ KiberInfinity
tejing1
Bricio
lazypete365
+Aniruddh-J
+blackgear
+CplPwnies
+cyberfox1691
+FestplattenSchnitzel
+hatienl0i261299
+iphoting
+jakeogh
+lukasfink1
+lyz-code
+marieell
+mdpauley
+Mipsters
+mxmehl
+ofkz
+P-reducible
+pycabbage
+regarten
+Ronnnny
+schn0sch
diff --git a/Changelog.md b/Changelog.md
index 0a76f65be..fe6f8a0ac 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -11,6 +11,139 @@
-->
+### 2022.03.08.1
+
+* [cleanup] Refactor `__init__.py`
+* [build] Fix bug
+
+### 2022.03.08
+
+* Merge youtube-dl: Up to [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
+* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1) (see the usage sketch below, after this changelog's diff)
+* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
+* Add pre-processor stage `after_filter`
+* Better error message when no `--live-from-start` format
+* Create necessary directories for `--print-to-file`
+* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
+* Fix `-all` for `--sub-langs`
+* Fix doubling of `video_id` in `ExtractorError`
+* Fix for when stdout/stderr encoding is `None`
+* Handle negative duration from extractor
+* Implement `--add-header` without modifying `std_headers`
+* Obey `--abort-on-error` for "ffmpeg not installed"
+* Set `webpage_url_...` from `webpage_url` and not input URL
+* Tolerate failure to `--write-link` due to unknown URL
+* [aria2c] Add `--http-accept-gzip=true`
+* [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev)
+* [cookies] Update macOS 12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
+* [devscripts] Improve `prepare_manpage`
+* [downloader] Do not use aria2c for non-native `m3u8`
+* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [extractor] Allow `http_headers` to be specified for `thumbnails`
+* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
+* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
+* [FFmpegConcat] Abort on `--simulate`
+* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
+* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
+* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
+* [generic] Pass referer to extracted formats
+* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
+* [options] Better ambiguous option resolution
+* [options] Rename `--clean-infojson` to `--clean-info-json`
+* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
+* [SponsorBlock] Minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
+* [utils] Better traceback for `ExtractorError`
+* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
+* [utils] Improve file locking
+* [utils] OnDemandPagedList: Do not download pages after error
+* [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
+* [utils] Validate `DateRange` input
+* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
+* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
+* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
+* [cleanup, docs] Misc cleanup
+* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
+* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
+* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
+* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
+* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
+* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
+* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
+* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
+* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
+* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
+* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
+* [ard] Fix valid URL
+* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
+* [bandcamp] Detect acodec
+* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
+* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
+* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
+* [bigo] Fix extractor to not use `form_params`
+* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
+* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
+* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
+* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
+* [cspan] Support C-SPAN Congress videos by [Grabien](https://github.com/Grabien)
+* [dropbox] fix regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
+* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
+* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
+* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
+* [lbry] Fix `--ignore-no-formats-error`
+* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
+* [mildom] Fix linter
+* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
+* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
+* [niconico:tag] Add support for searching tags
+* [nrk] Add fallback API
+* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
+* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
+* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
+* [spiegel] Fix `_VALID_URL`
+* [ThumbnailsConvertor] Support `webp`
+* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
+* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
+* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
+* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
+* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitch] Fix field name of `view_count`
+* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
+* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
+* [youtube:tab] Add `approximate_date` extractor-arg
+* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Reject webpage data if redirected to home page
+* [youtube] De-prioritize potentially damaged formats
+* [youtube] Differentiate descriptive audio by language code
+* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
+* [youtube] Fix automatic captions
+* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [youtube] Further de-prioritize 3gp format
+* [youtube] Label original auto-subs
+* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
+* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
+* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
+* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
+
+
### 2022.02.04
* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
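A usage sketch for the regex format-filter operator added in this release; the URL and the format_note value are placeholders, not taken from this commit:

    # Hedged sketch: select formats whose format_note matches a regex, via the
    # new quoted "~=" comparison in format filters (Python API form):
    from yt_dlp import YoutubeDL

    ydl_opts = {'format': "bv[format_note~='(?i)premium']+ba/b"}
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://example.com/watch?v=xxxxxxxx'])

The same filter should also work on the command line, e.g. -f "bv[format_note~='(?i)premium']+ba/b".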
diff --git a/MANIFEST.in b/MANIFEST.in
index 38d83a9a5..bc2f056c0 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,5 +5,6 @@ include README.md
include completions/*/*
include supportedsites.md
include yt-dlp.1
+include requirements.txt
recursive-include devscripts *
recursive-include test *
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 10b0aec3e..b763d2d9a 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -75,7 +75,11 @@ def filter_options(readme):
section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
options = '# OPTIONS\n'
for line in section.split('\n')[1:]:
- mobj = re.fullmatch(r'\s{4}(?P<opt>-(?:,\s|[^\s])+)(?:\s(?P<meta>([^\s]|\s(?!\s))+))?(\s{2,}(?P<desc>.+))?', line)
+ mobj = re.fullmatch(r'''(?x)
+ \s{4}(?P<opt>-(?:,\s|[^\s])+)
+ (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
+ (\s{2,}(?P<desc>.+))?
+ ''', line)
if not mobj:
options += f'{line.lstrip()}\n'
continue
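The hunk above only reformats the option-matching pattern into verbose mode; matching behaviour is unchanged. A quick self-contained check, using an illustrative help line that is not taken from the README:

    import re

    # Same pattern as in the hunk above, verbose form
    OPT_RE = re.compile(r'''(?x)
        \s{4}(?P<opt>-(?:,\s|[^\s])+)
        (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
        (\s{2,}(?P<desc>.+))?
    ''')

    line = '    -h, --help                       Print this help text and exit'
    mobj = OPT_RE.fullmatch(line)
    assert mobj and mobj.group('opt') == '-h, --help'
    assert mobj.group('desc') == 'Print this help text and exit'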
diff --git a/pyinst.py b/pyinst.py
index a7b2073dc..7b336aa9e 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -74,7 +74,7 @@ def version_to_list(version):
def dependency_options():
- dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
+ dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
yield from (f'--hidden-import={module}' for module in dependencies)
diff --git a/requirements.txt b/requirements.txt
index 6a982fa36..7818aca78 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
mutagen
pycryptodome
websockets
+brotli; platform_python_implementation=='CPython'
+brotlicffi; platform_python_implementation!='CPython'
\ No newline at end of file
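The corresponding compat shim lives in yt_dlp/compat.py, which this commit also touches but whose hunk is not shown in this section. A minimal sketch of the expected pattern, assuming the module is exposed as the compat_brotli name referenced in the YoutubeDL.py hunks below:

    # Minimal sketch, an assumption based on the requirements split above:
    # brotlicffi serves non-CPython interpreters, brotli serves CPython.
    try:
        import brotlicffi as compat_brotli
    except ImportError:
        try:
            import brotli as compat_brotli
        except ImportError:
            compat_brotli = None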
diff --git a/setup.py b/setup.py
index f5f9d7513..9d54943f2 100644
--- a/setup.py
+++ b/setup.py
@@ -21,7 +21,7 @@ DESCRIPTION = 'A youtube-dl fork with additional features and patches'
LONG_DESCRIPTION = '\n\n'.join((
'Official repository: <https://github.com/yt-dlp/yt-dlp>',
'**PS**: Some links in this document will not work since this is a copy of the README.md from Github',
- open('README.md', 'r', encoding='utf-8').read()))
+ open('README.md', encoding='utf-8').read()))
REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']
diff --git a/supportedsites.md b/supportedsites.md
index 7166dc53a..46ad1328d 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -24,6 +24,8 @@
- **abcnews:video**
- **abcotvs**: ABC Owned Television Stations
- **abcotvs:clips**
+ - **AbemaTV**
+ - **AbemaTVTitle**
- **AcademicEarth:Course**
- **acast**
- **acast:channel**
@@ -45,6 +47,8 @@
- **AlJazeera**
- **Allocine**
- **AlphaPorno**
+ - **Alsace20TV**
+ - **Alsace20TVEmbed**
- **Alura**
- **AluraCourse**
- **Amara**
@@ -58,6 +62,9 @@
- **AnimeLab**
- **AnimeLabShows**
- **AnimeOnDemand**
+ - **ant1newsgr:article**: ant1news.gr articles
+ - **ant1newsgr:embed**: ant1news.gr embedded videos
+ - **ant1newsgr:watch**: ant1news.gr videos
- **Anvato**
- **aol.com**: Yahoo screen and movies
- **APA**
@@ -75,6 +82,7 @@
- **Arkena**
- **arte.sky.it**
- **ArteTV**
+ - **ArteTVCategory**
- **ArteTVEmbed**
- **ArteTVPlaylist**
- **AsianCrush**
@@ -99,8 +107,8 @@
- **bandaichannel**
- **Bandcamp**
- **Bandcamp:album**
+ - **Bandcamp:user**
- **Bandcamp:weekly**
- - **BandcampMusic**
- **bangumi.bilibili.com**: BiliBili番剧
- **BannedVideo**
- **bbc**: BBC
@@ -122,6 +130,7 @@
- **bfmtv:live**
- **BibelTV**
- **Bigflix**
+ - **Bigo**
- **Bild**: Bild.de
- **BiliBili**
- **Bilibili category extractor**
@@ -163,6 +172,7 @@
- **BYUtv**
- **CableAV**
- **Callin**
+ - **Caltrans**
- **CAM4**
- **Camdemy**
- **CamdemyFolder**
@@ -231,6 +241,8 @@
- **Coub**
- **CozyTV**
- **cp24**
+ - **cpac**
+ - **cpac:playlist**
- **Cracked**
- **Crackle**
- **CrooksAndLiars**
@@ -241,6 +253,7 @@
- **crunchyroll:playlist**
- **crunchyroll:playlist:beta**
- **CSpan**: C-SPAN
+ - **CSpanCongress**
- **CtsNews**: 華視新聞
- **CTV**
- **CTVNews**
@@ -262,6 +275,7 @@
- **daum.net:clip**
- **daum.net:playlist**
- **daum.net:user**
+ - **daystar:clip**
- **DBTV**
- **DctpTv**
- **DeezerAlbum**
@@ -353,6 +367,7 @@
- **faz.net**
- **fc2**
- **fc2:embed**
+ - **fc2:live**
- **Fczenit**
- **Filmmodu**
- **filmon**
@@ -372,6 +387,7 @@
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
- **FoxSports**
+ - **fptplay**: fptplay.vn
- **FranceCulture**
- **FranceInter**
- **FranceTV**
@@ -410,6 +426,7 @@
- **gem.cbc.ca:playlist**
- **generic**: Generic downloader that works on some sites
- **Gettr**
+ - **GettrStreaming**
- **Gfycat**
- **GiantBomb**
- **Giga**
@@ -622,8 +639,9 @@
- **MiaoPai**
- **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom
+ - **mildom:clip**: Clip in Mildom
- **mildom:user:vod**: Download all VODs from specific user in Mildom
- - **mildom:vod**: Download a VOD in Mildom
+ - **mildom:vod**: VOD in Mildom
- **minds**
- **minds:channel**
- **minds:group**
@@ -666,6 +684,8 @@
- **mtvservices:embedded**
- **MTVUutisetArticle**
- **MuenchenTV**: münchen.tv
+ - **Murrtube**
+ - **MurrtubeUser**: Murrtube user profile
- **MuseScore**
- **MusicdexAlbum**
- **MusicdexArtist**
@@ -734,9 +754,13 @@
- **NextTV**: 壹電視
- **Nexx**
- **NexxEmbed**
+ - **NFB**
- **NFHSNetwork**
- **nfl.com** (Currently broken)
- **nfl.com:article** (Currently broken)
+ - **NhkForSchoolBangumi**
+ - **NhkForSchoolProgramList**
+ - **NhkForSchoolSubject**: Portal page for each school subject, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
- **NhkVod**
- **NhkVodProgram**
- **nhl.com**
@@ -746,7 +770,10 @@
- **nickelodeonru**
- **nicknight**
- **niconico**: ニコニコ動画
- - **NiconicoPlaylist**
+ - **niconico:history**: NicoNico user history. Requires cookies.
+ - **niconico:playlist**
+ - **niconico:series**
+ - **niconico:tag**: NicoNico video tag URLs
- **NiconicoUser**
- **nicovideo:search**: Nico video search; "nicosearch:" prefix
- **nicovideo:search:date**: Nico video search, newest first; "nicosearchdate:" prefix
@@ -845,6 +872,7 @@
- **PatreonUser**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **PearVideo**
+ - **PeekVids**
- **peer.tv**
- **PeerTube**
- **PeerTube:Playlist**
@@ -857,6 +885,7 @@
- **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de**
- **Photobucket**
+ - **Piapro**
- **Picarto**
- **PicartoVod**
- **Piksel**
@@ -876,6 +905,7 @@
- **PlaysTV**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
+ - **PlayVids**
- **Playwire**
- **pluralsight**
- **pluralsight:course**
@@ -980,6 +1010,9 @@
- **RICE**
- **RMCDecouverte**
- **RockstarGames**
+ - **Rokfin**
+ - **rokfin:channel**
+ - **rokfin:stack**
- **RoosterTeeth**
- **RoosterTeethSeries**
- **RottenTomatoes**
@@ -1019,6 +1052,7 @@
- **RUTV**: RUTV.RU
- **Ruutu**
- **Ruv**
+ - **ruv.is:spila**
- **safari**: safaribooksonline.com online video
- **safari:api**
- **safari:course**: safaribooksonline.com online courses
@@ -1158,6 +1192,7 @@
- **TeleBruxelles**
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
- **Telegraaf**
+ - **telegram:embed**
- **TeleMB**
- **Telemundo**
- **TeleQuebec**
@@ -1319,6 +1354,8 @@
- **video.google:search**: Google Video search; "gvsearch:" prefix
- **video.sky.it**
- **video.sky.it:live**
+ - **VideocampusSachsen**
+ - **VideocampusSachsenEmbed**
- **VideoDetective**
- **videofy.me**
- **videomore**
@@ -1361,6 +1398,7 @@
- **vlive**
- **vlive:channel**
- **vlive:post**
+ - **vm.tiktok**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
@@ -1395,7 +1433,7 @@
- **WatchBox**
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- - **wdr:mobile**
+ - **wdr:mobile** (Currently broken)
- **WDRElefant**
- **WDRPage**
- **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
@@ -1430,6 +1468,7 @@
- **xiami:song**: 虾米音乐
- **ximalaya**: 喜马拉雅FM
- **ximalaya:album**: 喜马拉雅FM 专辑
+ - **xinpianchang**: xinpianchang.com
- **XMinus**
- **XNXX**
- **Xstream**
@@ -1488,7 +1527,7 @@
- **ZenYandex**
- **ZenYandexChannel**
- **Zhihu**
- - **zingmp3**: mp3.zing.vn
+ - **zingmp3**: zingmp3.vn
- **zingmp3:album**
- **zoom**
- **Zype**
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 816c40329..150764629 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -32,6 +32,7 @@ from string import ascii_letters
from .compat import (
compat_basestring,
+ compat_brotli,
compat_get_terminal_size,
compat_kwargs,
compat_numeric_types,
@@ -233,6 +234,8 @@ class YoutubeDL(object):
See "Sorting Formats" for more details.
format_sort_force: Force the given format_sort. see "Sorting Formats"
for more details.
+ prefer_free_formats: Whether to prefer video formats with free containers
+ over non-free ones of same quality.
allow_multiple_video_streams: Allow multiple video streams to be merged
into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged
@@ -514,17 +517,6 @@ class YoutubeDL(object):
'storyboards': {'mhtml'},
}
- params = None
- _ies = {}
- _pps = {k: [] for k in POSTPROCESS_WHEN}
- _printed_messages = set()
- _first_webpage_request = True
- _download_retcode = None
- _num_downloads = None
- _playlist_level = 0
- _playlist_urls = set()
- _screen_file = None
-
def __init__(self, params=None, auto_init=True):
"""Create a FileDownloader object with the given options.
@param auto_init Whether to load the default extractors and print header (if verbose).
@@ -532,6 +524,7 @@ class YoutubeDL(object):
"""
if params is None:
params = {}
+ self.params = params
self._ies = {}
self._ies_instances = {}
self._pps = {k: [] for k in POSTPROCESS_WHEN}
@@ -543,15 +536,21 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._num_videos = 0
- self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
- self._err_file = sys.stderr
- self.params = params
+ self._playlist_level = 0
+ self._playlist_urls = set()
self.cache = Cache(self)
windows_enable_vt_mode()
+ self._out_files = {
+ 'error': sys.stderr,
+ 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
+ 'console': None if compat_os_name == 'nt' else next(
+ filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
+ }
+ self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
self._allow_colors = {
- 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
- 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
+ type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
+ for type_ in ('screen', 'error')
}
if sys.version_info < (3, 6):
@@ -616,7 +615,7 @@ class YoutubeDL(object):
sp_kwargs = dict(
stdin=subprocess.PIPE,
stdout=slave,
- stderr=self._err_file)
+ stderr=self._out_files['error'])
try:
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
except OSError:
@@ -784,14 +783,24 @@ class YoutubeDL(object):
self._printed_messages.add(message)
write_string(message, out=out, encoding=self.params.get('encoding'))
- def to_stdout(self, message, skip_eol=False, quiet=False):
+ def to_stdout(self, message, skip_eol=False, quiet=None):
"""Print message to stdout"""
+ if quiet is not None:
+ self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead')
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._out_files['print'])
+
+ def to_screen(self, message, skip_eol=False, quiet=None):
+ """Print message to screen if not in quiet mode"""
if self.params.get('logger'):
self.params['logger'].debug(message)
- elif not quiet or self.params.get('verbose'):
- self._write_string(
- '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._err_file if quiet else self._screen_file)
+ return
+ if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
+ return
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._out_files['screen'])
def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
@@ -799,7 +808,12 @@ class YoutubeDL(object):
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
+ self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
+
+ def _send_console_code(self, code):
+ if compat_os_name == 'nt' or not self._out_files['console']:
+ return
+ self._write_string(code, self._out_files['console'])
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -810,26 +824,18 @@ class YoutubeDL(object):
# c_wchar_p() might not be necessary if `message` is
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
- elif 'TERM' in os.environ:
- self._write_string('\033]0;%s\007' % message, self._screen_file)
+ else:
+ self._send_console_code(f'\033]0;{message}\007')
def save_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate'):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Save the title on stack
- self._write_string('\033[22;0t', self._screen_file)
+ self._send_console_code('\033[22;0t') # Save the title on stack
def restore_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate'):
+ if not self.params.get('consoletitle') or self.params.get('simulate'):
return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Restore the title from stack
- self._write_string('\033[23;0t', self._screen_file)
+ self._send_console_code('\033[23;0t') # Restore the title from stack
def __enter__(self):
self.save_console_title()
@@ -875,11 +881,6 @@ class YoutubeDL(object):
raise DownloadError(message, exc_info)
self._download_retcode = 1
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode"""
- self.to_stdout(
- message, skip_eol, quiet=self.params.get('quiet', False))
-
class Styles(Enum):
HEADERS = 'yellow'
EMPHASIS = 'light blue'
@@ -903,11 +904,11 @@ class YoutubeDL(object):
def _format_screen(self, *args, **kwargs):
return self._format_text(
- self._screen_file, self._allow_colors['screen'], *args, **kwargs)
+ self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
def _format_err(self, *args, **kwargs):
return self._format_text(
- self._err_file, self._allow_colors['err'], *args, **kwargs)
+ self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
@@ -2773,7 +2774,7 @@ class YoutubeDL(object):
if info_dict.get('requested_formats') is not None:
# For RTMP URLs, also include the playpath
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
- elif 'url' in info_dict:
+ elif info_dict.get('url'):
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
if (self.params.get('forcejson')
@@ -3600,7 +3601,7 @@ class YoutubeDL(object):
encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- get_encoding(self._screen_file), get_encoding(self._err_file),
+ get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
self.get_encoding())
logger = self.params.get('logger')
@@ -3674,6 +3675,7 @@ class YoutubeDL(object):
from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
lib_str = join_nonempty(
+ compat_brotli and compat_brotli.__name__,
compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
SECRETSTORAGE_AVAILABLE and 'secretstorage',
has_mutagen and 'mutagen',
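The net effect of the output-stream refactor above, as a hedged sketch of the public behaviour implied by the hunks:

    from yt_dlp import YoutubeDL

    with YoutubeDL({'quiet': True}) as ydl:
        # to_stdout() now always writes to the 'print' stream; passing `quiet`
        # to it is deprecated in favour of calling to_screen() instead.
        ydl.to_stdout('printed even in quiet mode')
        # to_screen() is the call that honours params['quiet']:
        ydl.to_screen('suppressed because quiet=True')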
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 524130807..a0489fcfa 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -11,9 +11,7 @@ import random
import re
import sys
-from .options import (
- parseOpts,
-)
+from .options import parseOpts
from .compat import (
compat_getpass,
compat_os_name,
@@ -26,12 +24,12 @@ from .utils import (
decodeOption,
DownloadCancelled,
DownloadError,
- error_to_compat_str,
expand_path,
- GeoUtils,
float_or_none,
+ GeoUtils,
int_or_none,
match_filter_func,
+ NO_DEFAULT,
parse_duration,
preferredencoding,
read_batch_urls,
@@ -60,59 +58,38 @@ from .postprocessor import (
from .YoutubeDL import YoutubeDL
-def _real_main(argv=None):
- # Compatibility fixes for Windows
- if sys.platform == 'win32':
- # https://github.com/ytdl-org/youtube-dl/issues/820
- codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
-
- workaround_optparse_bug9161()
-
- setproctitle('yt-dlp')
-
- parser, opts, args = parseOpts(argv)
- warnings, deprecation_warnings = [], []
-
- if opts.user_agent is not None:
- opts.headers.setdefault('User-Agent', opts.user_agent)
- if opts.referer is not None:
- opts.headers.setdefault('Referer', opts.referer)
-
- # Dump user agent
- if opts.dump_user_agent:
- ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
- write_string(f'{ua}\n', out=sys.stdout)
- sys.exit(0)
-
+def get_urls(urls, batchfile, verbose):
# Batch file verification
batch_urls = []
- if opts.batchfile is not None:
+ if batchfile is not None:
try:
- if opts.batchfile == '-':
+ if batchfile == '-':
write_string('Reading URLs from stdin - EOF (%s) to end:\n' % (
'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'))
batchfd = sys.stdin
else:
batchfd = io.open(
- expand_path(opts.batchfile),
+ expand_path(batchfile),
'r', encoding='utf-8', errors='ignore')
batch_urls = read_batch_urls(batchfd)
- if opts.verbose:
+ if verbose:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
except IOError:
- sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
- all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already striped in read_batch_urls
+ sys.exit('ERROR: batch file %s could not be read' % batchfile)
_enc = preferredencoding()
- all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
+ return [
+ url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip()
+ for url in batch_urls + urls]
+
+def print_extractor_information(opts, urls):
if opts.list_extractors:
for ie in list_extractors(opts.age_limit):
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout)
- matchedUrls = [url for url in all_urls if ie.suitable(url)]
+ matchedUrls = [url for url in urls if ie.suitable(url)]
for mu in matchedUrls:
write_string(' ' + mu + '\n', out=sys.stdout)
- sys.exit(0)
- if opts.list_extractor_descriptions:
+ elif opts.list_extractor_descriptions:
for ie in list_extractors(opts.age_limit):
if not ie.working():
continue
@@ -124,184 +101,25 @@ def _real_main(argv=None):
_COUNTS = ('', '5', '10', 'all')
desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")'
write_string(desc + '\n', out=sys.stdout)
- sys.exit(0)
- if opts.ap_list_mso:
+ elif opts.ap_list_mso:
table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
- sys.exit(0)
-
- # Conflicting, missing and erroneous options
- if opts.format == 'best':
- warnings.append('.\n '.join((
- '"-f best" selects the best pre-merged format which is often not the best option',
- 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
- 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
- if opts.exec_cmd.get('before_dl') and opts.exec_before_dl_cmd:
- parser.error('using "--exec-before-download" conflicts with "--exec before_dl:"')
- if opts.usenetrc and (opts.username is not None or opts.password is not None):
- parser.error('using .netrc conflicts with giving username/password')
- if opts.password is not None and opts.username is None:
- parser.error('account username missing\n')
- if opts.ap_password is not None and opts.ap_username is None:
- parser.error('TV Provider account username missing\n')
- if opts.autonumber_size is not None:
- if opts.autonumber_size <= 0:
- parser.error('auto number size must be positive')
- if opts.autonumber_start is not None:
- if opts.autonumber_start < 0:
- parser.error('auto number start must be positive or 0')
- if opts.username is not None and opts.password is None:
- opts.password = compat_getpass('Type account password and press [Return]: ')
- if opts.ap_username is not None and opts.ap_password is None:
- opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
- if opts.ratelimit is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
- if numeric_limit is None:
- parser.error('invalid rate limit specified')
- opts.ratelimit = numeric_limit
- if opts.throttledratelimit is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.throttledratelimit)
- if numeric_limit is None:
- parser.error('invalid rate limit specified')
- opts.throttledratelimit = numeric_limit
- if opts.min_filesize is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
- if numeric_limit is None:
- parser.error('invalid min_filesize specified')
- opts.min_filesize = numeric_limit
- if opts.max_filesize is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
- if numeric_limit is None:
- parser.error('invalid max_filesize specified')
- opts.max_filesize = numeric_limit
- if opts.sleep_interval is not None:
- if opts.sleep_interval < 0:
- parser.error('sleep interval must be positive or 0')
- if opts.max_sleep_interval is not None:
- if opts.max_sleep_interval < 0:
- parser.error('max sleep interval must be positive or 0')
- if opts.sleep_interval is None:
- parser.error('min sleep interval must be specified, use --min-sleep-interval')
- if opts.max_sleep_interval < opts.sleep_interval:
- parser.error('max sleep interval must be greater than or equal to min sleep interval')
else:
- opts.max_sleep_interval = opts.sleep_interval
- if opts.sleep_interval_subtitles is not None:
- if opts.sleep_interval_subtitles < 0:
- parser.error('subtitles sleep interval must be positive or 0')
- if opts.sleep_interval_requests is not None:
- if opts.sleep_interval_requests < 0:
- parser.error('requests sleep interval must be positive or 0')
- if opts.ap_mso and opts.ap_mso not in MSO_INFO:
- parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
- if opts.overwrites: # --yes-overwrites implies --no-continue
- opts.continue_dl = False
- if opts.concurrent_fragment_downloads <= 0:
- parser.error('Concurrent fragments must be positive')
- if opts.wait_for_video is not None:
- min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
- if min_wait is None or (max_wait is None and '-' in opts.wait_for_video):
- parser.error('Invalid time range to wait')
- elif max_wait is not None and max_wait < min_wait:
- parser.error('Minimum time range to wait must not be longer than the maximum')
- opts.wait_for_video = (min_wait, max_wait)
+ return False
+ return True
- def parse_retries(retries, name=''):
- if retries in ('inf', 'infinite'):
- parsed_retries = float('inf')
- else:
- try:
- parsed_retries = int(retries)
- except (TypeError, ValueError):
- parser.error('invalid %sretry count specified' % name)
- return parsed_retries
- if opts.retries is not None:
- opts.retries = parse_retries(opts.retries)
- if opts.file_access_retries is not None:
- opts.file_access_retries = parse_retries(opts.file_access_retries, 'file access ')
- if opts.fragment_retries is not None:
- opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ')
- if opts.extractor_retries is not None:
- opts.extractor_retries = parse_retries(opts.extractor_retries, 'extractor ')
- if opts.buffersize is not None:
- numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
- if numeric_buffersize is None:
- parser.error('invalid buffer size specified')
- opts.buffersize = numeric_buffersize
- if opts.http_chunk_size is not None:
- numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
- if not numeric_chunksize:
- parser.error('invalid http chunk size specified')
- opts.http_chunk_size = numeric_chunksize
- if opts.playliststart <= 0:
- raise parser.error('Playlist start must be positive')
- if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
- raise parser.error('Playlist end must be greater than playlist start')
- if opts.extractaudio:
- opts.audioformat = opts.audioformat.lower()
- if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS):
- parser.error('invalid audio format specified')
- if opts.audioquality:
- opts.audioquality = opts.audioquality.strip('k').strip('K')
- audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan
- if audioquality is None or audioquality < 0:
- parser.error('invalid audio quality specified')
- if opts.recodevideo is not None:
- opts.recodevideo = opts.recodevideo.replace(' ', '')
- if not re.match(FFmpegVideoConvertorPP.FORMAT_RE, opts.recodevideo):
- parser.error('invalid video remux format specified')
- if opts.remuxvideo is not None:
- opts.remuxvideo = opts.remuxvideo.replace(' ', '')
- if not re.match(FFmpegVideoRemuxerPP.FORMAT_RE, opts.remuxvideo):
- parser.error('invalid video remux format specified')
- if opts.convertsubtitles is not None:
- if opts.convertsubtitles not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
- parser.error('invalid subtitle format specified')
- if opts.convertthumbnails is not None:
- if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS:
- parser.error('invalid thumbnail format specified')
- if opts.cookiesfrombrowser is not None:
- mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser)
- if mobj is None:
- parser.error(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
- browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile')
- browser_name = browser_name.lower()
- if browser_name not in SUPPORTED_BROWSERS:
- parser.error(f'unsupported browser specified for cookies: "{browser_name}". '
- f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}')
- if keyring is not None:
- keyring = keyring.upper()
- if keyring not in SUPPORTED_KEYRINGS:
- parser.error(f'unsupported keyring specified for cookies: "{keyring}". '
- f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
- opts.cookiesfrombrowser = (browser_name, profile, keyring)
- geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
- if geo_bypass_code is not None:
- try:
- GeoUtils.random_ipv4(geo_bypass_code)
- except Exception:
- parser.error('unsupported geo-bypass country or ip-block')
-
- if opts.date is not None:
- date = DateRange.day(opts.date)
- else:
- date = DateRange(opts.dateafter, opts.datebefore)
-
- compat_opts = opts.compat_opts
-
- def report_conflict(arg1, arg2):
- warnings.append(f'{arg2} is ignored since {arg1} was given')
+def set_compat_opts(opts):
def _unused_compat_opt(name):
- if name not in compat_opts:
+ if name not in opts.compat_opts:
return False
- compat_opts.discard(name)
- compat_opts.update(['*%s' % name])
+ opts.compat_opts.discard(name)
+ opts.compat_opts.update(['*%s' % name])
return True
def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
attr = getattr(opts, opt_name)
- if compat_name in compat_opts:
+ if compat_name in opts.compat_opts:
if attr is None:
setattr(opts, opt_name, not default)
return True
@@ -316,36 +134,140 @@ def _real_main(argv=None):
set_default_compat('abort-on-error', 'ignoreerrors', 'only_download')
set_default_compat('no-playlist-metafiles', 'allow_playlist_files')
set_default_compat('no-clean-infojson', 'clean_infojson')
- if 'no-attach-info-json' in compat_opts:
+ if 'no-attach-info-json' in opts.compat_opts:
if opts.embed_infojson:
_unused_compat_opt('no-attach-info-json')
else:
opts.embed_infojson = False
- if 'format-sort' in compat_opts:
+ if 'format-sort' in opts.compat_opts:
opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default)
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
if _video_multistreams_set is False and _audio_multistreams_set is False:
_unused_compat_opt('multistreams')
- outtmpl_default = opts.outtmpl.get('default')
- if outtmpl_default == '':
- outtmpl_default, opts.skip_download = None, True
- del opts.outtmpl['default']
- if opts.useid:
- if outtmpl_default is None:
- outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s'
- else:
- report_conflict('--output', '--id')
- if 'filename' in compat_opts:
- if outtmpl_default is None:
- outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s'
+ if 'filename' in opts.compat_opts:
+ if opts.outtmpl.get('default') is None:
+ opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'})
else:
_unused_compat_opt('filename')
+
+def validate_options(opts):
+ def validate(cndn, name, value=None, msg=None):
+ if cndn:
+ return True
+ raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value))
+
+ def validate_in(name, value, items, msg=None):
+ return validate(value is None or value in items, name, value, msg)
+
+ def validate_regex(name, value, regex):
+ return validate(value is None or re.match(regex, value), name, value)
+
+ def validate_positive(name, value, strict=False):
+ return validate(value is None or value > 0 or (not strict and value == 0),
+ name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0'))
+
+ def validate_minmax(min_val, max_val, min_name, max_name=None):
+ if max_val is None or min_val is None or max_val >= min_val:
+ return
+ if not max_name:
+ min_name, max_name = f'min {min_name}', f'max {min_name}'
+ raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')
+
+ # Usernames and passwords
+ validate(not opts.usenetrc or (opts.username is None and opts.password is None),
+ '.netrc', msg='using {name} conflicts with giving username/password')
+ validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
+ validate(opts.ap_password is None or opts.ap_username is not None,
+ 'TV Provider account username', msg='{name} missing')
+ validate_in('TV Provider', opts.ap_mso, MSO_INFO,
+ 'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers')
+
+ # Numbers
+ validate_positive('autonumber start', opts.autonumber_start)
+ validate_positive('autonumber size', opts.autonumber_size, True)
+ validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True)
+ validate_positive('playlist start', opts.playliststart, True)
+ if opts.playlistend != -1:
+ validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end')
+
+ # Time ranges
+ validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles)
+ validate_positive('requests sleep interval', opts.sleep_interval_requests)
+ validate_positive('sleep interval', opts.sleep_interval)
+ validate_positive('max sleep interval', opts.max_sleep_interval)
+ if opts.sleep_interval is None:
+ validate(
+ opts.max_sleep_interval is None, 'min sleep interval',
+ msg='{name} must be specified; use --min-sleep-interval')
+ elif opts.max_sleep_interval is None:
+ opts.max_sleep_interval = opts.sleep_interval
+ else:
+ validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')
+
+ if opts.wait_for_video is not None:
+ min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+ validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
+ 'time range to wait for video', opts.wait_for_video)
+ validate_minmax(min_wait, max_wait, 'time range to wait for video')
+ opts.wait_for_video = (min_wait, max_wait)
+
+ # Format sort
+ for f in opts.format_sort:
+ validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
+
+ # Postprocessor formats
+ validate_in('audio format', opts.audioformat, ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS))
+ validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
+ validate_in('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)
+ if opts.recodevideo is not None:
+ opts.recodevideo = opts.recodevideo.replace(' ', '')
+ validate_regex('video recode format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
+ if opts.remuxvideo is not None:
+ opts.remuxvideo = opts.remuxvideo.replace(' ', '')
+ validate_regex('video remux format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
+ if opts.audioquality:
+ opts.audioquality = opts.audioquality.strip('k').strip('K')
+ # int_or_none prevents inf, nan
+ validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0))
+
+ # Retries
+ def parse_retries(name, value):
+ if value is None:
+ return None
+ elif value in ('inf', 'infinite'):
+ return float('inf')
+ try:
+ return int(value)
+ except (TypeError, ValueError):
+ validate(False, f'{name} retry count', value)
+
+ opts.retries = parse_retries('download', opts.retries)
+ opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
+ opts.extractor_retries = parse_retries('extractor', opts.extractor_retries)
+ opts.file_access_retries = parse_retries('file access', opts.file_access_retries)
+
+ # Bytes
+ def parse_bytes(name, value):
+ if value is None:
+ return None
+ numeric_limit = FileDownloader.parse_bytes(value)
+ validate(numeric_limit is not None, name, value)
+ return numeric_limit
+
+ opts.ratelimit = parse_bytes('rate limit', opts.ratelimit)
+ opts.throttledratelimit = parse_bytes('throttled rate limit', opts.throttledratelimit)
+ opts.min_filesize = parse_bytes('min filesize', opts.min_filesize)
+ opts.max_filesize = parse_bytes('max filesize', opts.max_filesize)
+ opts.buffersize = parse_bytes('buffer size', opts.buffersize)
+ opts.http_chunk_size = parse_bytes('http chunk size', opts.http_chunk_size)
+
+ # Output templates
def validate_outtmpl(tmpl, msg):
err = YoutubeDL.validate_outtmpl(tmpl)
if err:
- parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err)))
+ raise ValueError(f'invalid {msg} "{tmpl}": {err}')
for k, tmpl in opts.outtmpl.items():
validate_outtmpl(tmpl, f'{k} output template')
@@ -354,32 +276,62 @@ def _real_main(argv=None):
validate_outtmpl(tmpl, f'{type_} print template')
for type_, tmpl_list in opts.print_to_file.items():
for tmpl, file in tmpl_list:
- validate_outtmpl(tmpl, f'{type_} print-to-file template')
- validate_outtmpl(file, f'{type_} print-to-file filename')
+ validate_outtmpl(tmpl, f'{type_} print to file template')
+ validate_outtmpl(file, f'{type_} print to file filename')
validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
for k, tmpl in opts.progress_template.items():
k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
validate_outtmpl(tmpl, f'{k} template')
- if opts.extractaudio and not opts.keepvideo and opts.format is None:
- opts.format = 'bestaudio/best'
-
- if outtmpl_default is not None and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
- parser.error('Cannot download a video and extract audio into the same'
- ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
- ' template'.format(outtmpl_default))
+ outtmpl_default = opts.outtmpl.get('default')
+ if outtmpl_default == '':
+ opts.skip_download = None
+ del opts.outtmpl['default']
+ if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio:
+ raise ValueError(
+ 'Cannot download a video and extract audio into the same file! '
+ f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
+
+ # Remove chapters
+ remove_chapters_patterns, opts.remove_ranges = [], []
+ for regex in opts.remove_chapters or []:
+ if regex.startswith('*'):
+ dur = list(map(parse_duration, regex[1:].split('-')))
+ if len(dur) == 2 and all(t is not None for t in dur):
+ opts.remove_ranges.append(tuple(dur))
+ continue
+ raise ValueError(f'invalid --remove-chapters time range "{regex}". Must be of the form *start-end')
+ try:
+ remove_chapters_patterns.append(re.compile(regex))
+ except re.error as err:
+ raise ValueError(f'invalid --remove-chapters regex "{regex}" - {err}')
+ opts.remove_chapters = remove_chapters_patterns
- for f in opts.format_sort:
- if re.match(InfoExtractor.FormatSort.regex, f) is None:
- parser.error('invalid format sort string "%s" specified' % f)
+ # Cookies from browser
+ if opts.cookiesfrombrowser:
+ mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser)
+ if mobj is None:
+ raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
+ browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile')
+ browser_name = browser_name.lower()
+ if browser_name not in SUPPORTED_BROWSERS:
+ raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". '
+ f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}')
+ if keyring is not None:
+ keyring = keyring.upper()
+ if keyring not in SUPPORTED_KEYRINGS:
+ raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". '
+ f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
+ opts.cookiesfrombrowser = (browser_name, profile, keyring)
+ # MetadataParser
def metadataparser_actions(f):
if isinstance(f, str):
cmd = '--parse-metadata %s' % compat_shlex_quote(f)
try:
actions = [MetadataFromFieldPP.to_action(f)]
except Exception as err:
- parser.error(f'{cmd} is invalid; {err}')
+ raise ValueError(f'{cmd} is invalid; {err}')
else:
cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f))
actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))
@@ -388,162 +340,218 @@ def _real_main(argv=None):
try:
MetadataParserPP.validate_action(*action)
except Exception as err:
- parser.error(f'{cmd} is invalid; {err}')
+ raise ValueError(f'{cmd} is invalid; {err}')
yield action
- if opts.parse_metadata is None:
- opts.parse_metadata = []
+ parse_metadata = opts.parse_metadata or []
if opts.metafromtitle is not None:
- opts.parse_metadata.append('title:%s' % opts.metafromtitle)
- opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, opts.parse_metadata)))
+ parse_metadata.append('title:%s' % opts.metafromtitle)
+ opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata)))
- any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json
- or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
- or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration)
+ # Other options
+ geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
+ if geo_bypass_code is not None:
+ try:
+ GeoUtils.random_ipv4(geo_bypass_code)
+ except Exception:
+ raise ValueError('unsupported geo-bypass country or ip-block')
- any_printing = opts.print_json
- download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+ opts.match_filter = match_filter_func(opts.match_filter)
+
+ if opts.download_archive is not None:
+ opts.download_archive = expand_path(opts.download_archive)
- # If JSON is not printed anywhere, but comments are requested, save it to file
- printing_json = opts.dumpjson or opts.print_json or opts.dump_single_json
- if opts.getcomments and not printing_json:
- opts.writeinfojson = True
+ if opts.user_agent is not None:
+ opts.headers.setdefault('User-Agent', opts.user_agent)
+ if opts.referer is not None:
+ opts.headers.setdefault('Referer', opts.referer)
if opts.no_sponsorblock:
- opts.sponsorblock_mark = set()
- opts.sponsorblock_remove = set()
- sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
+ opts.sponsorblock_mark = opts.sponsorblock_remove = set()
- opts.remove_chapters = opts.remove_chapters or []
-
- if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False:
- if opts.sponskrub:
- if opts.remove_chapters:
- report_conflict('--remove-chapters', '--sponskrub')
- if opts.sponsorblock_mark:
- report_conflict('--sponsorblock-mark', '--sponskrub')
- if opts.sponsorblock_remove:
- report_conflict('--sponsorblock-remove', '--sponskrub')
- opts.sponskrub = False
- if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False:
- report_conflict('--split-chapter', '--sponskrub-cut')
- opts.sponskrub_cut = False
-
- if opts.remuxvideo and opts.recodevideo:
- report_conflict('--recode-video', '--remux-video')
- opts.remuxvideo = False
-
- if opts.allow_unplayable_formats:
- def report_unplayable_conflict(opt_name, arg, default=False, allowed=None):
- val = getattr(opts, opt_name)
- if (not allowed and val) or (allowed and not allowed(val)):
- report_conflict('--allow-unplayable-formats', arg)
- setattr(opts, opt_name, default)
-
- report_unplayable_conflict('extractaudio', '--extract-audio')
- report_unplayable_conflict('remuxvideo', '--remux-video')
- report_unplayable_conflict('recodevideo', '--recode-video')
- report_unplayable_conflict('addmetadata', '--embed-metadata')
- report_unplayable_conflict('addchapters', '--embed-chapters')
- report_unplayable_conflict('embed_infojson', '--embed-info-json')
- opts.embed_infojson = False
- report_unplayable_conflict('embedsubtitles', '--embed-subs')
- report_unplayable_conflict('embedthumbnail', '--embed-thumbnail')
- report_unplayable_conflict('xattrs', '--xattrs')
- report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore'))
- opts.fixup = 'never'
- report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[])
- report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set())
- report_unplayable_conflict('sponskrub', '--sponskrub', default=set())
- opts.sponskrub = False
+ warnings, deprecation_warnings = [], []
+
+ # Common mistake: -f best
+ if opts.format == 'best':
+ warnings.append('.\n '.join((
+ '"-f best" selects the best pre-merged format which is often not the best option',
+ 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
+ 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
+
+ # --(post-processor/downloader)-args without name
+ def report_args_compat(name, value, key1, key2=None):
+ if key1 in value and key2 not in value:
+ warnings.append(f'{name} arguments given without specifying name. The arguments will be given to all {name}s')
+ return True
+ return False
+
+ report_args_compat('external downloader', opts.external_downloader_args, 'default')
+ if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'):
+ opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat')
+ opts.postprocessor_args.setdefault('sponskrub', [])
+
+ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats',
+ val1=NO_DEFAULT, val2=NO_DEFAULT, default=False):
+ if val2 is NO_DEFAULT:
+ val2 = getattr(opts, opt2)
+ if not val2:
+ return
+
+ if val1 is NO_DEFAULT:
+ val1 = getattr(opts, opt1)
+ if val1:
+ warnings.append(f'{arg1} is ignored since {arg2} was given')
+ setattr(opts, opt1, default)
+
+ # Conflicting options
+ report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
+ report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
+ report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl'))
+ report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default'))
+ report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo')
+ report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters')
+ report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark')
+ report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove')
+ report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut)
+
+ # Conflicts with --allow-unplayable-formats
+ report_conflict('--add-metadata', 'addmetadata')
+ report_conflict('--embed-chapters', 'addchapters')
+ report_conflict('--embed-info-json', 'embed_infojson')
+ report_conflict('--embed-subs', 'embedsubtitles')
+ report_conflict('--embed-thumbnail', 'embedthumbnail')
+ report_conflict('--extract-audio', 'extractaudio')
+ report_conflict('--fixup', 'fixup', val1=(opts.fixup or '').lower() in ('', 'never', 'ignore'), default='never')
+ report_conflict('--recode-video', 'recodevideo')
+ report_conflict('--remove-chapters', 'remove_chapters', default=[])
+ report_conflict('--remux-video', 'remuxvideo')
+ report_conflict('--sponskrub', 'sponskrub')
+ report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set())
+ report_conflict('--xattrs', 'xattrs')
+
+ # Fully deprecated options
+ def report_deprecation(val, old, new=None):
+ if not val:
+ return
+ deprecation_warnings.append(
+ f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new
+ else f'{old} is deprecated and may not work as expected')
+
+ report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove')
+ report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg')
+ # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future
+ # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future
+ # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it
+
+ # Dependent options
+ opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore)
+
+ if opts.exec_before_dl_cmd:
+ opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd
+
+ if opts.useid: # --id is not deprecated in youtube-dl
+ opts.outtmpl['default'] = '%(id)s.%(ext)s'
+
+ if opts.overwrites: # --force-overwrites implies --no-continue
+ opts.continue_dl = False
if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None:
+ # Add chapters when adding metadata or marking sponsors
opts.addchapters = True
- # PostProcessors
- postprocessors = list(opts.add_postprocessors)
+ if opts.extractaudio and not opts.keepvideo and opts.format is None:
+ # Do not unnecessarily download audio
+ opts.format = 'bestaudio/best'
+
+ if opts.getcomments and opts.writeinfojson is None:
+ # If JSON is not printed anywhere, but comments are requested, save it to file
+ if not (opts.dumpjson or opts.print_json or opts.dump_single_json):
+ opts.writeinfojson = True
+
+ if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub):
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ opts.writesubtitles = True
+
+ if opts.addmetadata and opts.embed_infojson is None:
+ # If embedding metadata and infojson is present, embed it
+ opts.embed_infojson = 'if_exists'
+
+ # Ask for passwords
+ if opts.username is not None and opts.password is None:
+ opts.password = compat_getpass('Type account password and press [Return]: ')
+ if opts.ap_username is not None and opts.ap_password is None:
+ opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
+
+ return warnings, deprecation_warnings
+
+
+def get_postprocessors(opts):
+ yield from opts.add_postprocessors
+
+ if opts.parse_metadata:
+ yield {
+ 'key': 'MetadataParser',
+ 'actions': opts.parse_metadata,
+ 'when': 'pre_process'
+ }
+ sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove
if sponsorblock_query:
- postprocessors.append({
+ yield {
'key': 'SponsorBlock',
'categories': sponsorblock_query,
'api': opts.sponsorblock_api,
- # Run this after filtering videos
'when': 'after_filter'
- })
- if opts.parse_metadata:
- postprocessors.append({
- 'key': 'MetadataParser',
- 'actions': opts.parse_metadata,
- # Run this immediately after extraction is complete
- 'when': 'pre_process'
- })
+ }
if opts.convertsubtitles:
- postprocessors.append({
+ yield {
'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles,
- # Run this before the actual video download
'when': 'before_dl'
- })
+ }
if opts.convertthumbnails:
- postprocessors.append({
+ yield {
'key': 'FFmpegThumbnailsConvertor',
'format': opts.convertthumbnails,
- # Run this before the actual video download
'when': 'before_dl'
- })
+ }
if opts.extractaudio:
- postprocessors.append({
+ yield {
'key': 'FFmpegExtractAudio',
'preferredcodec': opts.audioformat,
'preferredquality': opts.audioquality,
'nopostoverwrites': opts.nopostoverwrites,
- })
+ }
if opts.remuxvideo:
- postprocessors.append({
+ yield {
'key': 'FFmpegVideoRemuxer',
'preferedformat': opts.remuxvideo,
- })
+ }
if opts.recodevideo:
- postprocessors.append({
+ yield {
'key': 'FFmpegVideoConvertor',
'preferedformat': opts.recodevideo,
- })
+ }
# If ModifyChapters is going to remove chapters, subtitles must already be in the container.
if opts.embedsubtitles:
- already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts
- postprocessors.append({
+ keep_subs = 'no-keep-subs' not in opts.compat_opts
+ yield {
'key': 'FFmpegEmbedSubtitle',
# already_have_subtitle = True prevents the file from being deleted after embedding
- 'already_have_subtitle': already_have_subtitle
- })
- if not opts.writeautomaticsub and 'no-keep-subs' not in compat_opts:
+ 'already_have_subtitle': opts.writesubtitles and keep_subs
+ }
+ if not opts.writeautomaticsub and keep_subs:
opts.writesubtitles = True
- # --all-sub automatically sets --write-sub if --write-auto-sub is not given
- # this was the old behaviour if only --all-sub was given.
- if opts.allsubtitles and not opts.writeautomaticsub:
- opts.writesubtitles = True
+
# ModifyChapters must run before FFmpegMetadataPP
- remove_chapters_patterns, remove_ranges = [], []
- for regex in opts.remove_chapters:
- if regex.startswith('*'):
- dur = list(map(parse_duration, regex[1:].split('-')))
- if len(dur) == 2 and all(t is not None for t in dur):
- remove_ranges.append(tuple(dur))
- continue
- parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end')
- try:
- remove_chapters_patterns.append(re.compile(regex))
- except re.error as err:
- parser.error(f'invalid --remove-chapters regex {regex!r} - {err}')
if opts.remove_chapters or sponsorblock_query:
- postprocessors.append({
+ yield {
'key': 'ModifyChapters',
- 'remove_chapters_patterns': remove_chapters_patterns,
+ 'remove_chapters_patterns': opts.remove_chapters,
'remove_sponsor_segments': opts.sponsorblock_remove,
- 'remove_ranges': remove_ranges,
+ 'remove_ranges': opts.remove_ranges,
'sponsorblock_chapter_title': opts.sponsorblock_chapter_title,
'force_keyframes': opts.force_keyframes_at_cuts
- })
+ }
# FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
# FFmpegExtractAudioPP as containers before conversion may not support
# metadata (3gp, webm, etc.)
@@ -551,21 +559,19 @@ def _real_main(argv=None):
# source and target containers. From this point the container won't change,
# so metadata can be added here.
if opts.addmetadata or opts.addchapters or opts.embed_infojson:
- if opts.embed_infojson is None:
- opts.embed_infojson = 'if_exists'
- postprocessors.append({
+ yield {
'key': 'FFmpegMetadata',
'add_chapters': opts.addchapters,
'add_metadata': opts.addmetadata,
'add_infojson': opts.embed_infojson,
- })
+ }
# Deprecated
# This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment
# but must be below EmbedSubtitle and FFmpegMetadata
# See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29
# If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
if opts.sponskrub is not False:
- postprocessors.append({
+ yield {
'key': 'SponSkrub',
'path': opts.sponskrub_path,
'args': opts.sponskrub_args,
@@ -573,64 +579,57 @@ def _real_main(argv=None):
'force': opts.sponskrub_force,
'ignoreerror': opts.sponskrub is None,
'_from_cli': True,
- })
+ }
if opts.embedthumbnail:
- postprocessors.append({
+ yield {
'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding
'already_have_thumbnail': opts.writethumbnail
- })
+ }
if not opts.writethumbnail:
opts.writethumbnail = True
opts.outtmpl['pl_thumbnail'] = ''
if opts.split_chapters:
- postprocessors.append({
+ yield {
'key': 'FFmpegSplitChapters',
'force_keyframes': opts.force_keyframes_at_cuts,
- })
+ }
# XAttrMetadataPP should be run after post-processors that may change file contents
if opts.xattrs:
- postprocessors.append({'key': 'XAttrMetadata'})
+ yield {'key': 'XAttrMetadata'}
if opts.concat_playlist != 'never':
- postprocessors.append({
+ yield {
'key': 'FFmpegConcat',
'only_multi_video': opts.concat_playlist != 'always',
'when': 'playlist',
- })
+ }
# Exec must be the last PP of each category
- if opts.exec_before_dl_cmd:
- opts.exec_cmd.setdefault('before_dl', opts.exec_before_dl_cmd)
for when, exec_cmd in opts.exec_cmd.items():
- postprocessors.append({
+ yield {
'key': 'Exec',
'exec_cmd': exec_cmd,
- # Run this only after the files have been moved to their final locations
'when': when,
- })
+ }
- def report_args_compat(arg, name):
- warnings.append('%s given without specifying name. The arguments will be given to all %s' % (arg, name))
- if 'default' in opts.external_downloader_args:
- report_args_compat('--downloader-args', 'external downloaders')
+def parse_options(argv=None):
+ """ @returns (parser, opts, urls, ydl_opts) """
+ parser, opts, urls = parseOpts(argv)
+ urls = get_urls(urls, opts.batchfile, opts.verbose)
- if 'default-compat' in opts.postprocessor_args and 'default' not in opts.postprocessor_args:
- report_args_compat('--post-processor-args', 'post-processors')
- opts.postprocessor_args.setdefault('sponskrub', [])
- opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat']
+ set_compat_opts(opts)
+ try:
+ warnings, deprecation_warnings = validate_options(opts)
+ except ValueError as err:
+ parser.error(f'{err}\n')
- def report_deprecation(val, old, new=None):
- if not val:
- return
- deprecation_warnings.append(
- f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new
- else f'{old} is deprecated and may not work as expected')
+ postprocessors = list(get_postprocessors(opts))
- report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove')
- report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg')
- report_deprecation(opts.include_ads, '--include-ads')
- # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future
- # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it
+ any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json
+ or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
+ or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration)
+
+ any_printing = opts.print_json
final_ext = (
opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
@@ -638,11 +637,7 @@ def _real_main(argv=None):
else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best')
else None)
- match_filter = (
- None if opts.match_filter is None
- else match_filter_func(opts.match_filter))
-
- ydl_opts = {
+ return parser, opts, urls, {
'usenetrc': opts.usenetrc,
'netrc_location': opts.netrc_location,
'username': opts.username,
@@ -710,7 +705,7 @@ def _real_main(argv=None):
'playlistreverse': opts.playlist_reverse,
'playlistrandom': opts.playlist_random,
'noplaylist': opts.noplaylist,
- 'logtostderr': outtmpl_default == '-',
+ 'logtostderr': opts.outtmpl.get('default') == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
'updatetime': opts.updatetime,
@@ -746,11 +741,11 @@ def _real_main(argv=None):
'max_filesize': opts.max_filesize,
'min_views': opts.min_views,
'max_views': opts.max_views,
- 'daterange': date,
+ 'daterange': opts.date,
'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
'age_limit': opts.age_limit,
- 'download_archive': download_archive_fn,
+ 'download_archive': opts.download_archive,
'break_on_existing': opts.break_on_existing,
'break_on_reject': opts.break_on_reject,
'break_per_url': opts.break_per_url,
@@ -791,7 +786,7 @@ def _real_main(argv=None):
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
- 'match_filter': match_filter,
+ 'match_filter': opts.match_filter,
'no_color': opts.no_color,
'ffmpeg_location': opts.ffmpeg_location,
'hls_prefer_native': opts.hls_prefer_native,
@@ -806,9 +801,30 @@ def _real_main(argv=None):
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
'_warnings': warnings,
'_deprecation_warnings': deprecation_warnings,
- 'compat_opts': compat_opts,
+ 'compat_opts': opts.compat_opts,
}
+
+def _real_main(argv=None):
+ # Compatibility fixes for Windows
+ if sys.platform == 'win32':
+ # https://github.com/ytdl-org/youtube-dl/issues/820
+ codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+
+ workaround_optparse_bug9161()
+
+ setproctitle('yt-dlp')
+
+ parser, opts, all_urls, ydl_opts = parse_options(argv)
+
+ # Dump user agent
+ if opts.dump_user_agent:
+ ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
+ write_string(f'{ua}\n', out=sys.stdout)
+
+ if print_extractor_information(opts, all_urls):
+ sys.exit(0)
+
with YoutubeDL(ydl_opts) as ydl:
actual_use = all_urls or opts.load_info_filename
@@ -851,4 +867,10 @@ def main(argv=None):
sys.exit(f'\nERROR: {e}')
-__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
+__all__ = [
+ 'main',
+ 'YoutubeDL',
+ 'parse_options',
+ 'gen_extractors',
+ 'list_extractors',
+]
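
The refactor above splits _real_main into set_compat_opts, validate_options, get_postprocessors and parse_options, and exports parse_options in __all__. A minimal sketch of driving the same option pipeline from Python (argv values are placeholders):

    import yt_dlp

    # Build ydl_opts exactly as the CLI would, then reuse them
    parser, opts, urls, ydl_opts = yt_dlp.parse_options(
        ['-f', 'b', 'https://example.com/video'])
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(urls)
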
diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py
index 2bc6a6b7f..0a0d3b351 100644
--- a/yt_dlp/compat.py
+++ b/yt_dlp/compat.py
@@ -170,6 +170,13 @@ except ImportError:
except ImportError:
compat_pycrypto_AES = None
+try:
+ import brotlicffi as compat_brotli
+except ImportError:
+ try:
+ import brotli as compat_brotli
+ except ImportError:
+ compat_brotli = None
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
@@ -258,6 +265,7 @@ __all__ = [
'compat_asyncio_run',
'compat_b64decode',
'compat_basestring',
+ 'compat_brotli',
'compat_chr',
'compat_collections_abc',
'compat_cookiejar',
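
The cascading import above exposes one name, compat_brotli, regardless of which brotli binding is installed; callers only need a None-check. A small sketch (the payload handling is illustrative):

    from yt_dlp.compat import compat_brotli

    def brotli_decompress(data):
        # compat_brotli is None when neither brotlicffi nor brotli is installed
        if compat_brotli is None:
            raise RuntimeError('brotli support is not available')
        return compat_brotli.decompress(data)
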
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index 3a949d38a..afd2f2e38 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -159,7 +159,7 @@ class FileDownloader(object):
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
- self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
+ self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
def to_stderr(self, message):
self.ydl.to_stderr(message)
@@ -277,9 +277,9 @@ class FileDownloader(object):
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
- self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
+ self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
else:
- self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
+ self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
def _finish_multiline_status(self):
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index ef4205edc..b28d1ec17 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -22,6 +22,9 @@ class YoutubeLiveChatFD(FragmentFD):
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+ if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+ self.report_warning('Live chat download runs until the livestream ends. '
+ 'If you wish to download the video simultaneously, run a separate yt-dlp instance')
fragment_retries = self.params.get('fragment_retries', 0)
test = self.params.get('test', False)
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 66b12c72f..360fa4699 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -8,10 +8,6 @@ import struct
from base64 import urlsafe_b64encode
from binascii import unhexlify
-import typing
-if typing.TYPE_CHECKING:
- from ..YoutubeDL import YoutubeDL
-
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..compat import (
@@ -36,15 +32,15 @@ from ..utils import (
# NOTE: network handler related code is a temporary thing until the network stack overhaul PRs are merged (#2861/#2862)
-def add_opener(self: 'YoutubeDL', handler):
+def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
- self._opener.add_handler(handler)
+ assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ ydl._opener.add_handler(handler)
-def remove_opener(self: 'YoutubeDL', handler):
+def remove_opener(ydl, handler):
'''
Remove handler(s) for opening URLs
@param handler Either handler object itself or handler type.
@@ -52,8 +48,8 @@ def remove_opener(self: 'YoutubeDL', handler):
'''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- opener = self._opener
- assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
+ opener = ydl._opener
+ assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
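
Both helpers above just manipulate the OpenerDirector behind ydl._opener. A hedged sketch of the intended usage pattern, assuming the helpers stay importable from yt_dlp.extractor.abematv:

    from yt_dlp.extractor.abematv import add_opener, remove_opener

    def with_handler(ydl, handler, fetch):
        # Temporarily install a custom urllib handler on ydl's opener
        add_opener(ydl, handler)
        try:
            return fetch()
        finally:
            remove_opener(ydl, type(handler))  # remove by handler type
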
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index bebcafa6b..f0eba8844 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1345,6 +1345,11 @@ MSO_INFO = {
'username_field': 'username',
'password_field': 'password',
},
+ 'Suddenlink': {
+ 'name': 'Suddenlink',
+ 'username_field': 'username',
+ 'password_field': 'password',
+ },
}
@@ -1636,6 +1641,52 @@ class AdobePassIE(InfoExtractor):
query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login')
+ elif mso_id == 'Suddenlink':
+ # Suddenlink is similar to SlingTV in using a tab history count and a meta refresh,
+ # but they also do a dynamic redirect using JavaScript that has to be followed as well
+ first_bookend_page, urlh = post_form(
+ provider_redirect_page_res, 'Pressing Continue...')
+
+ hidden_data = self._hidden_inputs(first_bookend_page)
+ hidden_data['history_val'] = 1
+
+ provider_login_redirect_page = self._download_webpage(
+ urlh.geturl(), video_id, 'Sending First Bookend',
+ query=hidden_data)
+
+ provider_tryauth_url = self._html_search_regex(
+ r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+
+ provider_tryauth_page = self._download_webpage(
+ provider_tryauth_url, video_id, 'Submitting TryAuth',
+ query=hidden_data)
+
+ provider_login_page_res = self._download_webpage_handle(
+ f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
+ video_id, 'Getting Login Page',
+ query=hidden_data)
+
+ provider_association_redirect, urlh = post_form(
+ provider_login_page_res, 'Logging in', {
+ mso_info['username_field']: username,
+ mso_info['password_field']: password
+ })
+
+ provider_refresh_redirect_url = extract_redirect_url(
+ provider_association_redirect, url=urlh.geturl())
+
+ last_bookend_page, urlh = self._download_webpage_handle(
+ provider_refresh_redirect_url, video_id,
+ 'Downloading Auth Association Redirect Page')
+
+ hidden_data = self._hidden_inputs(last_bookend_page)
+ hidden_data['history_val'] = 3
+
+ mvpd_confirm_page_res = self._download_webpage_handle(
+ urlh.geturl(), video_id, 'Sending Final Bookend',
+ query=hidden_data)
+
+ post_form(mvpd_confirm_page_res, 'Confirming Login')
else:
# Some providers (e.g. DIRECTV NOW) have another meta refresh
# based redirect that should be followed.
diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py
index 7d70e0427..1075b461e 100644
--- a/yt_dlp/extractor/ant1newsgr.py
+++ b/yt_dlp/extractor/ant1newsgr.py
@@ -97,8 +97,8 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
- return self.url_result_or_playlist_from_matches(
- embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(),
+ return self.playlist_from_matches(
+ embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py
index 4ad5d6ddd..7ea339b39 100644
--- a/yt_dlp/extractor/ard.py
+++ b/yt_dlp/extractor/ard.py
@@ -407,8 +407,9 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
(?:(?:beta|www)\.)?ardmediathek\.de/
(?:(?P<client>[^/]+)/)?
(?:player|live|video|(?P<playlist>sendung|sammlung))/
- (?:(?P<display_id>[^?#]+)/)?
- (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)'''
+ (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
+ (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
+ (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))'''
_TESTS = [{
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
@@ -437,6 +438,13 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
'upload_date': '20211108',
},
}, {
+ 'url': 'https://www.ardmediathek.de/sendung/beforeigners/beforeigners/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1',
+ 'playlist_count': 6,
+ 'info_dict': {
+ 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw',
+ 'title': 'beforeigners/beforeigners/staffel-1',
+ },
+ }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'only_matching': True,
}, {
@@ -561,14 +569,15 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
break
pageNumber = pageNumber + 1
- return self.playlist_result(entries, playlist_title=display_id)
+ return self.playlist_result(entries, playlist_id, playlist_title=display_id)
def _real_extract(self, url):
- video_id, display_id, playlist_type, client = self._match_valid_url(url).group(
- 'id', 'display_id', 'playlist', 'client')
+ video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group(
+ 'id', 'display_id', 'playlist', 'client', 'season')
display_id, client = display_id or video_id, client or 'ard'
if playlist_type:
+ # TODO: Extract only the specified season
return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)
player_page = self._download_json(
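
The (?(playlist)...) constructs in the new _VALID_URL are regex conditional groups: each branch applies only when the playlist group matched, which is how the trailing /<season> segment is required for sendung/sammlung URLs but not for plain videos. A stripped-down sketch against the new test URL:

    import re

    pattern = re.compile(r'''(?x)
        (?:player|live|video|(?P<playlist>sendung|sammlung))/
        (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)?
        (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)
        (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''')

    m = pattern.search(
        'sendung/beforeigners/beforeigners/staffel-1/'
        'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JlZm9yZWlnbmVycw/1')
    print(m.group('playlist'), m.group('season'))  # sendung 1
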
diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py
index ea98f8688..9dbaabfa0 100644
--- a/yt_dlp/extractor/ccma.py
+++ b/yt_dlp/extractor/ccma.py
@@ -1,17 +1,14 @@
# coding: utf-8
from __future__ import unicode_literals
-import calendar
-import datetime
-
from .common import InfoExtractor
from ..utils import (
clean_html,
- extract_timezone,
int_or_none,
parse_duration,
parse_resolution,
try_get,
+ unified_timestamp,
url_or_none,
)
@@ -95,14 +92,8 @@ class CCMAIE(InfoExtractor):
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
tematica = try_get(informacio, lambda x: x['tematica']['text'])
- timestamp = None
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
- try:
- timezone, data_utc = extract_timezone(data_utc)
- timestamp = calendar.timegm((datetime.datetime.strptime(
- data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
- except TypeError:
- pass
+ timestamp = unified_timestamp(data_utc)
subtitles = {}
subtitols = media.get('subtitols') or []
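
unified_timestamp already performs the timezone handling the removed code did by hand, and the change also drops a latent bug: the old strptime format string had the day and month fields swapped ('%Y-%d-%m'). A sketch with an assumed sample value:

    from yt_dlp.utils import unified_timestamp

    # ISO-8601 string with a UTC offset, as data_emissio.utc is expected to be
    print(unified_timestamp('2020-05-15T20:30:00+02:00'))  # 1589567400
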
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index f86e7cb3e..354814433 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -226,6 +226,7 @@ class InfoExtractor(object):
The following fields are optional:
+ direct: True if a direct video file was given (must only be set by GenericIE)
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
@@ -274,7 +275,7 @@ class InfoExtractor(object):
* "url": A URL pointing to the subtitles file
It can optionally also have:
* "name": Name or description of the subtitles
- * http_headers: A dictionary of additional HTTP headers
+ * "http_headers": A dictionary of additional HTTP headers
to add to the request.
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles'; contains automatically generated
@@ -425,8 +426,8 @@ class InfoExtractor(object):
title, description etc.
- Subclasses of this one should re-define the _real_initialize() and
- _real_extract() methods and define a _VALID_URL regexp.
+ Subclasses of this should define a _VALID_URL regexp and re-define the
+ _real_extract() and (optionally) _real_initialize() methods.
Probably, they should also be added to the list of extractors.
Subclasses may also override suitable() if necessary, but ensure the function
@@ -661,7 +662,7 @@ class InfoExtractor(object):
return False
def set_downloader(self, downloader):
- """Sets the downloader for this IE."""
+ """Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
def _real_initialize(self):
@@ -670,7 +671,7 @@ class InfoExtractor(object):
def _real_extract(self, url):
"""Real extraction process. Redefine in subclasses."""
- pass
+ raise NotImplementedError('This method must be implemented by subclasses')
@classmethod
def ie_key(cls):
@@ -749,7 +750,7 @@ class InfoExtractor(object):
errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
if fatal:
- raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+ raise ExtractorError(errmsg, cause=err)
else:
self.report_warning(errmsg)
return False
@@ -1661,31 +1662,31 @@ class InfoExtractor(object):
'format_id': {'type': 'alias', 'field': 'id'},
'preference': {'type': 'alias', 'field': 'ie_pref'},
'language_preference': {'type': 'alias', 'field': 'lang'},
-
- # Deprecated
- 'dimension': {'type': 'alias', 'field': 'res'},
- 'resolution': {'type': 'alias', 'field': 'res'},
- 'extension': {'type': 'alias', 'field': 'ext'},
- 'bitrate': {'type': 'alias', 'field': 'br'},
- 'total_bitrate': {'type': 'alias', 'field': 'tbr'},
- 'video_bitrate': {'type': 'alias', 'field': 'vbr'},
- 'audio_bitrate': {'type': 'alias', 'field': 'abr'},
- 'framerate': {'type': 'alias', 'field': 'fps'},
- 'protocol': {'type': 'alias', 'field': 'proto'},
'source_preference': {'type': 'alias', 'field': 'source'},
+ 'protocol': {'type': 'alias', 'field': 'proto'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
- 'filesize_estimate': {'type': 'alias', 'field': 'size'},
- 'samplerate': {'type': 'alias', 'field': 'asr'},
- 'video_ext': {'type': 'alias', 'field': 'vext'},
- 'audio_ext': {'type': 'alias', 'field': 'aext'},
- 'video_codec': {'type': 'alias', 'field': 'vcodec'},
- 'audio_codec': {'type': 'alias', 'field': 'acodec'},
- 'video': {'type': 'alias', 'field': 'hasvid'},
- 'has_video': {'type': 'alias', 'field': 'hasvid'},
- 'audio': {'type': 'alias', 'field': 'hasaud'},
- 'has_audio': {'type': 'alias', 'field': 'hasaud'},
- 'extractor': {'type': 'alias', 'field': 'ie_pref'},
- 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'},
+
+ # Deprecated
+ 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
+ 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
+ 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
+ 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
+ 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
+ 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
+ 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
+ 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
}
def __init__(self, ie, field_preference):
@@ -1785,7 +1786,7 @@ class InfoExtractor(object):
continue
if self._get_field_setting(field, 'type') == 'alias':
alias, field = field, self._get_field_setting(field, 'field')
- if alias not in ('format_id', 'preference', 'language_preference'):
+ if self._get_field_setting(alias, 'deprecated'):
self.ydl.deprecation_warning(
f'Format sorting alias {alias} is deprecated '
f'and may be removed in a future version. Please use {field} instead')
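
Marking each legacy alias with an explicit 'deprecated': True flag means the warning no longer depends on a hard-coded allow-list of alias names. A minimal sketch of the gating logic, with entries taken from the table above:

    settings = {
        'preference': {'type': 'alias', 'field': 'ie_pref'},
        'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
    }

    def resolve_alias(field):
        info = settings.get(field, {})
        if info.get('type') != 'alias':
            return field
        if info.get('deprecated'):
            print(f'Format sorting alias {field} is deprecated. '
                  f'Please use {info["field"]} instead')
        return info['field']

    resolve_alias('video_bitrate')  # warns, resolves to 'vbr'
    resolve_alias('preference')     # resolves silently to 'ie_pref'
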
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 5448acf01..09b795c56 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -520,6 +520,7 @@ from .foxnews import (
FoxNewsArticleIE,
)
from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
@@ -848,6 +849,7 @@ from .microsoftvirtualacademy import (
from .mildom import (
MildomIE,
MildomVodIE,
+ MildomClipIE,
MildomUserVodIE,
)
from .minds import (
@@ -1150,6 +1152,11 @@ from .palcomp3 import (
PalcoMP3VideoIE,
)
from .pandoratv import PandoraTVIE
+from .panopto import (
+ PanoptoIE,
+ PanoptoListIE,
+ PanoptoPlaylistIE
+)
from .paramountplus import (
ParamountPlusIE,
ParamountPlusSeriesIE,
@@ -1218,6 +1225,7 @@ from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
+ PokemonSoundLibraryIE,
)
from .pokergo import (
PokerGoIE,
@@ -2010,6 +2018,7 @@ from .ximalaya import (
XimalayaIE,
XimalayaAlbumIE
)
+from .xinpianchang import XinpianchangIE
from .xminus import XMinusIE
from .xnxx import XNXXIE
from .xstream import XstreamIE
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index d39dcc058..ef57b221c 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -18,6 +18,7 @@ from ..utils import (
ExtractorError,
float_or_none,
get_element_by_id,
+ get_first,
int_or_none,
js_to_json,
merge_dicts,
@@ -405,11 +406,9 @@ class FacebookIE(InfoExtractor):
..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or []
media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or []
if str(m.get('id')) == video_id and m.get('__typename') == 'Video']
- title = traverse_obj(media, (..., 'title', 'text'), get_all=False)
- description = traverse_obj(media, (
- ..., 'creation_story', 'comet_sections', 'message', 'story', 'message', 'text'), get_all=False)
- uploader_data = (traverse_obj(media, (..., 'owner'), get_all=False)
- or traverse_obj(post, (..., 'node', 'actors', ...), get_all=False) or {})
+ title = get_first(media, ('title', 'text'))
+ description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text'))
+ uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {}
page_title = title or self._html_search_regex((
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>(?P<content>[^<]*)</h2>',
diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py
new file mode 100644
index 000000000..a34e90bb1
--- /dev/null
+++ b/yt_dlp/extractor/fptplay.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import time
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import (
+ join_nonempty,
+)
+
+
+class FptplayIE(InfoExtractor):
+ _VALID_URL = r'https?://fptplay\.vn/(?P<type>xem-video)/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>[^/]+)?/?(?:[?#]|$)|)'
+ _GEO_COUNTRIES = ['VN']
+ IE_NAME = 'fptplay'
+ IE_DESC = 'fptplay.vn'
+ _TESTS = [{
+ 'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945',
+ 'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
+ 'info_dict': {
+ 'id': '621a123016f369ebbde55945',
+ 'ext': 'mp4',
+ 'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Ms. Cupid In Love',
+ 'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3',
+ 'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
+ 'info_dict': {
+ 'id': '61f3aa8a6b3b1d2e73c60eb5',
+ 'ext': 'mp4',
+ 'title': 'Má Tôi Là Đại Gia - 3',
+ 'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c',
+ },
+ }, {
+ 'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ type_url, video_id, episode = self._match_valid_url(url).group('type', 'id', 'episode')
+ webpage = self._download_webpage(url, video_id=video_id, fatal=False)
+ info = self._download_json(self.get_api_with_st_token(video_id, episode or 0), video_id)
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(info['data']['url'], video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': join_nonempty(
+ self._html_search_meta(('og:title', 'twitter:title'), webpage), episode, delim=' - '),
+ 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def get_api_with_st_token(self, video_id, episode):
+ path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
+ timestamp = int(time.time()) + 10800
+
+ t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
+ r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+ n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
+
+ def convert(e):
+ t = ''
+ n = 0
+ i = [0, 0, 0]
+ a = [0, 0, 0, 0]
+ s = len(e)
+ c = 0
+ for z in range(s, 0, -1):
+ if n <= 3:
+ i[n] = e[c]
+ n += 1
+ c += 1
+ if 3 == n:
+ a[0] = (252 & i[0]) >> 2
+ a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+ a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+ a[3] = (63 & i[2])
+ for v in range(4):
+ t += r[a[v]]
+ n = 0
+ if n:
+ for o in range(n, 3):
+ i[o] = 0
+
+ for o in range(n + 1):
+ a[0] = (252 & i[0]) >> 2
+ a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
+ a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
+ a[3] = (63 & i[2])
+ t += r[a[o]]
+ n += 1
+ while n < 3:
+ t += ''
+ n += 1
+ return t
+
+ st_token = convert(n).replace('+', '-').replace('/', '_').replace('=', '')
+ return f'https://api.fptplay.net{path}?{urllib.parse.urlencode({"st": st_token, "e": timestamp})}'
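
The convert routine above is a hand-rolled base64 encoder: n is simply the raw MD5 digest (hex pairs parsed back into bytes), and the replace/strip chain makes the output URL-safe and unpadded. A sketch of the same token using the stdlib, which should be equivalent:

    import base64
    import hashlib
    import time

    def st_token(video_id, episode=0):
        path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip'
        timestamp = int(time.time()) + 10800
        digest = hashlib.md5(
            f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).digest()
        # unpadded URL-safe base64 of the digest
        return base64.urlsafe_b64encode(digest).decode().rstrip('='), timestamp
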
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index 40b8cb0b4..0d29da29b 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
entries = []
for lesson in lessons:
lesson_name = lesson.get('slug')
- if not lesson_name:
- continue
lesson_id = lesson.get('hash') or lesson.get('statsId')
+ if not lesson_id or not lesson_name:
+ continue
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
title = course.get('title')
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 0ddd050ff..6a8b8543b 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -146,6 +146,7 @@ from .tvp import TVPEmbedIE
from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
from .gfycat import GfycatIE
+from .panopto import PanoptoBaseIE
class GenericIE(InfoExtractor):
@@ -2498,6 +2499,15 @@ class GenericIE(InfoExtractor):
'id': '?vid=2295'
},
'playlist_count': 9
+ },
+ {
+ # Panopto embeds
+ 'url': 'https://www.monash.edu/learning-teaching/teachhq/learning-technologies/panopto/how-to/insert-a-quiz-into-a-panopto-video',
+ 'info_dict': {
+ 'title': 'Insert a quiz into a Panopto video',
+ 'id': 'insert-a-quiz-into-a-panopto-video'
+ },
+ 'playlist_count': 1
}
]
@@ -3723,6 +3733,9 @@ class GenericIE(InfoExtractor):
if gfycat_urls:
return self.playlist_from_matches(gfycat_urls, video_id, video_title, ie=GfycatIE.ie_key())
+ panopto_urls = PanoptoBaseIE._extract_urls(webpage)
+ if panopto_urls:
+ return self.playlist_from_matches(panopto_urls, video_id, video_title)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py
index cab3aa045..4ac70ea57 100644
--- a/yt_dlp/extractor/mgtv.py
+++ b/yt_dlp/extractor/mgtv.py
@@ -13,12 +13,15 @@ from ..compat import (
from ..utils import (
ExtractorError,
int_or_none,
+ try_get,
+ url_or_none,
)
class MGTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
IE_DESC = '芒果TV'
+ IE_NAME = 'MangoTV'
_TESTS = [{
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
@@ -31,6 +34,32 @@ class MGTVIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
+ 'url': 'https://w.mgtv.com/b/427837/15588271.html',
+ 'info_dict': {
+ 'id': '15588271',
+ 'ext': 'mp4',
+ 'title': '春日迟迟再出发 沉浸版',
+ 'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 4026,
+ },
+ }, {
+ 'url': 'https://w.mgtv.com/b/333652/7329822.html',
+ 'info_dict': {
+ 'id': '7329822',
+ 'ext': 'mp4',
+ 'title': '拜托,请你爱我',
+ 'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 2656,
+ },
+ }, {
+ 'url': 'https://w.mgtv.com/b/427837/15591647.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
+ 'only_matching': True,
+ }, {
'url': 'http://www.mgtv.com/b/301817/3826653.html',
'only_matching': True,
}, {
@@ -40,12 +69,14 @@ class MGTVIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
- tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
+ tk2 = base64.urlsafe_b64encode(
+ f'did={compat_str(uuid.uuid4())}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
try:
api_data = self._download_json(
'https://pcweb.api.mgtv.com/player/video', video_id, query={
'tk2': tk2,
'video_id': video_id,
+ 'type': 'pch5'
}, headers=self.geo_verification_headers())['data']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
@@ -61,6 +92,7 @@ class MGTVIE(InfoExtractor):
'pm2': api_data['atc']['pm2'],
'tk2': tk2,
'video_id': video_id,
+ 'src': 'intelmgtv',
}, headers=self.geo_verification_headers())['data']
stream_domain = stream_data['stream_domain'][0]
@@ -71,7 +103,7 @@ class MGTVIE(InfoExtractor):
continue
format_data = self._download_json(
stream_domain + stream_path, video_id,
- note='Download video info for format #%d' % idx)
+ note=f'Download video info for format #{idx}')
format_url = format_data.get('info')
if not format_url:
continue
@@ -79,7 +111,7 @@ class MGTVIE(InfoExtractor):
r'_(\d+)_mp4/', format_url, 'tbr', default=None))
formats.append({
'format_id': compat_str(tbr or idx),
- 'url': format_url,
+ 'url': url_or_none(format_url),
'ext': 'mp4',
'tbr': tbr,
'protocol': 'm3u8_native',
@@ -97,4 +129,25 @@ class MGTVIE(InfoExtractor):
'description': info.get('desc'),
'duration': int_or_none(info.get('duration')),
'thumbnail': info.get('thumb'),
+ 'subtitles': self.extract_subtitles(video_id, stream_domain),
}
+
+ def _get_subtitles(self, video_id, domain):
+ info = self._download_json(f'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
+ video_id, fatal=False) or {}
+ subtitles = {}
+ for sub in try_get(info, lambda x: x['data']['title']) or []:
+ url_sub = sub.get('url')
+ if not url_sub:
+ continue
+ locale = sub.get('captionCountrySimpleName')
+ sub = self._download_json(f'{domain}{url_sub}', video_id, fatal=False,
+ note=f'Download subtitle for locale {sub.get("name")} ({locale})') or {}
+ sub_url = url_or_none(sub.get('info'))
+ if not sub_url:
+ continue
+ subtitles.setdefault(locale or 'en', []).append({
+ 'url': sub_url,
+ 'ext': 'srt'
+ })
+ return subtitles
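
The extract_subtitles/_get_subtitles split is yt-dlp's lazy-subtitles convention: the base class only calls _get_subtitles when subtitles were actually requested, so the extra API round-trips are skipped otherwise. The mapping returned has the shape below (URLs illustrative):

    subtitles = {
        'en': [{'url': 'https://example.com/en.srt', 'ext': 'srt'}],
        'th': [{'url': 'https://example.com/th.srt', 'ext': 'srt'}],
    }
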
diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py
index b5a2e17f2..5f2df29c6 100644
--- a/yt_dlp/extractor/mildom.py
+++ b/yt_dlp/extractor/mildom.py
@@ -1,102 +1,42 @@
# coding: utf-8
from __future__ import unicode_literals
-import base64
-from datetime import datetime
-import itertools
+import functools
import json
from .common import InfoExtractor
from ..utils import (
- update_url_query,
- random_uuidv4,
- try_get,
+ determine_ext,
+ dict_get,
+ ExtractorError,
float_or_none,
- dict_get
-)
-from ..compat import (
- compat_str,
+ OnDemandPagedList,
+ random_uuidv4,
+ traverse_obj,
)
class MildomBaseIE(InfoExtractor):
_GUEST_ID = None
- _DISPATCHER_CONFIG = None
-
- def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', init=False):
- query = query or {}
- if query:
- query['__platform'] = 'web'
- url = update_url_query(url, self._common_queries(query, init=init))
- content = self._download_json(url, video_id, note=note)
- if content['code'] == 0:
- return content['body']
- else:
- self.raise_no_formats(
- f'Video not found or premium content. {content["code"]} - {content["message"]}',
- expected=True)
- def _common_queries(self, query={}, init=False):
- dc = self._fetch_dispatcher_config()
- r = {
- 'timestamp': self.iso_timestamp(),
- '__guest_id': '' if init else self.guest_id(),
- '__location': dc['location'],
- '__country': dc['country'],
- '__cluster': dc['cluster'],
- '__platform': 'web',
- '__la': self.lang_code(),
- '__pcv': 'v2.9.44',
- 'sfr': 'pc',
- 'accessToken': '',
- }
- r.update(query)
- return r
-
- def _fetch_dispatcher_config(self):
- if not self._DISPATCHER_CONFIG:
- tmp = self._download_json(
- 'https://disp.mildom.com/serverListV2', 'initialization',
- note='Downloading dispatcher_config', data=json.dumps({
- 'protover': 0,
- 'data': base64.b64encode(json.dumps({
- 'fr': 'web',
- 'sfr': 'pc',
- 'devi': 'Windows',
- 'la': 'ja',
- 'gid': None,
- 'loc': '',
- 'clu': '',
- 'wh': '1919*810',
- 'rtm': self.iso_timestamp(),
- 'ua': self.get_param('http_headers')['User-Agent'],
- }).encode('utf8')).decode('utf8').replace('\n', ''),
- }).encode('utf8'))
- self._DISPATCHER_CONFIG = self._parse_json(base64.b64decode(tmp['data']), 'initialization')
- return self._DISPATCHER_CONFIG
-
- @staticmethod
- def iso_timestamp():
- 'new Date().toISOString()'
- return datetime.utcnow().isoformat()[0:-3] + 'Z'
-
- def guest_id(self):
- 'getGuestId'
- if self._GUEST_ID:
- return self._GUEST_ID
- self._GUEST_ID = try_get(
- self, (
- lambda x: x._call_api(
- 'https://cloudac.mildom.com/nonolive/gappserv/guest/h5init', 'initialization',
- note='Downloading guest token', init=True)['guest_id'] or None,
- lambda x: x._get_cookies('https://www.mildom.com').get('gid').value,
- lambda x: x._get_cookies('https://m.mildom.com').get('gid').value,
- ), compat_str) or ''
- return self._GUEST_ID
-
- def lang_code(self):
- 'getCurrentLangCode'
- return 'ja'
+ def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
+ if not self._GUEST_ID:
+ self._GUEST_ID = f'pc-gp-{random_uuidv4()}'
+
+ content = self._download_json(
+ url, video_id, note=note, data=json.dumps(body).encode() if body else None,
+ headers={'Content-Type': 'application/json'} if body else {},
+ query={
+ '__guest_id': self._GUEST_ID,
+ '__platform': 'web',
+ **(query or {}),
+ })
+
+ if content['code'] != 0:
+ raise ExtractorError(
+ f'Mildom says: {content["message"]} (code {content["code"]})',
+ expected=True)
+ return content['body']
class MildomIE(MildomBaseIE):
@@ -106,31 +46,13 @@ class MildomIE(MildomBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- url = 'https://www.mildom.com/%s' % video_id
-
- webpage = self._download_webpage(url, video_id)
+ webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id)
enterstudio = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id,
note='Downloading live metadata', query={'user_id': video_id})
result_video_id = enterstudio.get('log_id', video_id)
- title = try_get(
- enterstudio, (
- lambda x: self._html_search_meta('twitter:description', webpage),
- lambda x: x['anchor_intro'],
- ), compat_str)
- description = try_get(
- enterstudio, (
- lambda x: x['intro'],
- lambda x: x['live_intro'],
- ), compat_str)
- uploader = try_get(
- enterstudio, (
- lambda x: self._html_search_meta('twitter:title', webpage),
- lambda x: x['loginname'],
- ), compat_str)
-
servers = self._call_api(
'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id,
note='Downloading live server list', query={
@@ -138,17 +60,20 @@ class MildomIE(MildomBaseIE):
'live_server_type': 'hls',
})
- stream_query = self._common_queries({
- 'streamReqId': random_uuidv4(),
- 'is_lhls': '0',
- })
- m3u8_url = update_url_query(servers['stream_server'] + '/%s_master.m3u8' % video_id, stream_query)
- formats = self._extract_m3u8_formats(m3u8_url, result_video_id, 'mp4', headers={
- 'Referer': 'https://www.mildom.com/',
- 'Origin': 'https://www.mildom.com',
- }, note='Downloading m3u8 information')
-
- del stream_query['streamReqId'], stream_query['timestamp']
+ playback_token = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id,
+ note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'})
+ playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False)
+ if not playback_token:
+ raise ExtractorError('Failed to obtain live playback token')
+
+ formats = self._extract_m3u8_formats(
+ f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}',
+ result_video_id, 'mp4', headers={
+ 'Referer': 'https://www.mildom.com/',
+ 'Origin': 'https://www.mildom.com',
+ })
+
for fmt in formats:
fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'
@@ -156,10 +81,10 @@ class MildomIE(MildomBaseIE):
return {
'id': result_video_id,
- 'title': title,
- 'description': description,
+ 'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'),
+ 'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str),
'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000),
- 'uploader': uploader,
+ 'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'),
'uploader_id': video_id,
'formats': formats,
'is_live': True,
@@ -168,7 +93,7 @@ class MildomIE(MildomBaseIE):
class MildomVodIE(MildomBaseIE):
IE_NAME = 'mildom:vod'
- IE_DESC = 'Download a VOD in Mildom'
+ IE_DESC = 'VOD in Mildom'
_VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
_TESTS = [{
'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
@@ -215,11 +140,8 @@ class MildomVodIE(MildomBaseIE):
}]
def _real_extract(self, url):
- m = self._match_valid_url(url)
- user_id, video_id = m.group('user_id'), m.group('id')
- url = 'https://www.mildom.com/playback/%s/%s' % (user_id, video_id)
-
- webpage = self._download_webpage(url, video_id)
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)
autoplay = self._call_api(
'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
@@ -227,20 +149,6 @@ class MildomVodIE(MildomBaseIE):
'v_id': video_id,
})['playback']
- title = try_get(
- autoplay, (
- lambda x: self._html_search_meta('og:description', webpage),
- lambda x: x['title'],
- ), compat_str)
- description = try_get(
- autoplay, (
- lambda x: x['video_intro'],
- ), compat_str)
- uploader = try_get(
- autoplay, (
- lambda x: x['author_info']['login_name'],
- ), compat_str)
-
formats = [{
'url': autoplay['audio_url'],
'format_id': 'audio',
@@ -265,17 +173,81 @@ class MildomVodIE(MildomBaseIE):
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': float_or_none(autoplay['publish_time'], scale=1000),
- 'duration': float_or_none(autoplay['video_length'], scale=1000),
+ 'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
+ 'description': traverse_obj(autoplay, 'video_intro'),
+ 'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
+ 'duration': float_or_none(autoplay.get('video_length'), scale=1000),
'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
- 'uploader': uploader,
+ 'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
'uploader_id': user_id,
'formats': formats,
}
+class MildomClipIE(MildomBaseIE):
+ IE_NAME = 'mildom:clip'
+ IE_DESC = 'Clip in Mildom'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
+ 'info_dict': {
+ 'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
+ 'title': '全然違ったよ',
+ 'timestamp': 1619181890,
+ 'duration': 59,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': 'ざきんぽ',
+ 'uploader_id': '10042245',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+ 'info_dict': {
+ 'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
+ 'title': 'かっこいい',
+ 'timestamp': 1621094003,
+ 'duration': 59,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': '(ルーキー',
+ 'uploader_id': '10111524',
+ },
+ }, {
+ 'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+ 'info_dict': {
+ 'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
+ 'title': 'あ',
+ 'timestamp': 1614769431,
+ 'duration': 31,
+ 'thumbnail': r're:https?://.+',
+ 'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
+ 'uploader_id': '10660174',
+ },
+ }]
+
+ def _real_extract(self, url):
+ user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
+ webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)
+
+ clip_detail = self._call_api(
+ 'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
+ note='Downloading playback metadata', query={
+ 'clip_id': video_id,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_meta(
+ ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'),
+ 'timestamp': float_or_none(clip_detail.get('create_time')),
+ 'duration': float_or_none(clip_detail.get('length')),
+ 'thumbnail': clip_detail.get('cover'),
+ 'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')),
+ 'uploader_id': user_id,
+
+ 'url': clip_detail['url'],
+ 'ext': determine_ext(clip_detail.get('url'), 'mp4'),
+ }
+
+
class MildomUserVodIE(MildomBaseIE):
IE_NAME = 'mildom:user:vod'
IE_DESC = 'Download all VODs from specific user in Mildom'
@@ -286,29 +258,32 @@ class MildomUserVodIE(MildomBaseIE):
'id': '10093333',
'title': 'Uploads from ねこばたけ',
},
- 'playlist_mincount': 351,
+ 'playlist_mincount': 732,
}, {
'url': 'https://www.mildom.com/profile/10882672',
'info_dict': {
'id': '10882672',
'title': 'Uploads from kson組長(けいそん)',
},
- 'playlist_mincount': 191,
+ 'playlist_mincount': 201,
}]
- def _entries(self, user_id):
- for page in itertools.count(1):
- reply = self._call_api(
- 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
- user_id, note='Downloading page %d' % page, query={
- 'user_id': user_id,
- 'page': page,
- 'limit': '30',
- })
- if not reply:
- break
- for x in reply:
- yield self.url_result('https://www.mildom.com/playback/%s/%s' % (user_id, x['v_id']))
+ def _fetch_page(self, user_id, page):
+ page += 1
+ reply = self._call_api(
+ 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
+ user_id, note=f'Downloading page {page}', query={
+ 'user_id': user_id,
+ 'page': page,
+ 'limit': '30',
+ })
+ if not reply:
+ return
+ for x in reply:
+ v_id = x.get('v_id')
+ if not v_id:
+ continue
+ yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')
def _real_extract(self, url):
user_id = self._match_id(url)
@@ -319,4 +294,5 @@ class MildomUserVodIE(MildomBaseIE):
query={'user_id': user_id}, note='Downloading user profile')['user_info']
return self.playlist_result(
- self._entries(user_id), user_id, 'Uploads from %s' % profile['loginname'])
+ OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30),
+ user_id, f'Uploads from {profile["loginname"]}')
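
The rewritten extractor swaps the hand-rolled `_entries` generator for `OnDemandPagedList`, which fetches only the pages a given `--playlist-items` selection actually touches; note that `_fetch_page` converts the 0-based index it receives into the API's 1-based `page` parameter. A minimal sketch of the pattern, assuming a yt-dlp checkout on the import path (the three-page dataset is made up for illustration):

from yt_dlp.utils import OnDemandPagedList

PAGES = {0: ['a', 'b'], 1: ['c', 'd'], 2: ['e']}  # hypothetical API pages

def fetch_page(page):
    # Receives a 0-based page index and yields that page's entries
    for entry_id in PAGES.get(page, []):
        yield {'id': entry_id}

playlist = OnDemandPagedList(fetch_page, 2)  # page size matches the API 'limit'
print(playlist.getslice(0, 3))  # touches pages 0 and 1 only
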
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 49d58a685..4d723e886 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -8,6 +8,7 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ compat_HTTPError,
determine_ext,
ExtractorError,
int_or_none,
@@ -147,10 +148,14 @@ class NRKIE(NRKBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url).split('/')[-1]
- path_templ = 'playback/%s/program/' + video_id
-
def call_playback_api(item, query=None):
- return self._call_api(path_templ % item, video_id, item, query=query)
+ try:
+ return self._call_api(f'playback/{item}/program/{video_id}', video_id, item, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ return self._call_api(f'playback/{item}/{video_id}', video_id, item, query=query)
+ raise
+
# known values for preferredCdn: akamai, iponly, minicdn and telenor
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
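
The new `call_playback_api` tries the current `playback/<item>/program/<id>` path first and falls back to the older `playback/<item>/<id>` layout when the server answers 400. The same retry shape, reduced to the standard library (both URLs below are placeholders, not real NRK endpoints):

import json
import urllib.error
import urllib.request

def call_with_fallback(primary_url, fallback_url):
    # On HTTP 400, assume the path layout changed and retry the fallback;
    # any other error propagates unchanged
    try:
        with urllib.request.urlopen(primary_url) as resp:
            return json.load(resp)
    except urllib.error.HTTPError as e:
        if e.code != 400:
            raise
    with urllib.request.urlopen(fallback_url) as resp:
        return json.load(resp)
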
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
index 0525b4830..b476c0986 100644
--- a/yt_dlp/extractor/openrec.py
+++ b/yt_dlp/extractor/openrec.py
@@ -4,10 +4,11 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ get_first,
int_or_none,
traverse_obj,
unified_strdate,
- unified_timestamp
+ unified_timestamp,
)
from ..compat import compat_str
@@ -19,42 +20,34 @@ class OpenRecBaseIE(InfoExtractor):
def _extract_movie(self, webpage, video_id, name, is_live):
window_stores = self._extract_pagestore(webpage, video_id)
- movie_store = traverse_obj(
- window_stores,
- ('v8', 'state', 'movie'),
- ('v8', 'movie'),
- expected_type=dict)
- if not movie_store:
+ movie_stores = [
+ # extract all three data stores (most of the data is duplicated between them, but some fields differ slightly)
+ traverse_obj(window_stores, ('v8', 'state', 'movie'), expected_type=dict),
+ traverse_obj(window_stores, ('v8', 'movie'), expected_type=dict),
+ traverse_obj(window_stores, 'movieStore', expected_type=dict),
+ ]
+ if not any(movie_stores):
raise ExtractorError(f'Failed to extract {name} info')
- title = movie_store.get('title')
- description = movie_store.get('introduction')
- thumbnail = movie_store.get('thumbnailUrl')
-
- uploader = traverse_obj(movie_store, ('channel', 'user', 'name'), expected_type=compat_str)
- uploader_id = traverse_obj(movie_store, ('channel', 'user', 'id'), expected_type=compat_str)
-
- timestamp = int_or_none(traverse_obj(movie_store, ('publishedAt', 'time')), scale=1000)
-
- m3u8_playlists = movie_store.get('media') or {}
+ m3u8_playlists = get_first(movie_stores, 'media') or {}
formats = []
for name, m3u8_url in m3u8_playlists.items():
if not m3u8_url:
continue
formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id='hls-%s' % name))
+ m3u8_url, video_id, ext='mp4', live=is_live, m3u8_id=name))
self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
+ 'title': get_first(movie_stores, 'title'),
+ 'description': get_first(movie_stores, 'introduction'),
+ 'thumbnail': get_first(movie_stores, 'thumbnailUrl'),
'formats': formats,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'timestamp': timestamp,
+ 'uploader': get_first(movie_stores, ('channel', 'user', 'name')),
+ 'uploader_id': get_first(movie_stores, ('channel', 'user', 'id')),
+ 'timestamp': int_or_none(get_first(movie_stores, ['publishedAt', 'time']), scale=1000) or unified_timestamp(get_first(movie_stores, 'publishedAt')),
'is_live': is_live,
}
@@ -72,7 +65,7 @@ class OpenRecIE(OpenRecBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)
+ webpage = self._download_webpage(f'https://www.openrec.tv/live/{video_id}', video_id)
return self._extract_movie(webpage, video_id, 'live', True)
@@ -96,7 +89,7 @@ class OpenRecCaptureIE(OpenRecBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)
+ webpage = self._download_webpage(f'https://www.openrec.tv/capture/{video_id}', video_id)
window_stores = self._extract_pagestore(webpage, video_id)
movie_store = window_stores.get('movie')
@@ -104,15 +97,6 @@ class OpenRecCaptureIE(OpenRecBaseIE):
capture_data = window_stores.get('capture')
if not capture_data:
raise ExtractorError('Cannot extract title')
- title = capture_data.get('title')
- thumbnail = capture_data.get('thumbnailUrl')
- upload_date = unified_strdate(capture_data.get('createdAt'))
-
- uploader = traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str)
- uploader_id = traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str)
-
- timestamp = traverse_obj(movie_store, 'createdAt', expected_type=compat_str)
- timestamp = unified_timestamp(timestamp)
formats = self._extract_m3u8_formats(
capture_data.get('source'), video_id, ext='mp4')
@@ -120,13 +104,13 @@ class OpenRecCaptureIE(OpenRecBaseIE):
return {
'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
+ 'title': capture_data.get('title'),
+ 'thumbnail': capture_data.get('thumbnailUrl'),
'formats': formats,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
+ 'timestamp': unified_timestamp(traverse_obj(movie_store, 'createdAt', expected_type=compat_str)),
+ 'uploader': traverse_obj(movie_store, ('channel', 'name'), expected_type=compat_str),
+ 'uploader_id': traverse_obj(movie_store, ('channel', 'id'), expected_type=compat_str),
+ 'upload_date': unified_strdate(capture_data.get('createdAt')),
}
@@ -148,6 +132,6 @@ class OpenRecMovieIE(OpenRecBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage('https://www.openrec.tv/movie/%s' % video_id, video_id)
+ webpage = self._download_webpage(f'https://www.openrec.tv/movie/{video_id}', video_id)
return self._extract_movie(webpage, video_id, 'movie', False)
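
`get_first` (added to `yt_dlp/utils.py` later in this diff) runs `traverse_obj` across a list of candidate dicts and returns the first non-None hit, which is what lets the code above treat the three near-duplicate movie stores as one source. A small demonstration, assuming a checkout of this release on the import path (the store contents are invented):

from yt_dlp.utils import get_first

movie_stores = [
    {'media': {'720p': 'https://example.com/a.m3u8'}},
    None,  # missing stores are simply skipped
    {'title': 'some stream'},
]
print(get_first(movie_stores, 'title'))            # -> 'some stream'
print(get_first(movie_stores, ('media', '720p')))  # -> 'https://example.com/a.m3u8'
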
diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py
new file mode 100644
index 000000000..d458dfe50
--- /dev/null
+++ b/yt_dlp/extractor/panopto.py
@@ -0,0 +1,445 @@
+import re
+import calendar
+import json
+import functools
+from datetime import datetime
+from random import random
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_urlparse
+)
+
+from ..utils import (
+ bug_reports_message,
+ ExtractorError,
+ get_first,
+ int_or_none,
+ OnDemandPagedList,
+ parse_qs,
+ traverse_obj,
+)
+
+
+class PanoptoBaseIE(InfoExtractor):
+ BASE_URL_RE = r'(?P<base_url>https?://[\w.]+\.panopto.(?:com|eu)/Panopto)'
+
+ def _call_api(self, base_url, path, video_id, data=None, fatal=True, **kwargs):
+ response = self._download_json(
+ base_url + path, video_id, data=json.dumps(data).encode('utf8') if data else None,
+ fatal=fatal, headers={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs)
+ if not response:
+ return
+ error_code = response.get('ErrorCode')
+ if error_code == 2:
+ self.raise_login_required(method='cookies')
+ elif error_code is not None:
+ msg = f'Panopto said: {response.get("ErrorMessage")}'
+ if fatal:
+ raise ExtractorError(msg, video_id=video_id, expected=True)
+ else:
+ self.report_warning(msg, video_id=video_id)
+ return response
+
+ @staticmethod
+ def _parse_fragment(url):
+ return {k: json.loads(v[0]) for k, v in compat_urlparse.parse_qs(compat_urllib_parse_urlparse(url).fragment).items()}
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=["\'](?P<url>%s/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)' % PanoptoIE.BASE_URL_RE,
+ webpage)]
+
+
+class PanoptoIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+ 'info_dict': {
+ 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
+ 'title': 'Panopto for Business - Use Cases',
+ 'timestamp': 1459184200,
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb&mode=Delivery&random=[\d.]+',
+ 'upload_date': '20160328',
+ 'ext': 'mp4',
+ 'cast': [],
+ 'duration': 88.17099999999999,
+ 'average_rating': int,
+ 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
+ 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'channel': 'Showcase Videos'
+ },
+ },
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+ 'info_dict': {
+ 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
+ 'title': 'Overcoming Top 4 Challenges of Enterprise Video',
+ 'uploader': 'Panopto Support',
+ 'timestamp': 1449409251,
+ 'thumbnail': r're:https://demo\.hosted\.panopto\.com/Panopto/Services/FrameGrabber\.svc/FrameRedirect\?objectId=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59&mode=Delivery&random=[\d.]+',
+ 'upload_date': '20151206',
+ 'ext': 'mp4',
+ 'chapters': 'count:21',
+ 'cast': ['Panopto Support'],
+ 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
+ 'average_rating': int,
+ 'description': 'md5:4391837802b3fc856dadf630c4b375d1',
+ 'duration': 1088.2659999999998,
+ 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
+ 'channel': 'Webcasts',
+ },
+ },
+ {
+ # Extra params in URL
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+ 'info_dict': {
+ 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
+ 'ext': 'mp4',
+ 'duration': 129.513,
+ 'cast': ['Kathryn Kelly'],
+ 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
+ 'timestamp': 1569845768,
+ 'tags': ['Viewer', 'Enterprise'],
+ 'upload_date': '20190930',
+ 'thumbnail': r're:https://howtovideos\.hosted\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=5fa74e93-3d87-4694-b60e-aaa4012214ed&mode=Delivery&random=[\d.]+',
+ 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
+ 'title': 'Getting Started: View a Video',
+ 'average_rating': int,
+ 'uploader': 'Kathryn Kelly',
+ 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
+ 'channel': 'Getting Started',
+ }
+ },
+ {
+ # Does not allow the normal Viewer.aspx. The AUDIO livestream has no URL, so it should be skipped, leaving only one stream.
+ 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
+ 'info_dict': {
+ 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
+ 'ext': 'mp4',
+ 'cast': ['LTS CLI Script'],
+ 'duration': 2178.45,
+ 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
+ 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
+ 'average_rating': int,
+ 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
+ 'uploader': 'LTS CLI Script',
+ 'timestamp': 1572458134,
+ 'title': 'WW2 Vets Interview 3 Ronald Stanley George',
+ 'thumbnail': r're:https://unisa\.au\.panopto\.com/Panopto/Services/FrameGrabber.svc/FrameRedirect\?objectId=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4&mode=Delivery&random=[\d.]+',
+ 'channel': 'World War II Veteran Interviews',
+ 'upload_date': '20191030',
+ },
+ },
+ {
+ 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
+ 'only_matching': True
+ },
+ {
+ 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
+ 'only_matching': True
+ },
+ ]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PanoptoPlaylistIE.suitable(url) else super().suitable(url)
+
+ def _mark_watched(self, base_url, video_id, delivery_info):
+ duration = traverse_obj(delivery_info, ('Delivery', 'Duration'), expected_type=float)
+ invocation_id = delivery_info.get('InvocationId')
+ stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
+ if invocation_id and stream_id and duration:
+ timestamp_str = f'/Date({calendar.timegm(datetime.utcnow().timetuple())}000)/'
+ data = {
+ 'streamRequests': [
+ {
+ 'ClientTimeStamp': timestamp_str,
+ 'ID': 0,
+ 'InvocationID': invocation_id,
+ 'PlaybackSpeed': 1,
+ 'SecondsListened': duration - 1,
+ 'SecondsRejected': 0,
+ 'StartPosition': 0,
+ 'StartReason': 2,
+ 'StopReason': None,
+ 'StreamID': stream_id,
+ 'TimeStamp': timestamp_str,
+ 'UpdatesRejected': 0
+ },
+ ]}
+
+ self._download_webpage(
+ base_url + '/Services/Analytics.svc/AddStreamRequests', video_id,
+ fatal=False, data=json.dumps(data).encode('utf8'), headers={'content-type': 'application/json'},
+ note='Marking watched', errnote='Unable to mark watched')
+
+ @staticmethod
+ def _extract_chapters(delivery):
+ chapters = []
+ for timestamp in delivery.get('Timestamps', []):
+ start, duration = int_or_none(timestamp.get('Time')), int_or_none(timestamp.get('Duration'))
+ if start is None or duration is None:
+ continue
+ chapters.append({
+ 'start_time': start,
+ 'end_time': start + duration,
+ 'title': timestamp.get('Caption')
+ })
+ return chapters
+
+ def _extract_streams_formats_and_subtitles(self, video_id, streams, **fmt_kwargs):
+ formats = []
+ subtitles = {}
+ for stream in streams or []:
+ stream_formats = []
+ http_stream_url = stream.get('StreamHttpUrl')
+ stream_url = stream.get('StreamUrl')
+
+ if http_stream_url:
+ stream_formats.append({'url': http_stream_url})
+
+ if stream_url:
+ media_type = stream.get('ViewerMediaFileTypeName')
+ if media_type in ('hls', ):
+ m3u8_formats, stream_subtitles = self._extract_m3u8_formats_and_subtitles(stream_url, video_id)
+ stream_formats.extend(m3u8_formats)
+ subtitles = self._merge_subtitles(subtitles, stream_subtitles)
+ else:
+ stream_formats.append({
+ 'url': stream_url
+ })
+ for fmt in stream_formats:
+ fmt.update({
+ 'format_note': stream.get('Tag'),
+ **fmt_kwargs
+ })
+ formats.extend(stream_formats)
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ base_url, video_id = self._match_valid_url(url).group('base_url', 'id')
+ delivery_info = self._call_api(
+ base_url, '/Pages/Viewer/DeliveryInfo.aspx', video_id,
+ query={
+ 'deliveryId': video_id,
+ 'invocationId': '',
+ 'isLiveNotes': 'false',
+ 'refreshAuthCookie': 'true',
+ 'isActiveBroadcast': 'false',
+ 'isEditing': 'false',
+ 'isKollectiveAgentInstalled': 'false',
+ 'isEmbed': 'false',
+ 'responseType': 'json',
+ }
+ )
+
+ delivery = delivery_info['Delivery']
+ session_start_time = int_or_none(delivery.get('SessionStartTime'))
+
+ # The podcast stream is usually the combined stream, so prefer it by default.
+ podcast_formats, podcast_subtitles = self._extract_streams_formats_and_subtitles(
+ video_id, delivery.get('PodcastStreams'), format_note='PODCAST')
+
+ streams_formats, streams_subtitles = self._extract_streams_formats_and_subtitles(
+ video_id, delivery.get('Streams'), preference=-10)
+
+ formats = podcast_formats + streams_formats
+ subtitles = self._merge_subtitles(podcast_subtitles, streams_subtitles)
+ self._sort_formats(formats)
+
+ self.mark_watched(base_url, video_id, delivery_info)
+
+ return {
+ 'id': video_id,
+ 'title': delivery.get('SessionName'),
+ 'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), default=[], expected_type=lambda x: x or None),
+ 'timestamp': session_start_time - 11640000000 if session_start_time else None,
+ 'duration': delivery.get('Duration'),
+ 'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
+ 'average_rating': delivery.get('AverageRating'),
+ 'chapters': self._extract_chapters(delivery) or None,
+ 'uploader': delivery.get('OwnerDisplayName') or None,
+ 'uploader_id': delivery.get('OwnerId'),
+ 'description': delivery.get('SessionAbstract'),
+ 'tags': traverse_obj(delivery, ('Tags', ..., 'Content')),
+ 'channel_id': delivery.get('SessionGroupPublicID'),
+ 'channel': traverse_obj(delivery, 'SessionGroupLongName', 'SessionGroupShortName', get_all=False),
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
+
+
+class PanoptoPlaylistIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
+ _TESTS = [
+ {
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
+ 'info_dict': {
+ 'title': 'Featured Video Tutorials',
+ 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
+ 'description': '',
+ },
+ 'playlist_mincount': 36
+ },
+ {
+ 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
+ 'info_dict': {
+ 'title': 'Library Website Introduction Playlist',
+ 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
+ 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
+ },
+ 'playlist_mincount': 4
+ },
+
+ ]
+
+ def _entries(self, base_url, playlist_id, session_list_id):
+ session_list_info = self._call_api(
+ base_url, f'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id)
+
+ items = session_list_info['Items']
+ for item in items:
+ if item.get('TypeName') != 'Session':
+ self.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once=True)
+ continue
+ yield {
+ '_type': 'url',
+ 'id': item.get('Id'),
+ 'url': item.get('ViewerUri'),
+ 'title': item.get('Name'),
+ 'description': item.get('Description'),
+ 'duration': item.get('Duration'),
+ 'channel': traverse_obj(item, ('Parent', 'Name')),
+ 'channel_id': traverse_obj(item, ('Parent', 'Id'))
+ }
+
+ def _real_extract(self, url):
+ base_url, playlist_id = self._match_valid_url(url).group('base_url', 'id')
+
+ video_id = get_first(parse_qs(url), 'id')
+ if video_id:
+ if self.get_param('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result(base_url + f'/Pages/Viewer.aspx?id={video_id}', ie_key=PanoptoIE.ie_key(), video_id=video_id)
+ else:
+ self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
+
+ playlist_info = self._call_api(base_url, f'/Api/Playlists/{playlist_id}', playlist_id)
+ return self.playlist_result(
+ self._entries(base_url, playlist_id, playlist_info['SessionListId']),
+ playlist_id=playlist_id, playlist_title=playlist_info.get('Name'),
+ playlist_description=playlist_info.get('Description'))
+
+
+class PanoptoListIE(PanoptoBaseIE):
+ _VALID_URL = PanoptoBaseIE.BASE_URL_RE + r'/Pages/Sessions/List\.aspx'
+ _PAGE_SIZE = 250
+ _TESTS = [
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
+ 'info_dict': {
+ 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
+ 'title': 'Showcase Videos'
+ },
+ 'playlist_mincount': 140
+
+ },
+ {
+ 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
+ 'info_dict': {
+ 'id': 'panopto_list',
+ 'title': 'panopto_list'
+ },
+ 'playlist_mincount': 300
+ },
+ {
+ # Folder that contains 8 folders and a playlist
+ 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
+ 'info_dict': {
+ 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
+ 'title': 'Video Tutorials'
+ },
+ 'playlist_mincount': 9
+ }
+
+ ]
+
+ def _fetch_page(self, base_url, query_params, display_id, page):
+
+ params = {
+ 'sortColumn': 1,
+ 'getFolderData': True,
+ 'includePlaylists': True,
+ **query_params,
+ 'page': page,
+ 'maxResults': self._PAGE_SIZE,
+ }
+
+ response = self._call_api(
+ base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page+1}',
+ data={'queryParameters': params}, fatal=False)
+
+ for result in get_first(response, 'Results', default=[]):
+ # This could be a video or a playlist (or possibly something else)
+ item_id = result.get('DeliveryID')
+ yield {
+ '_type': 'url',
+ 'id': item_id,
+ 'title': result.get('SessionName'),
+ 'url': traverse_obj(result, 'ViewerUrl', 'EmbedUrl', get_all=False) or (base_url + f'/Pages/Viewer.aspx?id={item_id}'),
+ 'duration': result.get('Duration'),
+ 'channel': result.get('FolderName'),
+ 'channel_id': result.get('FolderID'),
+ }
+
+ for folder in get_first(response, 'Subfolders', default=[]):
+ folder_id = folder.get('ID')
+ yield self.url_result(
+ base_url + f'/Pages/Sessions/List.aspx#folderID="{folder_id}"',
+ ie_key=PanoptoListIE.ie_key(), video_id=folder_id, title=folder.get('Name'))
+
+ def _extract_folder_metadata(self, base_url, folder_id):
+ response = self._call_api(
+ base_url, '/Services/Data.svc/GetFolderInfo', folder_id,
+ data={'folderID': folder_id}, fatal=False)
+ return {
+ 'title': get_first(response, 'Name', default=[])
+ }
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ base_url = mobj.group('base_url')
+
+ query_params = self._parse_fragment(url)
+ folder_id, display_id = query_params.get('folderID'), 'panopto_list'
+
+ if query_params.get('isSubscriptionsPage'):
+ display_id = 'subscriptions'
+ if not query_params.get('subscribableTypes'):
+ query_params['subscribableTypes'] = [0, 1, 2]
+ elif query_params.get('isSharedWithMe'):
+ display_id = 'sharedwithme'
+ elif folder_id:
+ display_id = folder_id
+
+ query = query_params.get('query')
+ if query:
+ display_id += f': query "{query}"'
+
+ info = {
+ '_type': 'playlist',
+ 'id': display_id,
+ 'title': display_id,
+ }
+ if folder_id:
+ info.update(self._extract_folder_metadata(base_url, folder_id))
+
+ info['entries'] = OnDemandPagedList(
+ functools.partial(self._fetch_page, base_url, query_params, display_id), self._PAGE_SIZE)
+
+ return info
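
A detail that makes `PanoptoListIE` work: `List.aspx` keeps its query state in the URL fragment, with every value JSON-encoded, which is what `_parse_fragment` decodes. The same decoding using only the standard library:

import json
from urllib.parse import parse_qs, urlparse

def parse_fragment(url):
    # '#folderID=%22e4c6...%22' -> {'folderID': 'e4c6...'}
    return {k: json.loads(v[0])
            for k, v in parse_qs(urlparse(url).fragment).items()}

url = 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22'
print(parse_fragment(url))  # {'folderID': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a'}
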
diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
index e0b2ab982..9d6b82178 100644
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -87,6 +87,7 @@ class PeerTubeIE(InfoExtractor):
maindreieck-tv\.de|
mani\.tube|
manicphase\.me|
+ media\.fsfe\.org|
media\.gzevd\.de|
media\.inno3\.cricket|
media\.kaitaia\.life|
diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py
index b93a02b7d..1a292b8ac 100644
--- a/yt_dlp/extractor/periscope.py
+++ b/yt_dlp/extractor/periscope.py
@@ -33,7 +33,7 @@ class PeriscopeBaseIE(InfoExtractor):
return {
'id': broadcast.get('id') or video_id,
- 'title': self._live_title(title) if is_live else title,
+ 'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py
index 402b574a7..b411390e2 100644
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
from .common import InfoExtractor
from ..utils import (
@@ -138,3 +139,42 @@ class PokemonWatchIE(InfoExtractor):
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})
+
+
+class PokemonSoundLibraryIE(InfoExtractor):
+ _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp'
+
+ _TESTS = [{
+ 'url': 'https://soundlibrary.pokemon.co.jp/',
+ 'info_dict': {
+ 'title': 'Pokémon Diamond and Pearl Sound Tracks',
+ },
+ 'playlist_mincount': 149,
+ }]
+
+ def _real_extract(self, url):
+ musicbox_webpage = self._download_webpage(
+ 'https://soundlibrary.pokemon.co.jp/musicbox', None,
+ 'Downloading list of songs')
+ song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)]
+ song_titles = song_titles[4::2]
+
+ # individual songs have no permalinks; instead we return all songs at once
+ song_entries = [{
+ 'id': f'pokemon-soundlibrary-{song_id}',
+ 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav',
+ # note: the server always serves MP3 files, despite the .wav extension in the URL above
+ 'ext': 'mp3',
+ 'acodec': 'mp3',
+ 'vcodec': 'none',
+ 'title': song_title,
+ 'track': song_title,
+ 'artist': 'Nintendo / Creatures Inc. / GAME FREAK inc.',
+ 'uploader': 'Pokémon',
+ 'release_year': 2006,
+ 'release_date': '20060928',
+ 'track_number': song_id,
+ 'album': 'Pokémon Diamond and Pearl',
+ } for song_id, song_title in enumerate(song_titles, 1)]
+
+ return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks')
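
Because every entry above carries a direct media URL, the dicts are final info dicts rather than pointers to pages needing a second extraction pass, so format details such as `ext` and `acodec` must be declared up front to override what the `.wav` URL would suggest. A toy version of the same pattern (URLs are hypothetical):

def build_entries(song_titles, base='https://example.com/sounds'):
    return [{
        'id': f'track-{num}',
        'url': f'{base}/{num}.wav',
        'ext': 'mp3',  # the server serves MP3 despite the .wav path
        'vcodec': 'none',
        'title': title,
        'track_number': num,
    } for num, title in enumerate(song_titles, 1)]

print(build_entries(['Intro', 'Battle Theme'])[0]['url'])
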
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index 79a5b2336..0fd65db4b 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -100,7 +100,7 @@ class RokfinIE(InfoExtractor):
video_url, video_id, fatal=False, live=live_status == 'is_live')
if not formats:
- if metadata.get('premiumPlan'):
+ if traverse_obj(metadata, 'premiumPlan', 'premium'):
self.raise_login_required('This video is only available to premium users', True, method='cookies')
elif scheduled:
self.raise_no_formats(
@@ -129,7 +129,7 @@ class RokfinIE(InfoExtractor):
'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none),
'live_status': live_status,
'availability': self._availability(
- needs_premium=bool(metadata.get('premiumPlan')),
+ needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')),
is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False),
# 'comment_count': metadata.get('numComments'), # Data provided by website is wrong
'__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None,
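
`traverse_obj(metadata, 'premiumPlan', 'premium')` passes two alternative paths; the helper tries them in order and returns the first non-None result, so both the old and the new API field names are accepted. Illustrated with invented payloads:

from yt_dlp.utils import traverse_obj

print(traverse_obj({'premiumPlan': 1}, 'premiumPlan', 'premium'))  # -> 1
print(traverse_obj({'premium': True}, 'premiumPlan', 'premium'))   # -> True
print(traverse_obj({}, 'premiumPlan', 'premium'))                  # -> None
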
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 8146b3ef5..64b8a71b6 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -59,8 +59,16 @@ class SoundcloudEmbedIE(InfoExtractor):
class SoundcloudBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'soundcloud'
+
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/'
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
+ _access_token = None
+ _HEADERS = {}
def _store_client_id(self, client_id):
self._downloader.cache.store('soundcloud', 'client_id', client_id)
@@ -103,14 +111,6 @@ class SoundcloudBaseIE(InfoExtractor):
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
self._login()
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
- _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
- _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
- _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
- _access_token = None
- _HEADERS = {}
- _NETRC_MACHINE = 'soundcloud'
-
def _login(self):
username, password = self._get_login_info()
if username is None:
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index daf1c7450..4bc2263f0 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -67,6 +67,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'series': 'The Witcher',
'season': 'Misc',
'episode_number': 13,
+ 'episode': 'Episode 13',
},
},
{
@@ -92,6 +93,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'series': 'Arma 3',
'season': 'Zeus Games',
'episode_number': 3,
+ 'episode': 'Episode 3',
},
},
]
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 620973a9f..56cc2dcc6 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -15,6 +15,7 @@ from ..compat import (
from ..utils import (
ExtractorError,
HEADRequest,
+ get_first,
int_or_none,
join_nonempty,
LazyList,
@@ -816,8 +817,7 @@ class DouyinIE(TikTokIE):
render_data = self._parse_json(
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
- return self._parse_aweme_video_web(
- traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url)
+ return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
class TikTokVMIE(InfoExtractor):
diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py
new file mode 100644
index 000000000..9832d2398
--- /dev/null
+++ b/yt_dlp/extractor/xinpianchang.py
@@ -0,0 +1,95 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ try_get,
+ update_url_query,
+ url_or_none,
+)
+
+
+class XinpianchangIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.xinpianchang\.com/(?P<id>[^/]+?)(?:\D|$)'
+ IE_NAME = 'xinpianchang'
+ IE_DESC = 'xinpianchang.com'
+ _TESTS = [{
+ 'url': 'https://www.xinpianchang.com/a11766551',
+ 'info_dict': {
+ 'id': 'a11766551',
+ 'ext': 'mp4',
+ 'title': '北京2022冬奥会闭幕式再见短片-冰墩墩下班了',
+ 'description': 'md5:4a730c10639a82190fabe921c0fa4b87',
+ 'duration': 151,
+ 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+ 'uploader': '正时文创',
+ 'uploader_id': 10357277,
+ 'categories': ['宣传片', '国家城市', '广告', '其他'],
+ 'keywords': ['北京冬奥会', '冰墩墩', '再见', '告别', '冰墩墩哭了', '感动', '闭幕式', '熄火']
+ },
+ }, {
+ 'url': 'https://www.xinpianchang.com/a11762904',
+ 'info_dict': {
+ 'id': 'a11762904',
+ 'ext': 'mp4',
+ 'title': '冬奥会决胜时刻《法国派出三只鸡?》',
+ 'description': 'md5:55cb139ef8f48f0c877932d1f196df8b',
+ 'duration': 136,
+ 'thumbnail': r're:^https?://oss-xpc0\.xpccdn\.com.+/assets/',
+ 'uploader': '精品动画',
+ 'uploader_id': 10858927,
+ 'categories': ['动画', '三维CG'],
+ 'keywords': ['France Télévisions', '法国3台', '蠢萌', '冬奥会']
+ },
+ }, {
+ 'url': 'https://www.xinpianchang.com/a11779743?from=IndexPick&part=%E7%BC%96%E8%BE%91%E7%B2%BE%E9%80%89&index=2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id=video_id)
+ domain = self.find_value_with_regex(var='requireNewDomain', webpage=webpage)
+ vid = self.find_value_with_regex(var='vid', webpage=webpage)
+ app_key = self.find_value_with_regex(var='modeServerAppKey', webpage=webpage)
+ api = update_url_query(f'{domain}/mod/api/v2/media/{vid}', {'appKey': app_key})
+ data = self._download_json(api, video_id=video_id)['data']
+ formats, subtitles = [], {}
+ for k, v in data.get('resource').items():
+ if k in ('dash', 'hls'):
+ v_url = v.get('url')
+ if not v_url:
+ continue
+ if k == 'dash':
+ fmts, subs = self._extract_mpd_formats_and_subtitles(v_url, video_id=video_id)
+ elif k == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(v_url, video_id=video_id)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
+ elif k == 'progressive':
+ formats.extend([{
+ 'url': url_or_none(prog.get('url')),
+ 'width': int_or_none(prog.get('width')),
+ 'height': int_or_none(prog.get('height')),
+ 'ext': 'mp4',
+ } for prog in v or [] if prog.get('url')])
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': data.get('title'),
+ 'description': data.get('description'),
+ 'duration': int_or_none(data.get('duration')),
+ 'categories': data.get('categories'),
+ 'keywords': data.get('keywords'),
+ 'thumbnail': data.get('cover'),
+ 'uploader': try_get(data, lambda x: x['owner']['username']),
+ 'uploader_id': try_get(data, lambda x: x['owner']['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def find_value_with_regex(self, var, webpage):
+ return self._search_regex(rf'var\s{var}\s=\s\"(?P<vid>[^\"]+)\"', webpage, name=var)
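
`find_value_with_regex` is a thin wrapper for pulling `var <name> = "<value>"` assignments out of the page's inline JavaScript. An equivalent with plain `re` (the sample markup is made up):

import re

def find_value(var, webpage):
    m = re.search(rf'var\s{var}\s=\s"(?P<value>[^"]+)"', webpage)
    return m.group('value') if m else None

page = '<script>var vid = "609094992"; var modeServerAppKey = "abc123";</script>'
print(find_value('vid', page))               # -> 609094992
print(find_value('modeServerAppKey', page))  # -> abc123
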
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index ee0277fd7..66bb8d9f0 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -39,6 +39,7 @@ from ..utils import (
ExtractorError,
float_or_none,
format_field,
+ get_first,
int_or_none,
is_html,
join_nonempty,
@@ -72,10 +73,6 @@ from ..utils import (
)
-def get_first(obj, keys, **kwargs):
- return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
-
-
# any clients starting with _ cannot be explicity requested by the user
INNERTUBE_CLIENTS = {
'web': {
@@ -2081,7 +2078,93 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'age_limit': 0,
'channel_follower_count': int
}, 'params': {'format': 'mhtml', 'skip_download': True}
- }
+ }, {
+ # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
+ 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
+ 'info_dict': {
+ 'id': '2NUZ8W2llS4',
+ 'ext': 'mp4',
+ 'title': 'The NP that test your phone performance 🙂',
+ 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
+ 'uploader': 'Leon Nguyen',
+ 'uploader_id': 'VNSXIII',
+ 'uploader_url': 'http://www.youtube.com/user/VNSXIII',
+ 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
+ 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
+ 'duration': 21,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Gaming'],
+ 'tags': 'count:23',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'upload_date': '20220103',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Leon Nguyen',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+ 'channel_follower_count': int
+ }
+ }, {
+ # Date text indicates a premiered video; ensure the upload date is in UTC (published 1641172509)
+ 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
+ 'info_dict': {
+ 'id': 'mzZzzBU6lrM',
+ 'ext': 'mp4',
+ 'title': 'I Met GeorgeNotFound In Real Life...',
+ 'description': 'md5:cca98a355c7184e750f711f3a1b22c84',
+ 'uploader': 'Quackity',
+ 'uploader_id': 'QuackityHQ',
+ 'uploader_url': 'http://www.youtube.com/user/QuackityHQ',
+ 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
+ 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
+ 'duration': 955,
+ 'view_count': int,
+ 'age_limit': 0,
+ 'categories': ['Entertainment'],
+ 'tags': 'count:26',
+ 'playable_in_embed': True,
+ 'live_status': 'not_live',
+ 'release_timestamp': 1641172509,
+ 'release_date': '20220103',
+ 'upload_date': '20220103',
+ 'like_count': int,
+ 'availability': 'public',
+ 'channel': 'Quackity',
+ 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
+ 'channel_follower_count': int
+ }
+ },
+ { # continuous livestream. Microformat upload date should be preferred.
+ # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27
+ 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU',
+ 'info_dict': {
+ 'id': 'kgx4WGK0oNU',
+ 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+ 'ext': 'mp4',
+ 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+ 'availability': 'public',
+ 'age_limit': 0,
+ 'release_timestamp': 1637975704,
+ 'upload_date': '20210619',
+ 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+ 'live_status': 'is_live',
+ 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg',
+ 'uploader': '阿鲍Abao',
+ 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA',
+ 'channel': 'Abao in Tokyo',
+ 'channel_follower_count': int,
+ 'release_date': '20211127',
+ 'tags': 'count:39',
+ 'categories': ['People & Blogs'],
+ 'like_count': int,
+ 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA',
+ 'view_count': int,
+ 'playable_in_embed': True,
+ 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+ },
+ 'params': {'skip_download': True}
+ },
]
@classmethod
@@ -3008,6 +3091,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
+ if is_damaged:
+ self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@@ -3027,7 +3112,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''),
'language_preference': language_preference,
- 'preference': -10 if is_damaged else None,
+ # Strictly de-prioritize damaged and 3gp formats
+ 'preference': -10 if is_damaged else -2 if itag == '17' else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -3336,9 +3422,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# URL checking if user don't care about getting the best possible thumbnail
'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
'description': video_description,
- 'upload_date': unified_strdate(
- get_first(microformats, 'uploadDate')
- or search_meta('uploadDate')),
'uploader': get_first(video_details, 'author'),
'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
'uploader_url': owner_profile_url,
@@ -3489,6 +3572,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for content in contents:
vpir = content.get('videoPrimaryInfoRenderer')
if vpir:
+ info['upload_date'] = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d')
stl = vpir.get('superTitleLink')
if stl:
stl = self._get_text(stl)
@@ -3567,6 +3651,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'channel_id': 'uploader_id',
'channel_url': 'uploader_url',
}
+
+ # The upload date for scheduled and current live streams / premieres in microformats
+ # is generally the true upload date. Although not in UTC, we will prefer that in this case.
+ # Note this changes to the published date when the stream/premiere has finished.
+ # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
+ if not info.get('upload_date') or info.get('is_live') or info.get('live_status') == 'is_upcoming':
+ info['upload_date'] = (
+ unified_strdate(get_first(microformats, 'uploadDate'))
+ or unified_strdate(search_meta('uploadDate'))
+ or info.get('upload_date'))
+
for to, frm in fallbacks.items():
if not info.get(to):
info[to] = info.get(frm)
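
Taken together, the two hunks above build a priority chain for `upload_date`: the UTC date text scraped from `videoPrimaryInfoRenderer` wins for finished videos, while live and upcoming streams fall back to the microformat or meta-tag date. A condensed sketch of just that selection logic (plain functions standing in for the extractor internals):

def pick_upload_date(info, microformat_date, meta_date):
    # The scraped UTC date is preferred, except for live/upcoming streams
    # where the microformat date is generally the true upload date
    if info.get('upload_date') and not (
            info.get('is_live') or info.get('live_status') == 'is_upcoming'):
        return info['upload_date']
    return microformat_date or meta_date or info.get('upload_date')

print(pick_upload_date({'upload_date': '20220103'}, '20220102', None))  # -> 20220103
print(pick_upload_date({'upload_date': '20211127', 'is_live': True}, '20210619', None))  # -> 20210619
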
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
index f84ba5cff..419bf30d8 100644
--- a/yt_dlp/extractor/zingmp3.py
+++ b/yt_dlp/extractor/zingmp3.py
@@ -9,7 +9,6 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
traverse_obj,
- HEADRequest,
)
@@ -106,18 +105,17 @@ class ZingMp3BaseIE(InfoExtractor):
def _real_initialize(self):
if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'):
- self._request_webpage(HEADRequest(self._DOMAIN), None, note='Updating cookies')
+ self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}),
+ None, note='Updating cookies')
def _real_extract(self, url):
song_id, type_url = self._match_valid_url(url).group('id', 'type')
-
api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id})
-
return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url)
def get_api_with_signature(self, name_api, param):
- sha256 = hashlib.sha256(''.join(f'{k}={v}' for k, v in param.items()).encode('utf-8')).hexdigest()
-
+ param.update({'ctime': '1'})
+ sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest()
data = {
'apiKey': self._API_KEY,
'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(),
@@ -149,7 +147,7 @@ class ZingMp3IE(ZingMp3BaseIE):
},
}, {
'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
- 'md5': 'e9c972b693aa88301ef981c8151c4343',
+ 'md5': 'c7f23d971ac1a4f675456ed13c9b9612',
'info_dict': {
'id': 'ZO8ZF7C7',
'title': 'Sương Hoa Đưa Lối',
@@ -158,6 +156,22 @@ class ZingMp3IE(ZingMp3BaseIE):
'duration': 207,
'track': 'Sương Hoa Đưa Lối',
'artist': 'K-ICM, RYO',
+ 'album': 'Sương Hoa Đưa Lối (Single)',
+ 'album_artist': 'K-ICM, RYO',
+ },
+ }, {
+ 'url': 'https://zingmp3.vn/bai-hat/Nguoi-Yeu-Toi-Lanh-Lung-Sat-Da-Mr-Siro/ZZ6IW7OU.html',
+ 'md5': '3e9f7a9bd0d965573dbff8d7c68b629d',
+ 'info_dict': {
+ 'id': 'ZZ6IW7OU',
+ 'title': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
+ 'ext': 'mp3',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'duration': 303,
+ 'track': 'Người Yêu Tôi Lạnh Lùng Sắt Đá',
+ 'artist': 'Mr. Siro',
+ 'album': 'Người Yêu Tôi Lạnh Lùng Sắt Đá (Single)',
+ 'album_artist': 'Mr. Siro',
},
}, {
'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',
@@ -184,6 +198,14 @@ class ZingMp3AlbumIE(ZingMp3BaseIE):
},
'playlist_count': 9,
}, {
+ 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html',
+ 'info_dict': {
+ '_type': 'playlist',
+ 'id': 'ZWZAEZZD',
+ 'title': 'Những Bài Hát Hay Nhất Của Mr. Siro',
+ },
+ 'playlist_count': 49,
+ }, {
'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html',
'only_matching': True,
}, {
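
The signing change sorts the query parameters (now always including `ctime`) before hashing, making the signature independent of parameter order: SHA-256 of the canonical `k=v` string, wrapped in an HMAC-SHA512 keyed with the site secret. The scheme in isolation (key and parameter values below are dummies):

import hashlib
import hmac

def sign_request(name_api, param, secret_key):
    param = {**param, 'ctime': '1'}
    # Canonicalize: concatenate k=v pairs in sorted key order, then SHA-256
    sha256 = hashlib.sha256(
        ''.join(f'{k}={param[k]}' for k in sorted(param)).encode('utf-8')).hexdigest()
    # The final signature covers the API slug plus the parameter digest
    return hmac.new(secret_key, f'{name_api}{sha256}'.encode('utf-8'),
                    hashlib.sha512).hexdigest()

print(sign_request('/api/v2/song/getStreaming', {'id': 'ZO8ZF7C7'}, b'dummy-secret'))
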
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 9f1f31974..9f6b45ec6 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -319,7 +319,7 @@ def create_parser():
general.add_option(
'--mark-watched',
action='store_true', dest='mark_watched', default=False,
- help='Mark videos watched (even with --simulate). Currently only supported for YouTube')
+ help='Mark videos watched (even with --simulate)')
general.add_option(
'--no-mark-watched',
action='store_false', dest='mark_watched',
@@ -1178,7 +1178,7 @@ def create_parser():
help='Do not write video description (default)')
filesystem.add_option(
'--write-info-json',
- action='store_true', dest='writeinfojson', default=False,
+ action='store_true', dest='writeinfojson', default=None,
help='Write video metadata to a .info.json file (this may contain personal information)')
filesystem.add_option(
'--no-write-info-json',
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 907627381..aee84cf5b 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -404,7 +404,7 @@ class FFmpegPostProcessor(PostProcessor):
class FFmpegExtractAudioPP(FFmpegPostProcessor):
COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
- SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac')
+ SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac')
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader)
diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py
index 5452b92d8..5bc435da3 100644
--- a/yt_dlp/postprocessor/metadataparser.py
+++ b/yt_dlp/postprocessor/metadataparser.py
@@ -1,5 +1,4 @@
import re
-
from enum import Enum
from .common import PostProcessor
@@ -26,12 +25,17 @@ class MetadataParserPP(PostProcessor):
'''
if not isinstance(action, cls.Actions):
raise ValueError(f'{action!r} is not a valid action')
- getattr(cls, action.value)(cls, *data)
+ getattr(cls, action.value)(cls, *data)  # called here so that invalid data raises an error at validation time
@staticmethod
def field_to_template(tmpl):
if re.match(r'[a-zA-Z_]+$', tmpl):
return f'%({tmpl})s'
+
+ from ..YoutubeDL import YoutubeDL
+ err = YoutubeDL.validate_outtmpl(tmpl)
+ if err:
+ raise err
return tmpl
@staticmethod
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 87463c999..c9b57c2f0 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -47,6 +47,7 @@ from .compat import (
compat_HTMLParser,
compat_HTTPError,
compat_basestring,
+ compat_brotli,
compat_chr,
compat_cookiejar,
compat_ctypes_WINFUNCTYPE,
@@ -143,10 +144,16 @@ def random_user_agent():
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+SUPPORTED_ENCODINGS = [
+ 'gzip', 'deflate'
+]
+if compat_brotli:
+ SUPPORTED_ENCODINGS.append('br')
+
std_headers = {
'User-Agent': random_user_agent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Encoding': 'gzip, deflate',
+ 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS),
'Accept-Language': 'en-us,en;q=0.5',
'Sec-Fetch-Mode': 'navigate',
}
@@ -1022,8 +1029,8 @@ def make_HTTPS_handler(params, **kwargs):
def bug_reports_message(before=';'):
msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , '
- 'filling out the "Broken site" issue template properly. '
- 'Confirm you are on the latest version using -U')
+ 'filling out the appropriate issue template. '
+ 'Confirm you are on the latest version using yt-dlp -U')
before = before.rstrip()
if not before or before.endswith(('.', '!', '?')):
@@ -1076,9 +1083,10 @@ class ExtractorError(YoutubeDLError):
'' if expected else bug_reports_message())))
def format_traceback(self):
- if self.traceback is None:
- return None
- return ''.join(traceback.format_tb(self.traceback))
+ return join_nonempty(
+ self.traceback and ''.join(traceback.format_tb(self.traceback)),
+ self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
+ delim='\n') or None
class UnsupportedError(ExtractorError):
@@ -1356,6 +1364,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
except zlib.error:
return zlib.decompress(data)
+ @staticmethod
+ def brotli(data):
+ if not data:
+ return data
+ return compat_brotli.decompress(data)
+
def http_request(self, req):
# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
# always respected by websites, some tend to give out URLs with non percent-encoded
@@ -1416,6 +1430,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
+ # brotli
+ if resp.headers.get('Content-encoding', '') == 'br':
+ resp = compat_urllib_request.addinfourl(
+ io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
# https://github.com/ytdl-org/youtube-dl/issues/6457).
if 300 <= resp.code < 400:
@@ -3485,7 +3505,7 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
extra_gap += 1
if delim:
table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
- table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter
+ table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
for row in table:
for pos, text in enumerate(map(str, row)):
if '\t' in text:
@@ -3583,6 +3603,9 @@ def match_str(filter_str, dct, incomplete=False):
def match_filter_func(filter_str):
+ if filter_str is None:
+ return None
+
def _match_func(info_dict, *args, **kwargs):
if match_str(filter_str, info_dict, *args, **kwargs):
return None
@@ -5195,6 +5218,10 @@ def traverse_dict(dictn, keys, casesense=True):
return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
+def get_first(obj, keys, **kwargs):
+ return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
+
+
def variadic(x, allowed_types=(str, bytes, dict)):
return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
@@ -5461,5 +5488,5 @@ has_websockets = bool(compat_websockets)
def merge_headers(*dicts):
- """Merge dicts of network headers case insensitively, prioritizing the latter ones"""
- return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
+ """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
+ return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
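
The brotli support added to `utils.py` has two halves: advertise `br` in `Accept-Encoding` only when a brotli implementation is importable, and transparently decompress responses served with `Content-encoding: br`. A condensed sketch of both halves (assumes the third-party `brotli` package; `brotlicffi` exposes the same `decompress`):

try:
    import brotli  # provided by the 'Brotli' or 'brotlicffi' package
except ImportError:
    brotli = None

SUPPORTED_ENCODINGS = ['gzip', 'deflate'] + (['br'] if brotli else [])

def decode_body(body, content_encoding):
    # Mirror of the handler logic: only decompress when the header says 'br'
    if content_encoding == 'br' and brotli:
        return brotli.decompress(body) if body else body
    return body

print(', '.join(SUPPORTED_ENCODINGS))  # the Accept-Encoding value sent out
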
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 01e1b2345..d5df2af90 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,5 +1,5 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2022.02.04'
+__version__ = '2022.03.08.1'
-RELEASE_GIT_HEAD = 'c1653e9ef'
+RELEASE_GIT_HEAD = 'c0c2c57d3'