author     Jesús <heckyel@hyperbola.info>  2022-06-27 01:25:17 +0800
committer  Jesús <heckyel@hyperbola.info>  2022-06-27 01:25:17 +0800
commit     16e8548f6a720a78679e417a20a300db2036bf6c (patch)
tree       b1247bca3417ce882e4a4d80213f41c20113c1a4
parent     4bbf329feb5a820ac21269fa426c95ca14d7af25 (diff)
parent     e08f72e6759fb6b1102521f0bdb9457038ef7c06 (diff)
download   hypervideo-pre-16e8548f6a720a78679e417a20a300db2036bf6c.tar.lz
           hypervideo-pre-16e8548f6a720a78679e417a20a300db2036bf6c.tar.xz
           hypervideo-pre-16e8548f6a720a78679e417a20a300db2036bf6c.zip
updated from upstream | 27/06/2022 at 01:25
-rw-r--r--  CONTRIBUTORS | 36
-rw-r--r--  Changelog.md | 249
-rwxr-xr-x  devscripts/bash-completion.py | 3
-rw-r--r--  devscripts/check-porn.py | 10
-rwxr-xr-x  devscripts/fish-completion.py | 6
-rw-r--r--  devscripts/generate_aes_testdata.py | 8
-rwxr-xr-x  devscripts/make_contributing.py | 1
-rw-r--r--  devscripts/make_lazy_extractors.py | 9
-rw-r--r--  devscripts/make_readme.py | 68
-rw-r--r--  devscripts/make_supportedsites.py | 6
-rw-r--r--  devscripts/prepare_manpage.py | 3
-rwxr-xr-x  devscripts/run_tests.sh | 2
-rwxr-xr-x  devscripts/zsh-completion.py | 3
-rw-r--r--  pyinst.py | 53
-rw-r--r--  pytest.ini | 4
-rw-r--r--  setup.cfg | 39
-rw-r--r--  setup.py | 5
-rw-r--r--  supportedsites.md | 60
-rw-r--r--  test/helper.py | 42
-rw-r--r--  test/test_InfoExtractor.py | 29
-rw-r--r--  test/test_YoutubeDL.py | 149
-rw-r--r--  test/test_YoutubeDLCookieJar.py | 8
-rw-r--r--  test/test_aes.py | 2
-rw-r--r--  test/test_age_restriction.py | 3
-rw-r--r--  test/test_all_urls.py | 5
-rw-r--r--  test/test_cache.py | 5
-rw-r--r--  test/test_compat.py | 44
-rw-r--r--  test/test_cookies.py | 8
-rwxr-xr-x  test/test_download.py | 28
-rw-r--r--  test/test_downloader_http.py | 12
-rw-r--r--  test/test_execution.py | 8
-rw-r--r--  test/test_http.py | 23
-rw-r--r--  test/test_jsinterp.py | 2
-rw-r--r--  test/test_netrc.py | 3
-rw-r--r--  test/test_overwrites.py | 6
-rw-r--r--  test/test_post_hooks.py | 6
-rw-r--r--  test/test_postprocessors.py | 2
-rw-r--r--  test/test_socks.py | 16
-rw-r--r--  test/test_subtitles.py | 106
-rw-r--r--  test/test_utils.py | 21
-rw-r--r--  test/test_verbose_output.py | 6
-rw-r--r--  test/test_youtube_lists.py | 3
-rw-r--r--  test/test_youtube_misc.py | 1
-rw-r--r--  test/test_youtube_signature.py | 9
-rw-r--r--  tox.ini | 15
-rw-r--r--  yt-dlp.sh | 2
-rw-r--r--  yt_dlp/YoutubeDL.py | 590
-rw-r--r--  yt_dlp/__init__.py | 190
-rw-r--r--  yt_dlp/__main__.py | 1
-rw-r--r--  yt_dlp/aes.py | 5
-rw-r--r--  yt_dlp/cache.py | 3
-rw-r--r--  yt_dlp/compat/__init__.py | 28
-rw-r--r--  yt_dlp/compat/_deprecated.py | 48
-rw-r--r--  yt_dlp/compat/_legacy.py | 39
-rw-r--r--  yt_dlp/compat/compat_utils.py | 32
-rw-r--r--  yt_dlp/compat/functools.py | 26
-rw-r--r--  yt_dlp/compat/imghdr.py | 14
-rw-r--r--  yt_dlp/cookies.py | 85
-rw-r--r--  yt_dlp/downloader/__init__.py | 7
-rw-r--r--  yt_dlp/downloader/common.py | 145
-rw-r--r--  yt_dlp/downloader/dash.py | 3
-rw-r--r--  yt_dlp/downloader/external.py | 122
-rw-r--r--  yt_dlp/downloader/f4m.py | 39
-rw-r--r--  yt_dlp/downloader/fragment.py | 56
-rw-r--r--  yt_dlp/downloader/hls.py | 24
-rw-r--r--  yt_dlp/downloader/http.py | 27
-rw-r--r--  yt_dlp/downloader/ism.py | 4
-rw-r--r--  yt_dlp/downloader/niconico.py | 6
-rw-r--r--  yt_dlp/downloader/rtmp.py | 6
-rw-r--r--  yt_dlp/downloader/youtube_live_chat.py | 9
-rw-r--r--  yt_dlp/extractor/__init__.py | 37
-rw-r--r--  yt_dlp/extractor/_extractors.py | 2198
-rw-r--r--  yt_dlp/extractor/abematv.py | 17
-rw-r--r--  yt_dlp/extractor/adobepass.py | 27
-rw-r--r--  yt_dlp/extractor/animelab.py | 270
-rw-r--r--  yt_dlp/extractor/archiveorg.py | 45
-rw-r--r--  yt_dlp/extractor/arnes.py | 2
-rw-r--r--  yt_dlp/extractor/atscaleconf.py | 34
-rw-r--r--  yt_dlp/extractor/audius.py | 4
-rw-r--r--  yt_dlp/extractor/awaan.py | 2
-rw-r--r--  yt_dlp/extractor/bbc.py | 12
-rw-r--r--  yt_dlp/extractor/bellmedia.py | 10
-rw-r--r--  yt_dlp/extractor/bilibili.py | 21
-rw-r--r--  yt_dlp/extractor/bloomberg.py | 10
-rw-r--r--  yt_dlp/extractor/brightcove.py | 4
-rw-r--r--  yt_dlp/extractor/cbc.py | 4
-rw-r--r--  yt_dlp/extractor/ccc.py | 1
-rw-r--r--  yt_dlp/extractor/cda.py | 12
-rw-r--r--  yt_dlp/extractor/chingari.py | 8
-rw-r--r--  yt_dlp/extractor/common.py | 613
-rw-r--r--  yt_dlp/extractor/commonprotocols.py | 5
-rw-r--r--  yt_dlp/extractor/crunchyroll.py | 27
-rw-r--r--  yt_dlp/extractor/curiositystream.py | 15
-rw-r--r--  yt_dlp/extractor/cwtv.py | 1
-rw-r--r--  yt_dlp/extractor/dailymotion.py | 9
-rw-r--r--  yt_dlp/extractor/dailywire.py | 114
-rw-r--r--  yt_dlp/extractor/digitalconcerthall.py | 2
-rw-r--r--  yt_dlp/extractor/dropbox.py | 4
-rw-r--r--  yt_dlp/extractor/dropout.py | 33
-rw-r--r--  yt_dlp/extractor/duboku.py | 50
-rw-r--r--  yt_dlp/extractor/ertgr.py | 5
-rw-r--r--  yt_dlp/extractor/espn.py | 145
-rw-r--r--  yt_dlp/extractor/expressen.py | 7
-rw-r--r--  yt_dlp/extractor/extractors.py | 2190
-rw-r--r--  yt_dlp/extractor/facebook.py | 6
-rw-r--r--  yt_dlp/extractor/fc2.py | 18
-rw-r--r--  yt_dlp/extractor/flickr.py | 2
-rw-r--r--  yt_dlp/extractor/fourzerostudio.py | 107
-rw-r--r--  yt_dlp/extractor/foxgay.py | 2
-rw-r--r--  yt_dlp/extractor/foxnews.py | 9
-rw-r--r--  yt_dlp/extractor/franceculture.py | 125
-rw-r--r--  yt_dlp/extractor/freetv.py | 141
-rw-r--r--  yt_dlp/extractor/fuyintv.py | 30
-rw-r--r--  yt_dlp/extractor/generic.py | 229
-rw-r--r--  yt_dlp/extractor/giga.py | 9
-rw-r--r--  yt_dlp/extractor/googledrive.py | 58
-rw-r--r--  yt_dlp/extractor/hitbox.py | 6
-rw-r--r--  yt_dlp/extractor/ina.py | 84
-rw-r--r--  yt_dlp/extractor/instagram.py | 2
-rw-r--r--  yt_dlp/extractor/iqiyi.py | 6
-rw-r--r--  yt_dlp/extractor/iwara.py | 80
-rw-r--r--  yt_dlp/extractor/ixigua.py | 84
-rw-r--r--  yt_dlp/extractor/joj.py | 2
-rw-r--r--  yt_dlp/extractor/jwplatform.py | 5
-rw-r--r--  yt_dlp/extractor/kaltura.py | 2
-rw-r--r--  yt_dlp/extractor/keezmovies.py | 2
-rw-r--r--  yt_dlp/extractor/kicker.py | 55
-rw-r--r--  yt_dlp/extractor/kth.py | 28
-rw-r--r--  yt_dlp/extractor/kusi.py | 6
-rw-r--r--  yt_dlp/extractor/lastfm.py | 2
-rw-r--r--  yt_dlp/extractor/lbry.py | 7
-rw-r--r--  yt_dlp/extractor/line.py | 2
-rw-r--r--  yt_dlp/extractor/lnkgo.py | 2
-rw-r--r--  yt_dlp/extractor/medaltv.py | 2
-rw-r--r--  yt_dlp/extractor/mediaset.py | 12
-rw-r--r--  yt_dlp/extractor/metacafe.py | 13
-rw-r--r--  yt_dlp/extractor/minds.py | 2
-rw-r--r--  yt_dlp/extractor/mirrorcouk.py | 98
-rw-r--r--  yt_dlp/extractor/mixcloud.py | 3
-rw-r--r--  yt_dlp/extractor/naver.py | 139
-rw-r--r--  yt_dlp/extractor/ndr.py | 244
-rw-r--r--  yt_dlp/extractor/ndtv.py | 14
-rw-r--r--  yt_dlp/extractor/nebula.py | 9
-rw-r--r--  yt_dlp/extractor/neteasemusic.py | 18
-rw-r--r--  yt_dlp/extractor/netverse.py | 176
-rw-r--r--  yt_dlp/extractor/nhk.py | 20
-rw-r--r--  yt_dlp/extractor/niconico.py | 6
-rw-r--r--  yt_dlp/extractor/npr.py | 21
-rw-r--r--  yt_dlp/extractor/nrk.py | 7
-rw-r--r--  yt_dlp/extractor/openload.py | 15
-rw-r--r--  yt_dlp/extractor/peloton.py | 12
-rw-r--r--  yt_dlp/extractor/playsuisse.py | 147
-rw-r--r--  yt_dlp/extractor/playvid.py | 13
-rw-r--r--  yt_dlp/extractor/pokemon.py | 41
-rw-r--r--  yt_dlp/extractor/popcorntimes.py | 7
-rw-r--r--  yt_dlp/extractor/pornhub.py | 33
-rw-r--r--  yt_dlp/extractor/premiershiprugby.py | 39
-rw-r--r--  yt_dlp/extractor/puls4.py | 7
-rw-r--r--  yt_dlp/extractor/radiko.py | 4
-rw-r--r--  yt_dlp/extractor/radiofrance.py | 49
-rw-r--r--  yt_dlp/extractor/radlive.py | 2
-rw-r--r--  yt_dlp/extractor/rokfin.py | 10
-rw-r--r--  yt_dlp/extractor/rtve.py | 10
-rw-r--r--  yt_dlp/extractor/rumble.py | 30
-rw-r--r--  yt_dlp/extractor/screencast.py | 13
-rw-r--r--  yt_dlp/extractor/shared.py | 13
-rw-r--r--  yt_dlp/extractor/soundcloud.py | 4
-rw-r--r--  yt_dlp/extractor/southpark.py | 43
-rw-r--r--  yt_dlp/extractor/spotify.py | 39
-rw-r--r--  yt_dlp/extractor/storyfire.py | 2
-rw-r--r--  yt_dlp/extractor/streamcz.py | 4
-rw-r--r--  yt_dlp/extractor/stv.py | 2
-rw-r--r--  yt_dlp/extractor/substack.py | 100
-rw-r--r--  yt_dlp/extractor/tennistv.py | 179
-rw-r--r--  yt_dlp/extractor/testurl.py | 2
-rw-r--r--  yt_dlp/extractor/tiktok.py | 86
-rw-r--r--  yt_dlp/extractor/trovo.py | 2
-rw-r--r--  yt_dlp/extractor/tver.py | 37
-rw-r--r--  yt_dlp/extractor/twitter.py | 2
-rw-r--r--  yt_dlp/extractor/udemy.py | 14
-rw-r--r--  yt_dlp/extractor/urort.py | 11
-rw-r--r--  yt_dlp/extractor/vevo.py | 119
-rw-r--r--  yt_dlp/extractor/videa.py | 9
-rw-r--r--  yt_dlp/extractor/videocampus_sachsen.py | 27
-rw-r--r--  yt_dlp/extractor/vidio.py | 4
-rw-r--r--  yt_dlp/extractor/vidlii.py | 2
-rw-r--r--  yt_dlp/extractor/vimeo.py | 32
-rw-r--r--  yt_dlp/extractor/vine.py | 2
-rw-r--r--  yt_dlp/extractor/voicy.py | 6
-rw-r--r--  yt_dlp/extractor/vrv.py | 15
-rw-r--r--  yt_dlp/extractor/vshare.py | 8
-rw-r--r--  yt_dlp/extractor/wppilot.py | 6
-rw-r--r--  yt_dlp/extractor/xfileshare.py | 7
-rw-r--r--  yt_dlp/extractor/xhamster.py | 15
-rw-r--r--  yt_dlp/extractor/yahoo.py | 17
-rw-r--r--  yt_dlp/extractor/ynet.py | 6
-rw-r--r--  yt_dlp/extractor/younow.py | 2
-rw-r--r--  yt_dlp/extractor/youporn.py | 5
-rw-r--r--  yt_dlp/extractor/youtube.py | 437
-rw-r--r--  yt_dlp/extractor/zattoo.py | 2
-rw-r--r--  yt_dlp/extractor/zdf.py | 18
-rw-r--r--  yt_dlp/extractor/zhihu.py | 2
-rw-r--r--  yt_dlp/jsinterp.py | 215
-rw-r--r--  yt_dlp/options.py | 429
-rw-r--r--  yt_dlp/postprocessor/common.py | 10
-rw-r--r--  yt_dlp/postprocessor/embedthumbnail.py | 12
-rw-r--r--  yt_dlp/postprocessor/ffmpeg.py | 401
-rw-r--r--  yt_dlp/postprocessor/modify_chapters.py | 4
-rw-r--r--  yt_dlp/postprocessor/sponskrub.py | 14
-rw-r--r--  yt_dlp/postprocessor/sponsorblock.py | 4
-rw-r--r--  yt_dlp/socks.py | 31
-rw-r--r--  yt_dlp/utils.py | 771
-rw-r--r--  yt_dlp/version.py | 4
213 files changed, 8374 insertions, 5705 deletions
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 9b29acb0c..17a1d192d 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -231,3 +231,39 @@ Fam0r
bohwaz
dodrian
vvto33
+ca-za
+connercsbn
+diegorodriguezv
+ekangmonyet
+elyse0
+evansp
+GiedriusS
+HE7086
+JordanWeatherby
+m4tu4g
+MarwenDallel
+nevack
+putnam
+rand-net
+vertan
+Wikidepia
+Yipten
+moench-tegeder
+christoph-heinrich
+HobbyistDev
+LunarFang416
+sbor23
+aurelg
+adamanldo
+gamer191
+vkorablin
+Burve
+mnn
+ZhymabekRoman
+mozbugbox
+aejdl
+ping
+sqrtNOT
+bubbleguuum
+darkxex
+miseran
diff --git a/Changelog.md b/Changelog.md
index 243f3d244..a6b898bd8 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -11,6 +11,249 @@
-->
+### 2022.06.22.1
+
+* [build] Fix updating homebrew formula
+
+### 2022.06.22
+
+* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
+* **Add option `--download-sections` to download video partially**
+ * Chapter regex and time ranges are accepted (Eg: `--download-sections *1:10-2:20`)
+* Add option `--alias`
+* Add option `--lazy-playlist` to process entries as they are received
+* Add option `--retry-sleep`
+* Add slicing notation to `--playlist-items`
+ * Adds support for negative indices and step
+ * Add `-I` as alias for `--playlist-items`
+ * Makes `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse` redundant
+* `--config-location -` to provide options interactively
+* [build] Add Linux standalone builds
+* [update] Self-restart after update
+* Merge youtube-dl: Up to [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a9)
+* Add `--no-update`
+* Allow extractors to specify section_start/end for clips
+* Do not print progress to `stderr` with `-q`
+* Ensure pre-processor errors do not block video download
+* Fix `--simulate --max-downloads`
+* Improve error handling of bad config files
+* Return an error code if update fails
+* Fix bug in [3a408f9](https://github.com/yt-dlp/yt-dlp/commit/3a408f9d199127ca2626359e21a866a09ab236b3)
+* [ExtractAudio] Allow conditional conversion
+* [ModifyChapters] Fix repeated removal of small segments
+* [ThumbnailsConvertor] Allow conditional conversion
+* [cookies] Detect profiles for cygwin/BSD by [moench-tegeder](https://github.com/moench-tegeder)
+* [dash] Show fragment count with `--live-from-start` by [flashdagger](https://github.com/flashdagger)
+* [extractor] Add `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add `default` parameter to `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor] Add dev option `--load-pages`
+* [extractor] Handle `json_ld` with multiple `@type`s
+* [extractor] Import `_ALL_CLASSES` lazily
+* [extractor] Recognize `src` attribute from HTML5 media elements by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/generic] Revert e6ae51c123897927eb3c9899923d8ffd31c7f85d
+* [f4m] Bugfix
+* [ffmpeg] Check version lazily
+* [jsinterp] Some optimizations and refactoring by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
+* [utils] Improve performance using `functools.cache`
+* [utils] Send HTTP/1.1 ALPN extension by [coletdjnz](https://github.com/coletdjnz)
+* [utils] `ExtractorError`: Fix `exc_info`
+* [utils] `ISO3166Utils`: Add `EU` and `AP`
+* [utils] `Popen`: Refactor to use contextmanager
+* [utils] `locked_file`: Fix for PyPy on Windows
+* [update] Expose more functionality to API
+* [update] Use `.git` folder to distinguish `source`/`unknown`
+* [compat] Add `functools.cached_property`
+* [test] Fix `FakeYDL` signatures by [coletdjnz](https://github.com/coletdjnz)
+* [docs] Improvements
+* [cleanup, ExtractAudio] Refactor
+* [cleanup, downloader] Refactor `report_progress`
+* [cleanup, extractor] Refactor `_download_...` methods
+* [cleanup, extractor] Rename `extractors.py` to `_extractors.py`
+* [cleanup, utils] Don't use kwargs for `format_field`
+* [cleanup, build] Refactor
+* [cleanup, docs] Re-indent "Usage and Options" section
+* [cleanup] Deprecate `YoutubeDL.parse_outtmpl`
+* [cleanup] Misc fixes and cleanup by [Lesmiscore](https://github.com/Lesmiscore), [MrRawes](https://github.com/MrRawes), [christoph-heinrich](https://github.com/christoph-heinrich), [flashdagger](https://github.com/flashdagger), [gamer191](https://github.com/gamer191), [kwconder](https://github.com/kwconder), [pukkandan](https://github.com/pukkandan)
+* [extractor/DailyWire] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/fourzerostudio] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/GoogleDrive] Add folder extractor by [evansp](https://github.com/evansp), [pukkandan](https://github.com/pukkandan)
+* [extractor/MirrorCoUK] Add extractor by [LunarFang416](https://github.com/LunarFang416), [pukkandan](https://github.com/pukkandan)
+* [extractor/atscaleconfevent] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [extractor/freetv] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/ixigua] Add Extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/kicker.de] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/netverse] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
+* [extractor/playsuisse] Add extractor by [pukkandan](https://github.com/pukkandan), [sbor23](https://github.com/sbor23)
+* [extractor/substack] Add extractor by [elyse0](https://github.com/elyse0)
+* [extractor/youtube] **Support downloading clips**
+* [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Add warning for PostLiveDvr
+* [extractor/youtube] Bring back `_extract_chapters_from_description`
+* [extractor/youtube] Extract `comment_count` from webpage
+* [extractor/youtube] Fix `:ytnotifications` extractor by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Fix initial player response extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [extractor/youtube] Fix live chat for videos with content warning by [coletdjnz](https://github.com/coletdjnz)
+* [extractor/youtube] Make signature extraction non-fatal
+* [extractor/youtube:tab] Detect `videoRenderer` in `_post_thread_continuation_entries`
+* [extractor/BiliIntl] Fix metadata extraction
+* [extractor/BiliIntl] Fix subtitle extraction by [HobbyistDev](https://github.com/HobbyistDev)
+* [extractor/FranceCulture] Fix extractor by [aurelg](https://github.com/aurelg), [pukkandan](https://github.com/pukkandan)
+* [extractor/PokemonSoundLibrary] Remove extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/StreamCZ] Fix extractor by [adamanldo](https://github.com/adamanldo), [dirkf](https://github.com/dirkf)
+* [extractor/WatchESPN] Support free videos and BAM_DTC by [ischmidt20](https://github.com/ischmidt20)
+* [extractor/animelab] Remove extractor by [gamer191](https://github.com/gamer191)
+* [extractor/bloomberg] Change playback endpoint by [m4tu4g](https://github.com/m4tu4g)
+* [extractor/ccc] Extract view_count by [vkorablin](https://github.com/vkorablin)
+* [extractor/crunchyroll:beta] Fix extractor after API change by [Burve](https://github.com/Burve), [tejing1](https://github.com/tejing1)
+* [extractor/curiositystream] Get `auth_token` from cookie by [mnn](https://github.com/mnn)
+* [extractor/digitalconcerthall] Fix extractor by [ZhymabekRoman](https://github.com/ZhymabekRoman)
+* [extractor/dropbox] Extract the correct `mountComponent`
+* [extractor/dropout] Login is not mandatory
+* [extractor/duboku] Fix for hostname change by [mozbugbox](https://github.com/mozbugbox)
+* [extractor/espn] Add `WatchESPN` extractor by [ischmidt20](https://github.com/ischmidt20), [pukkandan](https://github.com/pukkandan)
+* [extractor/expressen] Fix extractor by [aejdl](https://github.com/aejdl)
+* [extractor/foxnews] Update embed extraction by [elyse0](https://github.com/elyse0)
+* [extractor/ina] Fix extractor by [elyse0](https://github.com/elyse0)
+* [extractor/iwara:user] Make paging better by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/jwplatform] Look for `data-video-jw-id`
+* [extractor/lbry] Update livestream API by [flashdagger](https://github.com/flashdagger)
+* [extractor/mediaset] Improve `_VALID_URL`
+* [extractor/naver] Add `navernow` extractor by [ping](https://github.com/ping)
+* [extractor/niconico:series] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
+* [extractor/npr] Use stream url from json-ld by [r5d](https://github.com/r5d)
+* [extractor/pornhub] Extract `uploader_id` field by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/radiofrance] Add more radios by [bubbleguuum](https://github.com/bubbleguuum)
+* [extractor/rumble] Detect JS embed
+* [extractor/rumble] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
+* [extractor/southpark] Add `southpark.lat` extractor by [darkxex](https://github.com/darkxex)
+* [extractor/spotify:show] Fix extractor
+* [extractor/tiktok] Detect embeds
+* [extractor/tiktok] Extract `SIGI_STATE` by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan), [sulyi](https://github.com/sulyi)
+* [extractor/tver] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/vevo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [extractor/yahoo:gyao] Fix extractor
+* [extractor/zattoo] Fix live streams by [miseran](https://github.com/miseran)
+* [extractor/zdf] Improve format sorting by [elyse0](https://github.com/elyse0)
+
+
+### 2022.05.18
+
+* Add support for SSL client certificate authentication by [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf)
+ * Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password`
+* Add `--match-filter -` to interactively ask for each video
+* `--max-downloads` should obey `--break-per-input`
+* Allow use of weaker ciphers with `--legacy-server-connect`
+* Don't imply `-s` for later stages of `-O`
+* Fix `--date today`
+* Fix `--skip-unavailable-fragments`
+* Fix color in `-q -F`
+* Fix redirect HTTP method handling by [coletdjnz](https://github.com/coletdjnz)
+* Improve `--clean-infojson`
+* Remove warning for videos with an empty title
+* Run `FFmpegFixupM3u8PP` for live-streams if needed
+* Show name of downloader in verbose log
+* [cookies] Allow `cookiefile` to be a text stream
+* [cookies] Report progress when importing cookies
+* [downloader/ffmpeg] Specify headers for each URL by [elyse0](https://github.com/elyse0)
+* [fragment] Do not change chunk-size when `--test`
+* [fragment] Make single thread download work for `--live-from-start` by [Lesmiscore](https://github.com/Lesmiscore)
+* [hls] Fix `byte_range` for `EXT-X-MAP` fragment by [fstirlitz](https://github.com/fstirlitz)
+* [http] Fix retrying on read timeout by [coletdjnz](https://github.com/coletdjnz)
+* [ffmpeg] Fix features detection
+* [EmbedSubtitle] Enable for more video extensions
+* [EmbedThumbnail] Disable thumbnail conversion for mkv by [evansp](https://github.com/evansp)
+* [EmbedThumbnail] Do not obey `-k`
+* [EmbedThumbnail] Do not remove id3v1 tags
+* [FFmpegMetadata] Remove `\0` from metadata
+* [FFmpegMetadata] Remove filename from attached info-json
+* [FixupM3u8] Obey `--hls-prefer-mpegts`
+* [Sponsorblock] Don't crash when duration is unknown
+* [XAttrMetadata] Refactor and document dependencies
+* [extractor] Document netrc machines
+* [extractor] Update `manifest_url`s after redirect by [elyse0](https://github.com/elyse0)
+* [extractor] Update dash `manifest_url` after redirects by [elyse0](https://github.com/elyse0)
+* [extractor] Use `classmethod`/`property` where possible
+* [generic] Refactor `_extract_rss`
+* [utils] `is_html`: Handle double BOM
+* [utils] `locked_file`: Ignore illegal seek on `truncate` by [jakeogh](https://github.com/jakeogh)
+* [utils] `sanitize_path`: Fix when path is empty string
+* [utils] `write_string`: Workaround newline issue in `conhost`
+* [utils] `certifi`: Make sure the pem file exists
+* [utils] Fix `WebSocketsWrapper`
+* [utils] `locked_file`: Do not give executable bits for newly created files by [Lesmiscore](https://github.com/Lesmiscore)
+* [utils] `YoutubeDLCookieJar`: Detect and reject JSON file by [Lesmiscore](https://github.com/Lesmiscore)
+* [test] Convert warnings into errors and fix some existing warnings by [fstirlitz](https://github.com/fstirlitz)
+* [dependencies] Create module with all dependency imports
+* [compat] Split into sub-modules by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [compat] Implement `compat.imghdr`
+* [build] Add `make uninstall` by [MrRawes](https://github.com/MrRawes)
+* [build] Avoid use of `install -D`
+* [build] Fix `Makefile` by [putnam](https://github.com/putnam)
+* [build] Fix `--onedir` on macOS
+* [build] Add more test-runners
+* [cleanup] Deprecate some compat vars by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
+* [cleanup] Remove unused code paths, extractors, scripts and tests by [fstirlitz](https://github.com/fstirlitz)
+* [cleanup] Upgrade syntax (`pyupgrade`) and sort imports (`isort`)
+* [cleanup, docs, build] Misc fixes
+* [BilibiliLive] Add extractor by [HE7086](https://github.com/HE7086), [pukkandan](https://github.com/pukkandan)
+* [Fifa] Add Extractor by [Bricio](https://github.com/Bricio)
+* [goodgame] Add extractor by [nevack](https://github.com/nevack)
+* [gronkh] Add playlist extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [icareus] Add extractor by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [iwara] Add playlist extractors by [i6t](https://github.com/i6t)
+* [Likee] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [masters] Add extractor by [m4tu4g](https://github.com/m4tu4g)
+* [nebula] Add support for subscriptions by [hheimbuerger](https://github.com/hheimbuerger)
+* [Podchaser] Add extractors by [connercsbn](https://github.com/connercsbn)
+* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add `:ytnotifications` extractor by [krichbanana](https://github.com/krichbanana)
+* [youtube] Add YoutubeStoriesIE (`ytstories:<channel UCID>`) by [coletdjnz](https://github.com/coletdjnz)
+* [ZingMp3] Add chart and user extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [adn] Update AES key by [elyse0](https://github.com/elyse0)
+* [adobepass] Allow cookies for authenticating MSO
+* [bandcamp] Exclude merch links by [Yipten](https://github.com/Yipten)
+* [chingari] Fix archiving and tests
+* [DRTV] Improve `_VALID_URL` by [vertan](https://github.com/vertan)
+* [facebook] Improve thumbnail extraction by [Wikidepia](https://github.com/Wikidepia)
+* [fc2] Stop heartbeating once FFmpeg finishes by [Lesmiscore](https://github.com/Lesmiscore)
+* [Gofile] Fix extraction and support password-protected links by [mehq](https://github.com/mehq)
+* [hotstar, cleanup] Refactor extractors
+* [InfoQ] Don't fail on missing audio format by [evansp](https://github.com/evansp)
+* [Jamendo] Extract more metadata by [evansp](https://github.com/evansp)
+* [kaltura] Update API calls by [flashdagger](https://github.com/flashdagger)
+* [KhanAcademy] Fix extractor by [rand-net](https://github.com/rand-net)
+* [LCI] Fix extractor by [MarwenDallel](https://github.com/MarwenDallel)
+* [lrt] Support livestreams by [GiedriusS](https://github.com/GiedriusS)
+* [niconico] Set `expected_protocol` to a public field
+* [Niconico] Support 2FA by [ekangmonyet](https://github.com/ekangmonyet)
+* [Olympics] Fix format extension
+* [openrec:movie] Enable fallback for /movie/ URLs
+* [PearVideo] Add fallback for formats by [hatienl0i261299](https://github.com/hatienl0i261299)
+* [radiko] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
+* [rai] Add `release_year`
+* [reddit] Prevent infinite loop
+* [rokfin] Implement login by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
+* [ruutu] Support hs.fi embeds by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
+* [spotify] Detect iframe embeds by [fstirlitz](https://github.com/fstirlitz)
+* [telegram] Fix metadata extraction
+* [tmz, cleanup] Update tests by [diegorodriguezv](https://github.com/diegorodriguezv)
+* [toggo] Fix `_VALID_URL` by [ca-za](https://github.com/ca-za)
+* [trovo] Update to new API by [nyuszika7h](https://github.com/nyuszika7h)
+* [TVer] Improve extraction by [Lesmiscore](https://github.com/Lesmiscore)
+* [twitcasting] Pass headers for each format by [Lesmiscore](https://github.com/Lesmiscore)
+* [VideocampusSachsen] Improve extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
+* [vimeo] Fix extractors
+* [wat] Fix extraction of multi-language videos and subtitles by [elyse0](https://github.com/elyse0)
+* [wistia] Fix `_VALID_URL` by [dirkf](https://github.com/dirkf)
+* [youtube, cleanup] Minor refactoring by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube] Added piped instance URLs by [JordanWeatherby](https://github.com/JordanWeatherby)
+* [youtube] Deprioritize auto-generated thumbnails
+* [youtube] Deprioritize format 22 (often damaged)
+* [youtube] Fix episode metadata extraction
+* [zee5] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [zingmp3, cleanup] Refactor extractors
+
+
### 2022.04.08
* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
@@ -785,7 +1028,7 @@
* [build] Improvements
* Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev)
* Release windows exe built with `py2exe`
- * Enable lazy-extractors in releases.
+ * Enable lazy-extractors in releases
* Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
* Clean up error reporting in update
* Refactor `pyinst.py`, misc cleanup and improve docs
@@ -1038,7 +1281,7 @@
* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari)
* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao)
-* [build] Provide `--onedir` zip for windows by [pukkandan](https://github.com/pukkandan)
+* [build] Provide `--onedir` zip for windows
* [cleanup,docs] Add deprecation warning in docs for some counter intuitive behaviour
* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden)
* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi)
@@ -2031,7 +2274,7 @@
* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details
* **Format Selection:** See [Format Selection](README.md#format-selection) for details
* New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
- * Changed video format sorting to show video only files and video+audio files together.
+ * Changed video format sorting to show video only files and video+audio files together
* Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
* Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively
* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
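
Note on the 2022.06.22 entries above: `_search_json` (and its new `default`
parameter) is a helper on the extractor base class. A minimal sketch of how an
extractor might call it; the extractor class, URL pattern and page structure
are hypothetical, and the exact keyword arguments are assumed from the
changelog wording:

    from yt_dlp.extractor.common import InfoExtractor

    class ExamplePageIE(InfoExtractor):  # hypothetical extractor, for illustration only
        _VALID_URL = r'https?://example\.com/watch/(?P<id>\w+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)
            # Extract the first JSON object following the start pattern;
            # passing `default` makes a missing match non-fatal
            data = self._search_json(
                r'window\.__DATA__\s*=', webpage, 'page data', video_id, default={})
            return {
                'id': video_id,
                'title': data.get('title') or video_id,
                'url': data.get('videoUrl'),
            }
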
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 268e8a2ae..9b4a9d4e2 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -1,9 +1,12 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import yt_dlp
BASH_COMPLETION_FILE = "completions/bash/yt-dlp"
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index 08f663e4b..fc72c3051 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -13,9 +13,11 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import gettestcases
-from yt_dlp.utils import compat_urllib_parse_urlparse, compat_urllib_request
+import urllib.parse
+import urllib.request
+
+from test.helper import gettestcases
if len(sys.argv) > 1:
METHOD = 'LIST'
@@ -26,7 +28,7 @@ else:
for test in gettestcases():
if METHOD == 'EURISTIC':
try:
- webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
+ webpage = urllib.request.urlopen(test['url'], timeout=10).read()
except Exception:
print('\nFail: {}'.format(test['name']))
continue
@@ -36,7 +38,7 @@ for test in gettestcases():
RESULT = 'porn' in webpage.lower()
elif METHOD == 'LIST':
- domain = compat_urllib_parse_urlparse(test['url']).netloc
+ domain = urllib.parse.urlparse(test['url']).netloc
if not domain:
print('\nFail: {}'.format(test['name']))
continue
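
The change above is representative of the release-wide compat cleanup: the
removed `compat_urllib_*` aliases map one-to-one onto the standard library.
For example:

    import urllib.parse
    import urllib.request

    # compat_urllib_parse_urlparse(url).netloc -> urllib.parse.urlparse(url).netloc
    print(urllib.parse.urlparse('https://example.com/watch?v=1').netloc)  # example.com

    # compat_urllib_request.urlopen(...) -> urllib.request.urlopen(...)
    # webpage = urllib.request.urlopen(url, timeout=10).read()
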
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index d9c0048e2..5d2f68a48 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -1,10 +1,14 @@
#!/usr/bin/env python3
-import optparse
+
+# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import optparse
+
import yt_dlp
from yt_dlp.utils import shell_quote
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
index c7d83f1a7..7f3c88bcf 100644
--- a/devscripts/generate_aes_testdata.py
+++ b/devscripts/generate_aes_testdata.py
@@ -1,11 +1,15 @@
#!/usr/bin/env python3
-import codecs
+
+# Allow direct execution
import os
-import subprocess
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import codecs
+import subprocess
+
from yt_dlp.aes import aes_encrypt, key_expansion
from yt_dlp.utils import intlist_to_bytes
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 361e17d8c..d74462a3c 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
import optparse
import re
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 8c481bc2d..785d66a6a 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,12 +1,15 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
-import optparse
import sys
-from inspect import getsource
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import optparse
+from inspect import getsource
+
NO_ATTR = object()
STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_WORKING', '_NETRC_MACHINE', 'age_limit']
CLASS_METHODS = [
@@ -53,7 +56,7 @@ def get_all_ies():
if os.path.exists(PLUGINS_DIRNAME):
os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
try:
- from yt_dlp.extractor import _ALL_CLASSES
+ from yt_dlp.extractor.extractors import _ALL_CLASSES
finally:
if os.path.exists(BLOCKED_DIRNAME):
os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index fd234bf58..f2e08d7c6 100644
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -1,7 +1,12 @@
#!/usr/bin/env python3
-# yt-dlp --help | make_readme.py
-# This must be run in a console of correct width
+"""
+yt-dlp --help | make_readme.py
+This must be run in a console of correct width
+"""
+
+
+import functools
import re
import sys
@@ -10,21 +15,60 @@ README_FILE = 'README.md'
OPTIONS_START = 'General Options:'
OPTIONS_END = 'CONFIGURATION'
EPILOG_START = 'See full documentation'
+ALLOWED_OVERSHOOT = 2
+
+DISABLE_PATCH = object()
+
+
+def take_section(text, start=None, end=None, *, shift=0):
+ return text[
+ text.index(start) + shift if start else None:
+ text.index(end) + shift if end else None
+ ]
-helptext = sys.stdin.read()
-if isinstance(helptext, bytes):
- helptext = helptext.decode()
+def apply_patch(text, patch):
+ return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)
-start, end = helptext.index(f'\n {OPTIONS_START}'), helptext.index(f'\n{EPILOG_START}')
-options = re.sub(r'(?m)^ (\w.+)$', r'## \1', helptext[start + 1: end + 1])
+
+options = take_section(sys.stdin.read(), f'\n {OPTIONS_START}', f'\n{EPILOG_START}', shift=1)
+
+max_width = max(map(len, options.split('\n')))
+switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group())
+delim = f'\n{" " * switch_col_width}'
+
+PATCHES = (
+ ( # Headings
+ r'(?m)^ (\w.+\n)( (?=\w))?',
+ r'## \1'
+ ),
+ ( # Do not split URLs
+ rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s',
+ lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n'))
+ ),
+ ( # Do not split "words"
+ rf'(?m)({delim}\S+)+$',
+ lambda mobj: ''.join((delim, mobj.group(0).replace(delim, '')))
+ ),
+ ( # Allow overshooting last line
+ rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})',
+ lambda mobj: (mobj.group().replace(delim, ' ')
+ if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT
+ else mobj.group())
+ ),
+ ( # Avoid newline when a space is available b/w switch and description
+ DISABLE_PATCH, # This creates issues with prepare_manpage
+ r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim),
+ r'\1 '
+ ),
+)
with open(README_FILE, encoding='utf-8') as f:
readme = f.read()
-header = readme[:readme.index(f'## {OPTIONS_START}')]
-footer = readme[readme.index(f'# {OPTIONS_END}'):]
-
with open(README_FILE, 'w', encoding='utf-8') as f:
- for part in (header, options, footer):
- f.write(part)
+ f.write(''.join((
+ take_section(readme, end=f'## {OPTIONS_START}'),
+ functools.reduce(apply_patch, PATCHES, options),
+ take_section(readme, f'# {OPTIONS_END}'),
+ )))
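
The rewritten script is built around the two helpers defined above. A toy run
(standalone copies of the helpers, with illustrative input rather than the
real README):

    import functools
    import re

    DISABLE_PATCH = object()

    def take_section(text, start=None, end=None, *, shift=0):
        return text[
            text.index(start) + shift if start else None:
            text.index(end) + shift if end else None
        ]

    def apply_patch(text, patch):
        # Patches whose first element is DISABLE_PATCH are skipped entirely
        return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)

    text = 'HEADER\nOPTIONS\nfoo bar\nFOOTER'
    print(take_section(text, 'OPTIONS', 'FOOTER'))  # -> 'OPTIONS\nfoo bar\n'

    patches = ((r'foo', 'FOO'), (DISABLE_PATCH, r'bar', 'BAR'))
    print(functools.reduce(apply_patch, patches, text))  # 'bar' is left untouched
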
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index d8c53c5e1..e46f7af56 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,10 +1,14 @@
#!/usr/bin/env python3
-import optparse
+
+# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import optparse
+
from yt_dlp.extractor import list_extractor_classes
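
For context, `list_extractor_classes` (newly imported above) can also be used
on its own; a minimal sketch, assuming it yields the extractor classes that
this script renders into supportedsites.md:

    from yt_dlp.extractor import list_extractor_classes

    for ie in list_extractor_classes():
        print(ie.IE_NAME)
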
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index df9abe5ae..d12ff4947 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
import optparse
import os.path
import re
@@ -23,7 +24,7 @@ yt\-dlp \- A youtube-dl fork with additional features and patches
def main():
parser = optparse.OptionParser(usage='%prog OUTFILE.md')
- options, args = parser.parse_args()
+ _, args = parser.parse_args()
if len(args) != 1:
parser.error('Expected an output filename')
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
index e9904ae35..d496a092b 100755
--- a/devscripts/run_tests.sh
+++ b/devscripts/run_tests.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/usr/bin/env sh
if [ -z $1 ]; then
test_set='test'
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 59faea06a..267af5f6e 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -1,9 +1,12 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import yt_dlp
ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp"
diff --git a/pyinst.py b/pyinst.py
index fba30fa94..2744d3fce 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -1,28 +1,12 @@
#!/usr/bin/env python3
+
import os
import platform
import sys
from PyInstaller.__main__ import run as run_pyinstaller
-OS_NAME = platform.system()
-if OS_NAME == 'Windows':
- from PyInstaller.utils.win32.versioninfo import (
- FixedFileInfo,
- SetVersion,
- StringFileInfo,
- StringStruct,
- StringTable,
- VarFileInfo,
- VarStruct,
- VSVersionInfo,
- )
-elif OS_NAME == 'Darwin':
- pass
-else:
- raise Exception(f'{OS_NAME} is not supported')
-
-ARCH = platform.architecture()[0][:2]
+OS_NAME, ARCH = sys.platform, platform.architecture()[0][:2]
def main():
@@ -33,10 +17,7 @@ def main():
if not onedir and '-F' not in opts and '--onefile' not in opts:
opts.append('--onefile')
- name = 'yt-dlp%s' % ('_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else '')
- final_file = ''.join((
- 'dist/', f'{name}/' if onedir else '', name, '.exe' if OS_NAME == 'Windows' else ''))
-
+ name, final_file = exe(onedir)
print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}')
print('Remember to update the version using "devscripts/update-version.py"')
if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'):
@@ -79,6 +60,21 @@ def read_version(fname):
return locals()['__version__']
+def exe(onedir):
+ """@returns (name, path)"""
+ name = '_'.join(filter(None, (
+ 'yt-dlp',
+ {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME),
+ ARCH == '32' and 'x86'
+ )))
+ return name, ''.join(filter(None, (
+ 'dist/',
+ onedir and f'{name}/',
+ name,
+ OS_NAME == 'win32' and '.exe'
+ )))
+
+
def version_to_list(version):
version_list = version.split('.')
return list(map(int, version_list)) + [0] * (4 - len(version_list))
@@ -109,11 +105,22 @@ def pycryptodome_module():
def set_version_info(exe, version):
- if OS_NAME == 'Windows':
+ if OS_NAME == 'win32':
windows_set_version(exe, version)
def windows_set_version(exe, version):
+ from PyInstaller.utils.win32.versioninfo import (
+ FixedFileInfo,
+ SetVersion,
+ StringFileInfo,
+ StringStruct,
+ StringTable,
+ VarFileInfo,
+ VarStruct,
+ VSVersionInfo,
+ )
+
version_list = version_to_list(version)
suffix = '_x86' if ARCH == '32' else ''
SetVersion(exe, VSVersionInfo(
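
For clarity, the values the new `exe()` helper produces, worked out by hand
from the code above (the OS_NAME/ARCH combinations are illustrative):

    # OS_NAME='win32',  ARCH='64', onedir=False -> ('yt-dlp',       'dist/yt-dlp.exe')
    # OS_NAME='win32',  ARCH='32', onedir=False -> ('yt-dlp_x86',   'dist/yt-dlp_x86.exe')
    # OS_NAME='darwin', ARCH='64', onedir=True  -> ('yt-dlp_macos', 'dist/yt-dlp_macos/yt-dlp_macos')
    # OS_NAME='linux',  ARCH='64', onedir=False -> ('yt-dlp_linux', 'dist/yt-dlp_linux')
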
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index 52feb4aba..000000000
--- a/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[pytest]
-addopts = -ra -v --strict-markers
-markers =
- download
diff --git a/setup.cfg b/setup.cfg
index 5fe95226a..415cca91a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,41 @@
[wheel]
-universal = True
+universal = true
+
[flake8]
-exclude = devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
+exclude = build,venv,.tox,.git,.pytest_cache
ignore = E402,E501,E731,E741,W503
+max_line_length = 120
+per_file_ignores =
+ devscripts/lazy_load_template.py: F401
+
+
+[tool:pytest]
+addopts = -ra -v --strict-markers
+markers =
+ download
+
+
+[tox:tox]
+skipsdist = true
+envlist = py{36,37,38,39,310},pypy{36,37,38,39}
+skip_missing_interpreters = true
+
+[testenv] # tox
+deps =
+ pytest
+commands = pytest {posargs:"-m not download"}
+passenv = HOME # For test_compat_expanduser
+setenv =
+ # PYTHONWARNINGS = error # Catches PIP's warnings too
+
+
+[isort]
+py_version = 36
+multi_line_output = VERTICAL_HANGING_INDENT
+line_length = 80
+reverse_relative = true
+ensure_newline_before_comments = true
+include_trailing_comma = true
+known_first_party =
+ test
diff --git a/setup.py b/setup.py
index 9d54943f2..6710222bd 100644
--- a/setup.py
+++ b/setup.py
@@ -27,7 +27,7 @@ REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']
if sys.argv[1:2] == ['py2exe']:
- import py2exe
+ import py2exe # noqa: F401
warnings.warn(
'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
'The recommended way is to use "pyinst.py" to build using pyinstaller')
@@ -124,6 +124,9 @@ setup(
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
+ 'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: Implementation',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
diff --git a/supportedsites.md b/supportedsites.md
index 7663c09d4..7a91358d5 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -1,4 +1,6 @@
# Supported sites
+ - **0000studio:archive**
+ - **0000studio:clip**
- **17live**
- **17live:clip**
- **1tv**: Первый канал
@@ -60,8 +62,6 @@
- **AmHistoryChannel**
- **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **AnimalPlanet**
- - **AnimeLab**: [<abbr title="netrc machine"><em>animelab</em></abbr>]
- - **AnimeLabShows**: [<abbr title="netrc machine"><em>animelab</em></abbr>]
- **AnimeOnDemand**: [<abbr title="netrc machine"><em>animeondemand</em></abbr>]
- **ant1newsgr:article**: ant1news.gr articles
- **ant1newsgr:embed**: ant1news.gr embedded videos
@@ -89,6 +89,7 @@
- **AsianCrush**
- **AsianCrushPlaylist**
- **AtresPlayer**: [<abbr title="netrc machine"><em>atresplayer</em></abbr>]
+ - **AtScaleConfEvent**
- **ATTTechChannel**
- **ATVAt**
- **AudiMedia**
@@ -276,6 +277,8 @@
- **dailymotion**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>]
- **dailymotion:playlist**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>]
- **dailymotion:user**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>]
+ - **DailyWire**
+ - **DailyWirePodcast**
- **damtomo:record**
- **damtomo:video**
- **daum.net**
@@ -322,8 +325,8 @@
- **drtv**
- **drtv:live**
- **DTube**
- - **duboku**: www.duboku.co
- - **duboku:list**: www.duboku.co entire series
+ - **duboku**: www.duboku.io
+ - **duboku:list**: www.duboku.io entire series
- **Dumpert**
- **dvtv**: http://video.aktualne.cz/
- **dw**
@@ -376,6 +379,7 @@
- **fc2:embed**
- **fc2:live**
- **Fczenit**
+ - **Fifa**
- **Filmmodu**
- **filmon**
- **filmon:channel**
@@ -402,6 +406,8 @@
- **FranceTVSite**
- **Freesound**
- **freespeech.org**
+ - **freetv:series**
+ - **FreeTvMovies**
- **FrontendMasters**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>]
- **FrontendMastersCourse**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>]
- **FrontendMastersLesson**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>]
@@ -447,14 +453,18 @@
- **GodTube**
- **Gofile**
- **Golem**
+ - **goodgame:stream**
- **google:podcasts**
- **google:podcasts:feed**
- **GoogleDrive**
+ - **GoogleDrive:Folder**
- **GoPro**
- **Goshgay**
- **GoToStage**
- **GPUTechConf**
- **Gronkh**
+ - **gronkh:feed**
+ - **gronkh:vods**
- **Groupon**
- **hbo**
- **HearThisAt**
@@ -492,6 +502,7 @@
- **HungamaSong**
- **huya:live**: huya.com
- **Hypem**
+ - **Icareus**
- **ign.com**
- **IGNArticle**
- **IGNVideo**
@@ -528,6 +539,9 @@
- **ivi:compilation**: ivi.ru compilations
- **ivideon**: Ivideon TV
- **Iwara**
+ - **iwara:playlist**
+ - **iwara:user**
+ - **Ixigua**
- **Izlesene**
- **Jable**
- **JablePlaylist**
@@ -547,12 +561,14 @@
- **Ketnet**
- **khanacademy**
- **khanacademy:unit**
+ - **Kicker**
- **KickStarter**
- **KinjaEmbed**
- **KinoPoisk**
- **KonserthusetPlay**
- **Koo**
- **KrasView**: Красвью
+ - **KTH**
- **Ku6**
- **KUSI**
- **kuwo:album**: 酷我音乐 - 专辑
@@ -587,6 +603,8 @@
- **Libsyn**
- **life**: Life.ru
- **life:embed**
+ - **likee**
+ - **likee:user**
- **limelight**
- **limelight:channel**
- **limelight:channel_list**
@@ -605,7 +623,8 @@
- **loc**: Library of Congress
- **LocalNews8**
- **LoveHomePorn**
- - **lrt.lt**
+ - **LRTStream**
+ - **LRTVOD**
- **lynda**: [<abbr title="netrc machine"><em>lynda</em></abbr>] lynda.com videos
- **lynda:course**: [<abbr title="netrc machine"><em>lynda</em></abbr>] lynda.com online courses
- **m6**
@@ -626,6 +645,7 @@
- **Markiza**
- **MarkizaPage**
- **massengeschmack.tv**
+ - **Masters**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
- **MedalTV**
@@ -664,6 +684,7 @@
- **miomio.tv**
- **mirrativ**
- **mirrativ:user**
+ - **MirrorCoUK**
- **MiTele**: mitele.es
- **mixch**
- **mixch:archive**
@@ -729,6 +750,7 @@
- **NationalGeographicTV**
- **Naver**
- **Naver:live**
+ - **navernow**
- **NBA**
- **nba:watch**
- **nba:watch:collection**
@@ -747,7 +769,8 @@
- **ndr:embed:base**
- **NDTV**
- **Nebula**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
- - **nebula:collection**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
+ - **nebula:channel**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
+ - **nebula:subscriptions**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
- **NerdCubedFeed**
- **netease:album**: 网易云音乐 - 专辑
- **netease:djradio**: 网易云音乐 - 电台
@@ -757,6 +780,8 @@
- **netease:singer**: 网易云音乐 - 歌手
- **netease:song**: 网易云音乐
- **NetPlus**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
+ - **Netverse**
+ - **NetversePlaylist**
- **Netzkino**
- **Newgrounds**
- **Newgrounds:playlist**
@@ -920,6 +945,7 @@
- **PlayPlusTV**: [<abbr title="netrc machine"><em>playplustv</em></abbr>]
- **PlayStuff**
- **PlaysTV**
+ - **PlaySuisse**
- **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
- **Playvid**
- **PlayVids**
@@ -927,9 +953,9 @@
- **pluralsight**: [<abbr title="netrc machine"><em>pluralsight</em></abbr>]
- **pluralsight:course**
- **PlutoTV**
+ - **Podchaser**
- **podomatic**
- **Pokemon**
- - **PokemonSoundLibrary**
- **PokemonWatch**
- **PokerGo**: [<abbr title="netrc machine"><em>pokergo</em></abbr>]
- **PokerGoCollection**: [<abbr title="netrc machine"><em>pokergo</em></abbr>]
@@ -1026,9 +1052,10 @@
- **RICE**
- **RMCDecouverte**
- **RockstarGames**
- - **Rokfin**
- - **rokfin:channel**
- - **rokfin:stack**
+ - **Rokfin**: [<abbr title="netrc machine"><em>rokfin</em></abbr>]
+ - **rokfin:channel**: Rokfin Channels
+ - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix
+ - **rokfin:stack**: Rokfin Stacks
- **RoosterTeeth**: [<abbr title="netrc machine"><em>roosterteeth</em></abbr>]
- **RoosterTeethSeries**: [<abbr title="netrc machine"><em>roosterteeth</em></abbr>]
- **RottenTomatoes**
@@ -1136,6 +1163,7 @@
- **southpark.cc.com**
- **southpark.cc.com:español**
- **southpark.de**
+ - **southpark.lat**
- **southpark.nl**
- **southparkstudios.dk**
- **SovietsCloset**
@@ -1175,6 +1203,7 @@
- **StretchInternet**
- **Stripchat**
- **stv:player**
+ - **Substack**
- **SunPorno**
- **sverigesradio:episode**
- **sverigesradio:publication**
@@ -1370,8 +1399,6 @@
- **video.google:search**: Google Video search; "gvsearch:" prefix
- **video.sky.it**
- **video.sky.it:live**
- - **VideocampusSachsen**
- - **VideocampusSachsenEmbed**
- **VideoDetective**
- **videofy.me**
- **videomore**
@@ -1400,6 +1427,7 @@
- **vimeo:watchlater**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)
- **Vimm:recording**
- **Vimm:stream**
+ - **Vimp**
- **Vimple**: Vimple - one-click video hosting
- **Vine**
- **vine:user**
@@ -1450,6 +1478,7 @@
- **washingtonpost:article**
- **wat.tv**
- **WatchBox**
+ - **WatchESPN**
- **WatchIndianPorn**: Watch Indian Porn
- **WDR**
- **wdr:mobile**: (**Currently broken**)
@@ -1522,14 +1551,17 @@
- **YourPorn**
- **YourUpload**
- **youtube**: YouTube
+ - **youtube:clip**
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
- **youtube:music:search_url**: YouTube music search URLs with selectable sections (Eg: #songs)
+ - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
- **youtube:playlist**: YouTube playlists
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
- **youtube:search**: YouTube search; "ytsearch:" prefix
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search_url**: YouTube search URLs with sorting and filter support
+ - **youtube:stories**: YouTube channel stories; "ytstories:" prefix
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs
- **youtube:user**: YouTube user videos; "ytuser:" prefix
@@ -1550,6 +1582,10 @@
- **Zhihu**
- **zingmp3**: zingmp3.vn
- **zingmp3:album**
+ - **zingmp3:chart-home**
+ - **zingmp3:chart-music-video**
+ - **zingmp3:user**
+ - **zingmp3:week-chart**
- **zoom**
- **Zype**
- **generic**: Generic downloader that works on some sites
diff --git a/test/helper.py b/test/helper.py
index 2333ace98..f19e1a34f 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -9,7 +9,7 @@ import types
import yt_dlp.extractor
from yt_dlp import YoutubeDL
-from yt_dlp.compat import compat_os_name, compat_str
+from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, write_string
if 'pytest' in sys.modules:
@@ -44,7 +44,7 @@ def try_rm(filename):
raise
-def report_warning(message):
+def report_warning(message, *args, **kwargs):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored
@@ -67,10 +67,10 @@ class FakeYDL(YoutubeDL):
super().__init__(params, auto_init=False)
self.result = []
- def to_screen(self, s, skip_eol=None):
+ def to_screen(self, s, *args, **kwargs):
print(s)
- def trouble(self, s, tb=None):
+ def trouble(self, s, *args, **kwargs):
raise Exception(s)
def download(self, x):
@@ -80,10 +80,10 @@ class FakeYDL(YoutubeDL):
# Silence an expected warning matching a regex
old_report_warning = self.report_warning
- def report_warning(self, message):
+ def report_warning(self, message, *args, **kwargs):
if re.match(regex, message):
return
- old_report_warning(message)
+ old_report_warning(message, *args, **kwargs)
self.report_warning = types.MethodType(report_warning, self)
@@ -96,29 +96,29 @@ md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
def expect_value(self, got, expected, field):
- if isinstance(expected, compat_str) and expected.startswith('re:'):
+ if isinstance(expected, str) and expected.startswith('re:'):
match_str = expected[len('re:'):]
match_rex = re.compile(match_str)
self.assertTrue(
- isinstance(got, compat_str),
- f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
match_rex.match(got),
f'field {field} (value: {got!r}) should match {match_str!r}')
- elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
+ elif isinstance(expected, str) and expected.startswith('startswith:'):
start_str = expected[len('startswith:'):]
self.assertTrue(
- isinstance(got, compat_str),
- f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
got.startswith(start_str),
f'field {field} (value: {got!r}) should start with {start_str!r}')
- elif isinstance(expected, compat_str) and expected.startswith('contains:'):
+ elif isinstance(expected, str) and expected.startswith('contains:'):
contains_str = expected[len('contains:'):]
self.assertTrue(
- isinstance(got, compat_str),
- f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
+ isinstance(got, str),
+ f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
self.assertTrue(
contains_str in got,
f'field {field} (value: {got!r}) should contain {contains_str!r}')
@@ -142,12 +142,12 @@ def expect_value(self, got, expected, field):
index, field, type_expected, type_got))
expect_value(self, item_got, item_expected, field)
else:
- if isinstance(expected, compat_str) and expected.startswith('md5:'):
+ if isinstance(expected, str) and expected.startswith('md5:'):
self.assertTrue(
- isinstance(got, compat_str),
+ isinstance(got, str),
f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
got = 'md5:' + md5(got)
- elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected):
+ elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
self.assertTrue(
isinstance(got, (list, dict)),
f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
@@ -236,7 +236,7 @@ def expect_info_dict(self, got_dict, expected_dict):
missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
if missing_keys:
def _repr(v):
- if isinstance(v, compat_str):
+ if isinstance(v, str):
return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
elif isinstance(v, type):
return v.__name__
@@ -301,9 +301,9 @@ def assertEqual(self, got, expected, msg=None):
def expect_warnings(ydl, warnings_re):
real_warning = ydl.report_warning
- def _report_warning(w):
+ def _report_warning(w, *args, **kwargs):
if not any(re.search(w_re, w) for w_re in warnings_re):
- real_warning(w)
+ real_warning(w, *args, **kwargs)
ydl.report_warning = _report_warning
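
The pattern behind the helper changes above: the overrides now accept
`*args, **kwargs` so they keep working when the real `YoutubeDL` methods grow
new parameters (the 2022.06.22 changelog's "[test] Fix `FakeYDL` signatures").
A toy illustration; the `only_once` parameter name is hypothetical here:

    class Base:
        def report_warning(self, message, only_once=False):
            print('WARNING:', message)

    class NarrowOverride(Base):
        def report_warning(self, message):  # breaks if a caller passes only_once=...
            pass

    class WideOverride(Base):
        def report_warning(self, message, *args, **kwargs):  # tolerates new parameters
            pass

    WideOverride().report_warning('x', only_once=True)    # fine
    # NarrowOverride().report_warning('x', only_once=True)  # TypeError
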
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 257ea7dd3..f57a29ffc 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,10 +7,12 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import http.server
import threading
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from yt_dlp.compat import compat_etree_fromstring, compat_http_server
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from yt_dlp.compat import compat_etree_fromstring
from yt_dlp.extractor import YoutubeIE, get_info_extractor
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.utils import (
@@ -23,7 +26,7 @@ TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
-class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -502,6 +505,24 @@ class TestInfoExtractor(unittest.TestCase):
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+ <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+ controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
@@ -1637,7 +1658,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
# or the underlying `_download_webpage_handle` returning no content
# when a response matches `expected_status`.
- httpd = compat_http_server.HTTPServer(
+ httpd = http.server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
port = http_server_port(httpd)
server_thread = threading.Thread(target=httpd.serve_forever)
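
The new `_parse_html5_media_entries` test above checks that an extension is
still inferred when the media URL itself has none; presumably the element's
`type="video/mp4"` attribute drives this, along the lines of the existing
utility:

    from yt_dlp.utils import mimetype2ext

    print(mimetype2ext('video/mp4'))  # -> 'mp4'
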
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 1133f6165..1eb3abc17 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,23 +7,21 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import copy
import json
-from test.helper import FakeYDL, assertRegexpMatches
+import urllib.error
+from test.helper import FakeYDL, assertRegexpMatches
from yt_dlp import YoutubeDL
-from yt_dlp.compat import (
- compat_os_name,
- compat_setenv,
- compat_str,
- compat_urllib_error,
-)
+from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import (
ExtractorError,
LazyList,
+ OnDemandPagedList,
int_or_none,
match_filter_func,
)
@@ -39,7 +38,7 @@ class YDL(FakeYDL):
def process_info(self, info_dict):
self.downloaded_info_dicts.append(info_dict.copy())
- def to_screen(self, msg):
+ def to_screen(self, msg, *args, **kwargs):
self.msgs.append(msg)
def dl(self, *args, **kwargs):
@@ -840,14 +839,14 @@ class TestYoutubeDL(unittest.TestCase):
# test('%(foo|)s', ('', '_')) # fixme
# Environment variable expansion for prepare_filename
- compat_setenv('__yt_dlp_var', 'expanded')
+ os.environ['__yt_dlp_var'] = 'expanded'
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
test(envvar, (envvar, 'expanded'))
if compat_os_name == 'nt':
test('%s%', ('%s%', '%s%'))
- compat_setenv('s', 'expanded')
+ os.environ['s'] = 'expanded'
test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s
- compat_setenv('(test)s', 'expanded')
+ os.environ['(test)s'] = 'expanded'
test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template
# Path expansion and escaping
@@ -989,41 +988,79 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(res, [])
def test_playlist_items_selection(self):
- entries = [{
- 'id': compat_str(i),
- 'title': compat_str(i),
- 'url': TEST_URL,
- } for i in range(1, 5)]
- playlist = {
- '_type': 'playlist',
- 'id': 'test',
- 'entries': entries,
- 'extractor': 'test:playlist',
- 'extractor_key': 'test:playlist',
- 'webpage_url': 'http://example.com',
- }
+ INDICES, PAGE_SIZE = list(range(1, 11)), 3
+
+ def entry(i, evaluated):
+ evaluated.append(i)
+ return {
+ 'id': str(i),
+ 'title': str(i),
+ 'url': TEST_URL,
+ }
- def get_downloaded_info_dicts(params):
+ def pagedlist_entries(evaluated):
+ def page_func(n):
+ start = PAGE_SIZE * n
+ for i in INDICES[start: start + PAGE_SIZE]:
+ yield entry(i, evaluated)
+ return OnDemandPagedList(page_func, PAGE_SIZE)
+
+ def page_num(i):
+ return (i + PAGE_SIZE - 1) // PAGE_SIZE
+
+ def generator_entries(evaluated):
+ for i in INDICES:
+ yield entry(i, evaluated)
+
+ def list_entries(evaluated):
+ return list(generator_entries(evaluated))
+
+ def lazylist_entries(evaluated):
+ return LazyList(generator_entries(evaluated))
+
+ def get_downloaded_info_dicts(params, entries):
ydl = YDL(params)
- # make a deep copy because the dictionary and nested entries
- # can be modified
- ydl.process_ie_result(copy.deepcopy(playlist))
+ ydl.process_ie_result({
+ '_type': 'playlist',
+ 'id': 'test',
+ 'extractor': 'test:playlist',
+ 'extractor_key': 'test:playlist',
+ 'webpage_url': 'http://example.com',
+ 'entries': entries,
+ })
return ydl.downloaded_info_dicts
- def test_selection(params, expected_ids):
- results = [
- (v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
- for v in get_downloaded_info_dicts(params)]
- self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))))
-
- test_selection({}, [1, 2, 3, 4])
- test_selection({'playlistend': 10}, [1, 2, 3, 4])
- test_selection({'playlistend': 2}, [1, 2])
- test_selection({'playliststart': 10}, [])
- test_selection({'playliststart': 2}, [2, 3, 4])
- test_selection({'playlist_items': '2-4'}, [2, 3, 4])
+ def test_selection(params, expected_ids, evaluate_all=False):
+ expected_ids = list(expected_ids)
+ if evaluate_all:
+ generator_eval = pagedlist_eval = INDICES
+ elif not expected_ids:
+ generator_eval = pagedlist_eval = []
+ else:
+ generator_eval = INDICES[0: max(expected_ids)]
+ pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE:
+ PAGE_SIZE * page_num(max(expected_ids))]
+
+ for name, func, expected_eval in (
+ ('list', list_entries, INDICES),
+ ('Generator', generator_entries, generator_eval),
+ # ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path
+ ('PagedList', pagedlist_entries, pagedlist_eval),
+ ):
+ evaluated = []
+ entries = func(evaluated)
+ results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
+ for v in get_downloaded_info_dicts(params, entries)]
+ self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}')
+ self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}')
+ test_selection({}, INDICES)
+ test_selection({'playlistend': 20}, INDICES, True)
+ test_selection({'playlistend': 2}, INDICES[:2])
+ test_selection({'playliststart': 11}, [], True)
+ test_selection({'playliststart': 2}, INDICES[1:])
+ test_selection({'playlist_items': '2-4'}, INDICES[1:4])
test_selection({'playlist_items': '2,4'}, [2, 4])
- test_selection({'playlist_items': '10'}, [])
+ test_selection({'playlist_items': '20'}, [], True)
test_selection({'playlist_items': '0'}, [])
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
@@ -1032,15 +1069,37 @@ class TestYoutubeDL(unittest.TestCase):
# Tests for https://github.com/yt-dlp/yt-dlp/issues/720
# https://github.com/yt-dlp/yt-dlp/issues/302
- test_selection({'playlistreverse': True}, [4, 3, 2, 1])
- test_selection({'playliststart': 2, 'playlistreverse': True}, [4, 3, 2])
+ test_selection({'playlistreverse': True}, INDICES[::-1])
+ test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1])
test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2])
test_selection({'playlist_items': '4,2'}, [4, 2])
+ # Tests for --playlist-items start:end:step
+ test_selection({'playlist_items': ':'}, INDICES, True)
+ test_selection({'playlist_items': '::1'}, INDICES, True)
+ test_selection({'playlist_items': '::-1'}, INDICES[::-1], True)
+ test_selection({'playlist_items': ':6'}, INDICES[:6])
+ test_selection({'playlist_items': ':-6'}, INDICES[:-5], True)
+ test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True)
+ test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True)
+
+ test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True)
+ test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True)
+ test_selection({'playlist_items': ':inf:-1'}, [], True)
+ test_selection({'playlist_items': '0-2:2'}, [2])
+ test_selection({'playlist_items': '1-:2'}, INDICES[::2], True)
+ test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True)
+
+ test_selection({'playlist_items': '10::3'}, [10], True)
+ test_selection({'playlist_items': '-1::3'}, [10], True)
+ test_selection({'playlist_items': '11::3'}, [], True)
+ test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True)
+ test_selection({'playlist_items': '-15::15'}, [], True)
+
def test_urlopen_no_file_protocol(self):
# see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()
- self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd')
+ self.assertRaises(urllib.error.URLError, ydl.urlopen, 'file:///etc/passwd')
def test_do_not_override_ie_key_in_url_transparent(self):
ydl = YDL()
@@ -1126,7 +1185,7 @@ class TestYoutubeDL(unittest.TestCase):
def _entries(self):
for n in range(3):
- video_id = compat_str(n)
+ video_id = str(n)
yield {
'_type': 'url_transparent',
'ie_key': VideoIE.ie_key(),
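
The rewritten test_playlist_items_selection checks not just which entries are downloaded but which are evaluated, since a generator or OnDemandPagedList should only be consumed as far as the selection requires. The page arithmetic behind pagedlist_eval can be illustrated with a self-contained sketch (a simplified stand-in, not yt_dlp.utils.OnDemandPagedList itself):

    PAGE_SIZE = 3
    INDICES = list(range(1, 11))  # ten playlist entries, 1-based ids
    evaluated = []

    def page(n):  # zero-based page number, mirroring page_func above
        start = n * PAGE_SIZE
        for i in INDICES[start:start + PAGE_SIZE]:
            evaluated.append(i)
            yield i

    wanted = range(5, 8)  # playlist_items 5-7
    first_page = (min(wanted) - 1) // PAGE_SIZE
    last_page = (max(wanted) - 1) // PAGE_SIZE
    picked = [i for n in range(first_page, last_page + 1)
              for i in page(n) if i in wanted]
    assert picked == [5, 6, 7]
    assert evaluated == [4, 5, 6, 7, 8, 9]  # pages 1-2 touched; page 0 never evaluated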
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 6280e1f2c..0d4e7dc97 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
-import re
import sys
-import tempfile
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import re
+import tempfile
+
from yt_dlp.utils import YoutubeDLCookieJar
diff --git a/test/test_aes.py b/test/test_aes.py
index 2b7b7cf54..037246588 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import base64
from yt_dlp.aes import (
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index e1012f69b..ff248432b 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,8 +7,8 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import is_download_test, try_rm
+from test.helper import is_download_test, try_rm
from yt_dlp import YoutubeDL
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index b6019554e..848c96ff0 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
+
# Allow direct execution
-import collections
import os
import sys
import unittest
@@ -8,8 +8,9 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import gettestcases
+import collections
+from test.helper import gettestcases
from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors
diff --git a/test/test_cache.py b/test/test_cache.py
index 14e54ba20..ce1624b68 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -1,15 +1,16 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
-import shutil
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL
+import shutil
+from test.helper import FakeYDL
from yt_dlp.cache import Cache
diff --git a/test/test_compat.py b/test/test_compat.py
index 224175c65..c6a8f4ecb 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -7,16 +8,14 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import struct
+import urllib.parse
+
from yt_dlp import compat
from yt_dlp.compat import (
compat_etree_fromstring,
compat_expanduser,
- compat_getenv,
- compat_setenv,
- compat_str,
- compat_struct_unpack,
compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
compat_urllib_parse_urlencode,
)
@@ -26,28 +25,19 @@ class TestCompat(unittest.TestCase):
with self.assertWarns(DeprecationWarning):
compat.compat_basestring
- compat.asyncio.events # Must not raise error
-
- def test_compat_getenv(self):
- test_str = 'тест'
- compat_setenv('yt_dlp_COMPAT_GETENV', test_str)
- self.assertEqual(compat_getenv('yt_dlp_COMPAT_GETENV'), test_str)
+ with self.assertWarns(DeprecationWarning):
+ compat.WINDOWS_VT_MODE
- def test_compat_setenv(self):
- test_var = 'yt_dlp_COMPAT_SETENV'
- test_str = 'тест'
- compat_setenv(test_var, test_str)
- compat_getenv(test_var)
- self.assertEqual(compat_getenv(test_var), test_str)
+ compat.asyncio.events # Must not raise error
def test_compat_expanduser(self):
old_home = os.environ.get('HOME')
test_str = R'C:\Documents and Settings\тест\Application Data'
try:
- compat_setenv('HOME', test_str)
+ os.environ['HOME'] = test_str
self.assertEqual(compat_expanduser('~'), test_str)
finally:
- compat_setenv('HOME', old_home or '')
+ os.environ['HOME'] = old_home or ''
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
@@ -69,8 +59,8 @@ class TestCompat(unittest.TestCase):
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
def test_compat_urllib_parse_unquote_plus(self):
- self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def')
- self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def')
+ self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
+ self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
def test_compat_urllib_parse_urlencode(self):
self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
@@ -91,11 +81,11 @@ class TestCompat(unittest.TestCase):
</root>
'''
doc = compat_etree_fromstring(xml.encode())
- self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
- self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
- self.assertTrue(isinstance(doc.find('normal').text, compat_str))
- self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
- self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
+ self.assertTrue(isinstance(doc.attrib['foo'], str))
+ self.assertTrue(isinstance(doc.attrib['spam'], str))
+ self.assertTrue(isinstance(doc.find('normal').text, str))
+ self.assertTrue(isinstance(doc.find('chinese').text, str))
+ self.assertTrue(isinstance(doc.find('foo/bar').text, str))
def test_compat_etree_fromstring_doctype(self):
xml = '''<?xml version="1.0"?>
@@ -104,7 +94,7 @@ class TestCompat(unittest.TestCase):
compat_etree_fromstring(xml)
def test_struct_unpack(self):
- self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
+ self.assertEqual(struct.unpack('!B', b'\x00'), (0,))
if __name__ == '__main__':
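
The deletions in test_compat.py follow from the compat_* names being plain re-exports of the standard library on Python 3; a quick sanity check of the replacements used throughout this patch:

    import os
    import struct
    import urllib.parse

    os.environ['YTDLP_DEMO'] = 'expanded'            # replaces compat_setenv
    assert os.getenv('YTDLP_DEMO') == 'expanded'     # replaces compat_getenv
    assert urllib.parse.unquote_plus('%7e/abc+def') == '~/abc def'
    assert struct.unpack('!B', b'\x00') == (0,)
    assert isinstance('тест', str)                   # compat_str is just str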
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 5bfaec367..cfeb11b55 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -14,16 +14,16 @@ from yt_dlp.cookies import (
class Logger:
- def debug(self, message):
+ def debug(self, message, *args, **kwargs):
print(f'[verbose] {message}')
- def info(self, message):
+ def info(self, message, *args, **kwargs):
print(message)
- def warning(self, message, only_once=False):
+ def warning(self, message, *args, **kwargs):
self.error(message)
- def error(self, message):
+ def error(self, message, *args, **kwargs):
raise Exception(message)
diff --git a/test/test_download.py b/test/test_download.py
index 9a83bee2f..b397b3ecf 100755
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -1,14 +1,19 @@
#!/usr/bin/env python3
+
# Allow direct execution
-import hashlib
-import json
import os
-import socket
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import hashlib
+import http.client
+import json
+import socket
+import urllib.error
+
from test.helper import (
assertGreaterEqual,
expect_info_dict,
@@ -20,12 +25,7 @@ from test.helper import (
try_rm,
)
-import yt_dlp.YoutubeDL
-from yt_dlp.compat import (
- compat_http_client,
- compat_HTTPError,
- compat_urllib_error,
-)
+import yt_dlp.YoutubeDL # isort: split
from yt_dlp.extractor import get_info_extractor
from yt_dlp.utils import (
DownloadError,
@@ -43,7 +43,7 @@ class YoutubeDL(yt_dlp.YoutubeDL):
self.processed_info_dicts = []
super().__init__(*args, **kwargs)
- def report_warning(self, message):
+ def report_warning(self, message, *args, **kwargs):
# Don't accept warnings during tests
raise ExtractorError(message)
@@ -102,9 +102,10 @@ def generator(test_case, tname):
def print_skipping(reason):
print('Skipping %s: %s' % (test_case['name'], reason))
+ self.skipTest(reason)
+
if not ie.working():
print_skipping('IE marked as not _WORKING')
- return
for tc in test_cases:
info_dict = tc.get('info_dict', {})
@@ -118,11 +119,10 @@ def generator(test_case, tname):
if 'skip' in test_case:
print_skipping(test_case['skip'])
- return
+
for other_ie in other_ies:
if not other_ie.working():
print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
- return
params = get_params(test_case.get('params', {}))
params['outtmpl'] = tname + '_' + params['outtmpl']
@@ -167,7 +167,7 @@ def generator(test_case, tname):
force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one
- if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
+ if not err.exc_info[0] in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine) or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503):
raise
if try_num == RETRIES:
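
print_skipping now ends in self.skipTest(reason), and skipTest raises unittest.SkipTest instead of returning, which is why the explicit return statements after each call could be dropped. A minimal demonstration:

    import unittest

    class Demo(unittest.TestCase):
        def test_skip(self):
            self.skipTest('not working')
            raise AssertionError('never reached')

    result = unittest.TextTestRunner().run(
        unittest.defaultTestLoader.loadTestsFromTestCase(Demo))
    assert len(result.skipped) == 1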
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index c33308064..cce7c59e2 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -1,17 +1,19 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
-import re
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import http.server
+import re
import threading
-from test.helper import http_server_port, try_rm
+from test.helper import http_server_port, try_rm
from yt_dlp import YoutubeDL
-from yt_dlp.compat import compat_http_server
from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename
@@ -21,7 +23,7 @@ TEST_DIR = os.path.dirname(os.path.abspath(__file__))
TEST_SIZE = 10 * 1024
-class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -78,7 +80,7 @@ class FakeLogger:
class TestHttpFD(unittest.TestCase):
def setUp(self):
- self.httpd = compat_http_server.HTTPServer(
+ self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
diff --git a/test/test_execution.py b/test/test_execution.py
index 6efd432e9..1d15fddab 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python3
-import contextlib
+
+# Allow direct execution
import os
-import subprocess
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import contextlib
+import subprocess
+
from yt_dlp.utils import encodeArgument
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
diff --git a/test/test_http.py b/test/test_http.py
index 146df7500..b1aac7720 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,17 +7,19 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import http.server
import ssl
import threading
-from test.helper import http_server_port
+import urllib.request
+from test.helper import http_server_port
from yt_dlp import YoutubeDL
-from yt_dlp.compat import compat_http_server, compat_urllib_request
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -53,7 +56,7 @@ class FakeLogger:
class TestHTTP(unittest.TestCase):
def setUp(self):
- self.httpd = compat_http_server.HTTPServer(
+ self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
@@ -64,7 +67,7 @@ class TestHTTP(unittest.TestCase):
class TestHTTPS(unittest.TestCase):
def setUp(self):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
- self.httpd = compat_http_server.HTTPServer(
+ self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), HTTPTestRequestHandler)
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.load_cert_chain(certfn, None)
@@ -90,7 +93,7 @@ class TestClientCert(unittest.TestCase):
certfn = os.path.join(TEST_DIR, 'testcert.pem')
self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
cacertfn = os.path.join(self.certdir, 'ca.crt')
- self.httpd = compat_http_server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
sslctx.verify_mode = ssl.CERT_REQUIRED
sslctx.load_verify_locations(cafile=cacertfn)
@@ -130,7 +133,7 @@ class TestClientCert(unittest.TestCase):
def _build_proxy_handler(name):
- class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
proxy_name = name
def log_message(self, format, *args):
@@ -146,14 +149,14 @@ def _build_proxy_handler(name):
class TestProxy(unittest.TestCase):
def setUp(self):
- self.proxy = compat_http_server.HTTPServer(
+ self.proxy = http.server.HTTPServer(
('127.0.0.1', 0), _build_proxy_handler('normal'))
self.port = http_server_port(self.proxy)
self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
self.proxy_thread.daemon = True
self.proxy_thread.start()
- self.geo_proxy = compat_http_server.HTTPServer(
+ self.geo_proxy = http.server.HTTPServer(
('127.0.0.1', 0), _build_proxy_handler('geo'))
self.geo_port = http_server_port(self.geo_proxy)
self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
@@ -170,7 +173,7 @@ class TestProxy(unittest.TestCase):
response = ydl.urlopen(url).read().decode()
self.assertEqual(response, f'normal: {url}')
- req = compat_urllib_request.Request(url)
+ req = urllib.request.Request(url)
req.add_header('Ytdl-request-proxy', geo_proxy)
response = ydl.urlopen(req).read().decode()
self.assertEqual(response, f'geo: {url}')
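
The HTTPS and client-certificate setups above follow the standard recipe for TLS-wrapping a stdlib test server. In outline, with testcert.pem standing in for any PEM that bundles certificate and key:

    import http.server
    import ssl

    httpd = http.server.HTTPServer(('127.0.0.1', 0), http.server.SimpleHTTPRequestHandler)
    sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    sslctx.load_cert_chain('testcert.pem')  # combined certificate + private key
    # After wrapping, every accepted connection is a TLS socket
    httpd.socket = sslctx.wrap_socket(httpd.socket, server_side=True)

    # For the client-certificate test, the server additionally demands a cert:
    # sslctx.verify_mode = ssl.CERT_REQUIRED
    # sslctx.load_verify_locations(cafile='ca.crt')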
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 872c58c8f..4277cabe0 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
from yt_dlp.jsinterp import JSInterpreter
diff --git a/test/test_netrc.py b/test/test_netrc.py
index f7a0b33d2..dc708d974 100644
--- a/test/test_netrc.py
+++ b/test/test_netrc.py
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
import os
import sys
import unittest
diff --git a/test/test_overwrites.py b/test/test_overwrites.py
index a6d5bae40..6954c07f9 100644
--- a/test/test_overwrites.py
+++ b/test/test_overwrites.py
@@ -1,11 +1,15 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
-import subprocess
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import subprocess
+
from test.helper import is_download_test, try_rm
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py
index e84a08f29..3778d1794 100644
--- a/test/test_post_hooks.py
+++ b/test/test_post_hooks.py
@@ -1,13 +1,15 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import get_params, is_download_test, try_rm
-import yt_dlp.YoutubeDL
+from test.helper import get_params, is_download_test, try_rm
+import yt_dlp.YoutubeDL # isort: split
from yt_dlp.utils import DownloadError
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index 9d8a4dcc5..c49e3ede0 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,6 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_shlex_quote
from yt_dlp.postprocessor import (
diff --git a/test/test_socks.py b/test/test_socks.py
index a8b068cdd..6651290d2 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,11 +7,12 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
import random
import subprocess
-from test.helper import FakeYDL, get_params, is_download_test
+import urllib.request
-from yt_dlp.compat import compat_str, compat_urllib_request
+from test.helper import FakeYDL, get_params, is_download_test
@is_download_test
@@ -51,7 +53,7 @@ class TestMultipleSocks(unittest.TestCase):
if params is None:
return
ydl = FakeYDL()
- req = compat_urllib_request.Request('http://yt-dl.org/ip')
+ req = urllib.request.Request('http://yt-dl.org/ip')
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
self.assertEqual(
ydl.urlopen(req).read().decode(),
@@ -62,7 +64,7 @@ class TestMultipleSocks(unittest.TestCase):
if params is None:
return
ydl = FakeYDL()
- req = compat_urllib_request.Request('https://yt-dl.org/ip')
+ req = urllib.request.Request('https://yt-dl.org/ip')
req.add_header('Ytdl-request-proxy', params['secondary_proxy'])
self.assertEqual(
ydl.urlopen(req).read().decode(),
@@ -99,13 +101,13 @@ class TestSocks(unittest.TestCase):
return ydl.urlopen('http://yt-dl.org/ip').read().decode()
def test_socks4(self):
- self.assertTrue(isinstance(self._get_ip('socks4'), compat_str))
+ self.assertTrue(isinstance(self._get_ip('socks4'), str))
def test_socks4a(self):
- self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str))
+ self.assertTrue(isinstance(self._get_ip('socks4a'), str))
def test_socks5(self):
- self.assertTrue(isinstance(self._get_ip('socks5'), compat_str))
+ self.assertTrue(isinstance(self._get_ip('socks5'), str))
if __name__ == '__main__':
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 182bd7a4b..57362895f 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,8 +7,8 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, is_download_test, md5
+from test.helper import FakeYDL, is_download_test, md5
from yt_dlp.extractor import (
NPOIE,
NRKTVIE,
@@ -38,6 +39,9 @@ class BaseTestSubtitles(unittest.TestCase):
self.DL = FakeYDL()
self.ie = self.IE()
self.DL.add_info_extractor(self.ie)
+ if not self.IE.working():
+ print('Skipping: %s marked as not _WORKING' % self.IE.ie_key())
+ self.skipTest('IE marked as not _WORKING')
def getInfoDict(self):
info_dict = self.DL.extract_info(self.url, download=False)
@@ -57,6 +61,21 @@ class BaseTestSubtitles(unittest.TestCase):
@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
+ # Available subtitles for QRS8MkLhQmM:
+ # Language formats
+ # ru vtt, ttml, srv3, srv2, srv1, json3
+ # fr vtt, ttml, srv3, srv2, srv1, json3
+ # en vtt, ttml, srv3, srv2, srv1, json3
+ # nl vtt, ttml, srv3, srv2, srv1, json3
+ # de vtt, ttml, srv3, srv2, srv1, json3
+ # ko vtt, ttml, srv3, srv2, srv1, json3
+ # it vtt, ttml, srv3, srv2, srv1, json3
+ # zh-Hant vtt, ttml, srv3, srv2, srv1, json3
+ # hi vtt, ttml, srv3, srv2, srv1, json3
+ # pt-BR vtt, ttml, srv3, srv2, srv1, json3
+ # es-MX vtt, ttml, srv3, srv2, srv1, json3
+ # ja vtt, ttml, srv3, srv2, srv1, json3
+ # pl vtt, ttml, srv3, srv2, srv1, json3
url = 'QRS8MkLhQmM'
IE = YoutubeIE
@@ -65,47 +84,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
- self.assertEqual(md5(subtitles['en']), '688dd1ce0981683867e7fe6fde2a224b')
- self.assertEqual(md5(subtitles['it']), '31324d30b8430b309f7f5979a504a769')
+ self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+ self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9')
for lang in ['fr', 'de']:
self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang)
- def test_youtube_subtitles_ttml_format(self):
+ def _test_subtitles_format(self, fmt, md5_hash, lang='en'):
self.DL.params['writesubtitles'] = True
- self.DL.params['subtitlesformat'] = 'ttml'
+ self.DL.params['subtitlesformat'] = fmt
subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'c97ddf1217390906fa9fbd34901f3da2')
+ self.assertEqual(md5(subtitles[lang]), md5_hash)
+
+ def test_youtube_subtitles_ttml_format(self):
+ self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2')
def test_youtube_subtitles_vtt_format(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['subtitlesformat'] = 'vtt'
- subtitles = self.getSubtitles()
- self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d')
+ self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d')
- def test_youtube_automatic_captions(self):
- self.url = '8YoUxe5ncPo'
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertTrue(subtitles['it'] is not None)
+ def test_youtube_subtitles_json3_format(self):
+ self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b')
- def test_youtube_no_automatic_captions(self):
- self.url = 'QRS8MkLhQmM'
+ def _test_automatic_captions(self, url, lang):
+ self.url = url
self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslangs'] = [lang]
subtitles = self.getSubtitles()
- self.assertTrue(not subtitles)
+ self.assertTrue(subtitles[lang] is not None)
+ def test_youtube_automatic_captions(self):
+ # Available automatic captions for 8YoUxe5ncPo:
+ # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3)
+ # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr,
+ # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da,
+ # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv,
+ # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy,
+ # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur,
+ # mt, ms, mr, ug, ta, my, af, sw, is, am,
+ # *it*, iw, sv, ar,
+ # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi,
+ # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl,
+ # ky, sd
+ # ...
+ self._test_automatic_captions('8YoUxe5ncPo', 'it')
+
+ @unittest.skip('Video unavailable')
def test_youtube_translated_subtitles(self):
- # This video has a subtitles track, which can be translated
- self.url = 'i0ZabxXmH4Y'
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslangs'] = ['it']
- subtitles = self.getSubtitles()
- self.assertTrue(subtitles['it'] is not None)
+ # This video has a subtitles track, which can be translated (#4555)
+ self._test_automatic_captions('Ky9eprVWzlI', 'it')
def test_youtube_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
- self.url = 'n5BB19UTcdA'
+ # Available automatic captions for 8YoUxe5ncPo:
+ # ...
+ # 8YoUxe5ncPo has no subtitles
+ self.url = '8YoUxe5ncPo'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@@ -137,6 +169,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestTedSubtitles(BaseTestSubtitles):
url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'
IE = TedTalkIE
@@ -162,12 +195,12 @@ class TestVimeoSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'})
- self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888')
- self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8')
+ self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1')
+ self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac')
def test_nosubtitles(self):
self.DL.expect_warning('video doesn\'t have subtitles')
- self.url = 'http://vimeo.com/56015672'
+ self.url = 'http://vimeo.com/68093876'
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
@@ -175,6 +208,7 @@ class TestVimeoSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestWallaSubtitles(BaseTestSubtitles):
url = 'http://vod.walla.co.il/movie/2705958/the-yes-men'
IE = WallaIE
@@ -197,6 +231,7 @@ class TestWallaSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky'
IE = CeskaTelevizeIE
@@ -219,6 +254,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestLyndaSubtitles(BaseTestSubtitles):
url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html'
IE = LyndaIE
@@ -232,6 +268,7 @@ class TestLyndaSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestNPOSubtitles(BaseTestSubtitles):
url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860'
IE = NPOIE
@@ -245,6 +282,7 @@ class TestNPOSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestMTVSubtitles(BaseTestSubtitles):
url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
IE = ComedyCentralIE
@@ -269,8 +307,8 @@ class TestNRKSubtitles(BaseTestSubtitles):
self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), {'no'})
- self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2')
+ self.assertEqual(set(subtitles.keys()), {'nb-ttv'})
+ self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149')
@is_download_test
@@ -295,6 +333,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken - DRM only')
class TestVikiSubtitles(BaseTestSubtitles):
url = 'http://www.viki.com/videos/1060846v-punch-episode-18'
IE = VikiIE
@@ -323,6 +362,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles):
@is_download_test
+@unittest.skip('IE broken')
class TestThePlatformFeedSubtitles(BaseTestSubtitles):
url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207'
IE = ThePlatformFeedIE
@@ -360,7 +400,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'en'})
- self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
def test_subtitles_in_page(self):
self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree'
@@ -368,7 +408,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles):
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(set(subtitles.keys()), {'en'})
- self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c')
+ self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045')
@is_download_test
diff --git a/test/test_utils.py b/test/test_utils.py
index 184c39cff..8024a8e7c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
+
# Allow direct execution
-import contextlib
import os
import sys
import unittest
@@ -8,19 +8,16 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-# Various small unit tests
+import contextlib
import io
import itertools
import json
import xml.etree.ElementTree
from yt_dlp.compat import (
- compat_chr,
compat_etree_fromstring,
- compat_getenv,
compat_HTMLParseError,
compat_os_name,
- compat_setenv,
)
from yt_dlp.utils import (
Config,
@@ -266,20 +263,20 @@ class TestUtil(unittest.TestCase):
def env(var):
return f'%{var}%' if sys.platform == 'win32' else f'${var}'
- compat_setenv('yt_dlp_EXPATH_PATH', 'expanded')
+ os.environ['yt_dlp_EXPATH_PATH'] = 'expanded'
self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')
old_home = os.environ.get('HOME')
test_str = R'C:\Documents and Settings\тест\Application Data'
try:
- compat_setenv('HOME', test_str)
- self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
- self.assertEqual(expand_path('~'), compat_getenv('HOME'))
+ os.environ['HOME'] = test_str
+ self.assertEqual(expand_path(env('HOME')), os.getenv('HOME'))
+ self.assertEqual(expand_path('~'), os.getenv('HOME'))
self.assertEqual(
expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')),
- '%s/expanded' % compat_getenv('HOME'))
+ '%s/expanded' % os.getenv('HOME'))
finally:
- compat_setenv('HOME', old_home or '')
+ os.environ['HOME'] = old_home or ''
def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
@@ -1128,7 +1125,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
# "Narrow" Python builds don't support unicode code points outside BMP.
try:
- compat_chr(0x10000)
+ chr(0x10000)
supports_outside_bmp = True
except ValueError:
supports_outside_bmp = False
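
The HOME handling in test_expand_path is the usual save/patch/restore idiom for environment-dependent tests; contextlib makes the same pattern reusable (on POSIX, expanduser reads HOME):

    import contextlib
    import os

    @contextlib.contextmanager
    def patched_env(var, value):
        old = os.environ.get(var)
        os.environ[var] = value
        try:
            yield
        finally:
            if old is None:
                os.environ.pop(var, None)
            else:
                os.environ[var] = old

    with patched_env('HOME', '/tmp/fake-home'):
        assert os.path.expanduser('~') == '/tmp/fake-home'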
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
index 657994074..21ce10a1f 100644
--- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py
@@ -1,11 +1,15 @@
#!/usr/bin/env python3
+
+# Allow direct execution
import os
-import subprocess
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import subprocess
+
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 66611e236..c2dd0ac30 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
@@ -6,8 +7,8 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, is_download_test
+from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE, YoutubeTabIE
diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py
index 36f8be689..81be5d3c9 100644
--- a/test/test_youtube_misc.py
+++ b/test/test_youtube_misc.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Allow direct execution
import os
import sys
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 2c2013295..4fc2917e5 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -1,18 +1,19 @@
#!/usr/bin/env python3
+
# Allow direct execution
-import contextlib
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import contextlib
import re
import string
import urllib.request
-from test.helper import FakeYDL, is_download_test
-from yt_dlp.compat import compat_str
+from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE
from yt_dlp.jsinterp import JSInterpreter
@@ -157,7 +158,7 @@ def t_factory(name, sig_func, url_pattern):
def signature(jscode, sig_input):
func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
src_sig = (
- compat_str(string.printable[:sig_input])
+ str(string.printable[:sig_input])
if isinstance(sig_input, int) else sig_input)
return func(src_sig)
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index c8c14aafc..000000000
--- a/tox.ini
+++ /dev/null
@@ -1,15 +0,0 @@
-[tox]
-envlist = py26,py27,py33,py34,py35
-
-# Needed?
-[testenv]
-deps =
- nose
- coverage
-# We need a valid $HOME for test_compat_expanduser
-passenv = HOME
-defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
- --exclude test_subtitles.py --exclude test_write_annotations.py
- --exclude test_youtube_lists.py --exclude test_socks.py
-commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=yt_dlp --cover-html
- # test.test_download:TestDownload.test_NowVideo
diff --git a/yt-dlp.sh b/yt-dlp.sh
index 0321a3362..22a69250c 100644
--- a/yt-dlp.sh
+++ b/yt-dlp.sh
@@ -1,2 +1,2 @@
-#!/bin/sh
+#!/usr/bin/env sh
exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@"
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 94f8dcaef..de8a8c4d2 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
import collections
import contextlib
import datetime
@@ -26,20 +25,11 @@ import urllib.request
from string import ascii_letters
from .cache import Cache
-from .compat import (
- compat_get_terminal_size,
- compat_os_name,
- compat_shlex_quote,
- compat_str,
- compat_urllib_error,
- compat_urllib_request,
- windows_enable_vt_mode,
-)
+from .compat import HAS_LEGACY as compat_has_legacy
+from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
-from .extractor import _LAZY_LOADER
-from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
@@ -59,6 +49,7 @@ from .postprocessor import (
)
from .utils import (
DEFAULT_OUTTMPL,
+ IDENTITY,
LINK_TEMPLATES,
NO_DEFAULT,
NUMBER_RE,
@@ -75,13 +66,13 @@ from .utils import (
ExtractorError,
GeoRestrictedError,
HEADRequest,
- InAdvancePagedList,
ISO3166Utils,
LazyList,
MaxDownloadsReached,
Namespace,
PagedList,
PerRequestProxyHandler,
+ PlaylistEntries,
Popen,
PostProcessingError,
ReExtractInfo,
@@ -141,6 +132,7 @@ from .utils import (
url_basename,
variadic,
version_tuple,
+ windows_enable_vt_mode,
write_json_file,
write_string,
)
@@ -193,13 +185,6 @@ class YoutubeDL:
For compatibility, a single list is also accepted
print_to_file: A dict with keys WHEN (same as forceprint) mapped to
a list of tuples with (template, filename)
- forceurl: Force printing final URL. (Deprecated)
- forcetitle: Force printing title. (Deprecated)
- forceid: Force printing ID. (Deprecated)
- forcethumbnail: Force printing thumbnail URL. (Deprecated)
- forcedescription: Force printing description. (Deprecated)
- forcefilename: Force printing final filename. (Deprecated)
- forceduration: Force printing duration. (Deprecated)
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
@@ -249,11 +234,9 @@ class YoutubeDL:
and don't overwrite any file if False
For compatibility with youtube-dl,
"nooverwrites" may also be used instead
- playliststart: Playlist item to start at.
- playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download.
- playlistreverse: Download playlist items in reverse order.
playlistrandom: Download playlist items in random order.
+ lazy_playlist: Process playlist entries as they are received.
matchtitle: Download only matching titles.
rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance.
@@ -276,9 +259,6 @@ class YoutubeDL:
writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Deprecated - Use subtitleslangs = ['all']
- Downloads all the subtitles of the video
- (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download (can be regex).
@@ -332,7 +312,6 @@ class YoutubeDL:
bidi_workaround: Work around buggy terminals without bidirectional text
support, using fribidi
debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well (deprecated)
default_search: Prepend this string if an input url is not valid.
'auto' for elaborate guessing
encoding: Use this encoding instead of the system-specified.
@@ -348,10 +327,6 @@ class YoutubeDL:
* when: When to run the postprocessor. Allowed values are
the entries of utils.POSTPROCESS_WHEN
Assumed to be 'post_process' if not given
- post_hooks: Deprecated - Register a custom postprocessor instead
- A list of functions that get called as the final step
- for each video file, after all postprocessors have been
- called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
@@ -396,8 +371,6 @@ class YoutubeDL:
- "detect_or_warn": check whether we can do anything
about it, warn otherwise (default)
source_address: Client-side IP address to bind to.
- call_home: Boolean, true iff we are allowed to contact the
- yt-dlp servers for debugging. (BROKEN)
sleep_interval_requests: Number of seconds to sleep between requests
during extraction
sleep_interval: Number of seconds to sleep before each download when
@@ -432,17 +405,10 @@ class YoutubeDL:
geo_bypass_ip_block:
IP range in CIDR notation that will be used similarly to
geo_bypass_country
-
- The following options determine which downloader is picked:
external_downloader: A dictionary of protocol keys and the executable of the
external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
Set the value to 'native' to use the native downloader
- hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
- or {'m3u8': 'ffmpeg'} instead.
- Use the native HLS downloader instead of ffmpeg/avconv
- if True, otherwise use ffmpeg/avconv if False, otherwise
- use downloader suggested by extractor if None.
compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort
@@ -452,6 +418,16 @@ class YoutubeDL:
Allowed keys are 'download', 'postprocess',
'download-title' (console title) and 'postprocess-title'.
The template is mapped on a dictionary with keys 'progress' and 'info'
+    retry_sleep_functions: Dictionary of functions that take the number of attempts
+                       as argument and return the time to sleep in seconds.
+ Allowed keys are 'http', 'fragment', 'file_access'
+ download_ranges: A function that gets called for every video with the signature
+ (info_dict, *, ydl) -> Iterable[Section].
+ Only the returned sections will be downloaded. Each Section contains:
+ * start_time: Start time of the section in seconds
+ * end_time: End time of the section in seconds
+ * title: Section title (Optional)
+ * index: Section number (Optional)
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see yt_dlp/downloader/common.py):
@@ -461,8 +437,6 @@ class YoutubeDL:
external_downloader_args, concurrent_fragment_downloads.
The following options are used by the post processors:
- prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
@@ -482,12 +456,54 @@ class YoutubeDL:
See "EXTRACTOR ARGUMENTS" for details.
Eg: {'youtube': {'skip': ['dash', 'hls']}}
mark_watched: Mark videos watched (even with --simulate). Only for YouTube
- youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
+
+ The following options are deprecated and may be removed in the future:
+
+ playliststart: - Use playlist_items
+ Playlist item to start at.
+ playlistend: - Use playlist_items
+ Playlist item to end at.
+ playlistreverse: - Use playlist_items
+ Download playlist items in reverse order.
+ forceurl: - Use forceprint
+ Force printing final URL.
+ forcetitle: - Use forceprint
+ Force printing title.
+ forceid: - Use forceprint
+ Force printing ID.
+ forcethumbnail: - Use forceprint
+ Force printing thumbnail URL.
+ forcedescription: - Use forceprint
+ Force printing description.
+ forcefilename: - Use forceprint
+ Force printing final filename.
+ forceduration: - Use forceprint
+ Force printing duration.
+ allsubtitles: - Use subtitleslangs = ['all']
+ Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ include_ads: - Doesn't work
+ Download ads as well
+ call_home: - Not implemented
+ Boolean, true iff we are allowed to contact the
+ yt-dlp servers for debugging.
+ post_hooks: - Register a custom postprocessor
+ A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
+ hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
+ Use the native HLS downloader instead of ffmpeg/avconv
+ if True, otherwise use ffmpeg/avconv if False, otherwise
+ use downloader suggested by extractor if None.
+ prefer_ffmpeg: - avconv support is deprecated
+ If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ youtube_include_dash_manifest: - Use extractor_args
If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
care about DASH. (only for youtube)
- youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
+ youtube_include_hls_manifest: - Use extractor_args
If True (default), HLS manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
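
A hedged sketch of the two new parameters documented above; representing each Section as a plain dict with the documented keys is an assumption of this example, not something the docstring guarantees:

    from yt_dlp import YoutubeDL

    def first_minute(info_dict, *, ydl):
        # Sections shown as plain dicts carrying the documented keys;
        # the concrete type expected by YoutubeDL is assumed here
        yield {'start_time': 0, 'end_time': 60, 'title': 'intro', 'index': 1}

    ydl = YoutubeDL({
        'download_ranges': first_minute,
        # exponential backoff between HTTP retries: 1s, 2s, 4s, ...
        'retry_sleep_functions': {'http': lambda attempt: 2 ** (attempt - 1)},
    })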
@@ -544,21 +560,27 @@ class YoutubeDL:
self.cache = Cache(self)
windows_enable_vt_mode()
- self._out_files = {
- 'error': sys.stderr,
- 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
- 'console': None if compat_os_name == 'nt' else next(
+ stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
+ self._out_files = Namespace(
+ out=stdout,
+ error=sys.stderr,
+ screen=sys.stderr if self.params.get('quiet') else stdout,
+ console=None if compat_os_name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
- }
- self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
- self._allow_colors = {
- type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
- for type_ in ('screen', 'error')
- }
-
- if sys.version_info < (3, 6):
- self.report_warning(
- 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
+ )
+ self._allow_colors = Namespace(**{
+ type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
+ for type_, stream in self._out_files.items_ if type_ != 'console'
+ })
+
+ MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
+ current_version = sys.version_info[:2]
+ if current_version < MIN_RECOMMENDED:
+ msg = 'Support for Python version %d.%d has been deprecated and will break in future versions of yt-dlp'
+ if current_version < MIN_SUPPORTED:
+ msg = 'Python version %d.%d is no longer supported'
+ self.deprecation_warning(
+ f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
if self.params.get('allow_unplayable_formats'):
self.report_warning(
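
The _out_files refactor swaps dict lookups for attribute access. A stand-in built on types.SimpleNamespace shows the shape; yt_dlp's own Namespace additionally exposes the items_ view used in the _allow_colors comprehension:

    import sys
    from types import SimpleNamespace

    out_files = SimpleNamespace(out=sys.stdout, error=sys.stderr, screen=sys.stdout)
    out_files.error.write('attribute access replaces self._out_files["error"]\n')

    # The _allow_colors comprehension, with vars() standing in for items_
    allow_colors = {name: stream is not None and stream.isatty()
                    for name, stream in vars(out_files).items() if name != 'console'}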
@@ -586,7 +608,10 @@ class YoutubeDL:
for msg in self.params.get('_deprecation_warnings', []):
self.deprecation_warning(msg)
- if 'list-formats' in self.params.get('compat_opts', []):
+ self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
+ if not compat_has_legacy:
+ self.params['compat_opts'].add('no-compat-legacy')
+ if 'list-formats' in self.params['compat_opts']:
self.params['listformats_table'] = False
if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
@@ -610,15 +635,9 @@ class YoutubeDL:
try:
import pty
master, slave = pty.openpty()
- width = compat_get_terminal_size().columns
- if width is None:
- width_args = []
- else:
- width_args = ['-w', str(width)]
- sp_kwargs = dict(
- stdin=subprocess.PIPE,
- stdout=slave,
- stderr=self._out_files['error'])
+ width = shutil.get_terminal_size().columns
+ width_args = [] if width is None else ['-w', str(width)]
+ sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
try:
self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
except OSError:
@@ -647,7 +666,7 @@ class YoutubeDL:
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- self.outtmpl_dict = self.parse_outtmpl()
+ self._parse_outtmpl()
# Creating format selector here allows us to catch syntax errors before the extraction
self.format_selector = (
@@ -747,6 +766,7 @@ class YoutubeDL:
def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
+ assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
self._pps[when].append(pp)
pp.set_downloader(self)
@@ -770,7 +790,7 @@ class YoutubeDL:
return message
assert hasattr(self, '_output_process')
- assert isinstance(message, compat_str)
+ assert isinstance(message, str)
line_count = message.count('\n') + 1
self._output_process.stdin.write((message + '\n').encode())
self._output_process.stdin.flush()
@@ -789,9 +809,9 @@ class YoutubeDL:
"""Print message to stdout"""
if quiet is not None:
self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
- self._write_string(
- '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._out_files['print'])
+ if skip_eol is not False:
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
def to_screen(self, message, skip_eol=False, quiet=None):
"""Print message to screen if not in quiet mode"""
@@ -802,20 +822,20 @@ class YoutubeDL:
return
self._write_string(
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
- self._out_files['screen'])
+ self._out_files.screen)
def to_stderr(self, message, only_once=False):
"""Print message to stderr"""
- assert isinstance(message, compat_str)
+ assert isinstance(message, str)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
+ self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
def _send_console_code(self, code):
- if compat_os_name == 'nt' or not self._out_files['console']:
+ if compat_os_name == 'nt' or not self._out_files.console:
return
- self._write_string(code, self._out_files['console'])
+ self._write_string(code, self._out_files.console)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
@@ -905,13 +925,14 @@ class YoutubeDL:
text = fallback
return format_text(text, f) if allow_colors else text if fallback is None else fallback
+ def _format_out(self, *args, **kwargs):
+ return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
+
def _format_screen(self, *args, **kwargs):
- return self._format_text(
- self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
+ return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
def _format_err(self, *args, **kwargs):
- return self._format_text(
- self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
+ return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
@@ -942,7 +963,7 @@ class YoutubeDL:
'''Log debug message or Print message to stderr'''
if not self.params.get('verbose', False):
return
- message = '[debug] %s' % message
+ message = f'[debug] {message}'
if self.params.get('logger'):
self.params['logger'].debug(message)
else:
@@ -973,21 +994,19 @@ class YoutubeDL:
self.report_warning(msg)
def parse_outtmpl(self):
- outtmpl_dict = self.params.get('outtmpl', {})
- if not isinstance(outtmpl_dict, dict):
- outtmpl_dict = {'default': outtmpl_dict}
- # Remove spaces in the default template
- if self.params.get('restrictfilenames'):
+ self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
+ self._parse_outtmpl()
+ return self.params['outtmpl']
+
+ def _parse_outtmpl(self):
+ sanitize = IDENTITY
+ if self.params.get('restrictfilenames'): # Remove spaces in the default template
sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
- else:
- sanitize = lambda x: x
- outtmpl_dict.update({
- k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
- if outtmpl_dict.get(k) is None})
- for _, val in outtmpl_dict.items():
- if isinstance(val, bytes):
- self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
- return outtmpl_dict
+
+ outtmpl = self.params.setdefault('outtmpl', {})
+ if not isinstance(outtmpl, dict):
+ self.params['outtmpl'] = outtmpl = {'default': outtmpl}
+ outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})
def get_output_path(self, dir_type='', filename=None):
paths = self.params.get('paths', {})
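
For reference, the effect of the restrictfilenames sanitizer that _parse_outtmpl above applies to the default templates (the template here is illustrative, not a yt-dlp default):

# ' - ' collapses to a single space first, then all spaces become dashes
sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
print(sanitize('%(title)s - %(id)s.%(ext)s'))  # -> %(title)s-%(id)s.%(ext)s
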
@@ -1038,6 +1057,7 @@ class YoutubeDL:
def _copy_infodict(info_dict):
info_dict = dict(info_dict)
info_dict.pop('__postprocessors', None)
+ info_dict.pop('__pending_error', None)
return info_dict
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
@@ -1135,7 +1155,7 @@ class YoutubeDL:
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
return sanitize_filename(str(value), restricted=restricted, is_id=(
bool(re.search(r'(^|[_.])id(\.|$)', key))
- if 'filename-sanitization' in self.params.get('compat_opts', [])
+ if 'filename-sanitization' in self.params['compat_opts']
else NO_DEFAULT))
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
@@ -1224,7 +1244,7 @@ class YoutubeDL:
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
if outtmpl is None:
- outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
+ outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
try:
outtmpl = self._outtmpl_expandpath(outtmpl)
filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
@@ -1390,7 +1410,7 @@ class YoutubeDL:
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
- def __handle_extraction_exceptions(func):
+ def _handle_extraction_exceptions(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
while True:
@@ -1463,7 +1483,7 @@ class YoutubeDL:
self.to_screen('')
raise
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
@@ -1529,6 +1549,7 @@ class YoutubeDL:
self.add_extra_info(info_copy, extra_info)
info_copy, _ = self.pre_process(info_copy)
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
+ self._raise_pending_errors(info_copy)
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_copy)
return ie_result
@@ -1536,10 +1557,11 @@ class YoutubeDL:
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
ie_result = self.process_video_result(ie_result, download=download)
+ self._raise_pending_errors(ie_result)
additional_urls = (ie_result or {}).get('additional_urls')
if additional_urls:
# TODO: Improve MetadataParserPP to allow setting a list
- if isinstance(additional_urls, compat_str):
+ if isinstance(additional_urls, str):
additional_urls = [additional_urls]
self.to_screen(
'[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
@@ -1570,9 +1592,13 @@ class YoutubeDL:
if not info:
return info
+ exempted_fields = {'_type', 'url', 'ie_key'}
+ if not ie_result.get('section_end') and ie_result.get('section_start') is None:
+ # For video clips, the id etc of the clip extractor should be used
+ exempted_fields |= {'id', 'extractor', 'extractor_key'}
+
new_result = info.copy()
- new_result.update(filter_dict(ie_result, lambda k, v: (
- v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
+ new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
# Extracted info may not be a video result (i.e.
# info.get('_type', 'video') != video) but rather an url or
@@ -1644,112 +1670,31 @@ class YoutubeDL:
}
def __process_playlist(self, ie_result, download):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- if 'entries' not in ie_result:
- raise EntryNotInPlaylist('There are no entries')
-
- MissingEntry = object()
- incomplete_entries = bool(ie_result.get('requested_entries'))
- if incomplete_entries:
- def fill_missing_entries(entries, indices):
- ret = [MissingEntry] * max(indices)
- for i, entry in zip(indices, entries):
- ret[i - 1] = entry
- return ret
- ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1)
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+ """Process each entry in the playlist"""
+ title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
+ self.to_screen(f'[download] Downloading playlist: {title}')
- ie_entries = ie_result['entries']
- if isinstance(ie_entries, list):
- playlist_count = len(ie_entries)
- msg = f'Collected {playlist_count} videos; downloading %d of them'
- ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
+ all_entries = PlaylistEntries(self, ie_result)
+ entries = orderedSet(all_entries.get_requested_items(), lazy=True)
- def get_entry(i):
- return ie_entries[i - 1]
+ lazy = self.params.get('lazy_playlist')
+ if lazy:
+ resolved_entries, n_entries = [], 'N/A'
+ ie_result['requested_entries'], ie_result['entries'] = None, None
else:
- msg = 'Downloading %d videos'
- if not isinstance(ie_entries, (PagedList, LazyList)):
- ie_entries = LazyList(ie_entries)
- elif isinstance(ie_entries, InAdvancePagedList):
- if ie_entries._pagesize == 1:
- playlist_count = ie_entries._pagecount
-
- def get_entry(i):
- return YoutubeDL.__handle_extraction_exceptions(
- lambda self, i: ie_entries[i - 1]
- )(self, i)
-
- entries, broken = [], False
- items = playlistitems if playlistitems is not None else itertools.count(playliststart)
- for i in items:
- if i == 0:
- continue
- if playlistitems is None and playlistend is not None and playlistend < i:
- break
- entry = None
- try:
- entry = get_entry(i)
- if entry is MissingEntry:
- raise EntryNotInPlaylist()
- except (IndexError, EntryNotInPlaylist):
- if incomplete_entries:
- raise EntryNotInPlaylist(f'Entry {i} cannot be found')
- elif not playlistitems:
- break
- entries.append(entry)
- try:
- if entry is not None:
- # TODO: Add auto-generated fields
- self._match_entry(entry, incomplete=True, silent=True)
- except (ExistingVideoReached, RejectedVideoReached):
- broken = True
- break
- ie_result['entries'] = entries
-
- # Save playlist_index before re-ordering
- entries = [
- ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
- for i, entry in enumerate(entries, 1)
- if entry is not None]
- n_entries = len(entries)
-
- if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
- ie_result['playlist_count'] = n_entries
-
- if not playlistitems and (playliststart != 1 or playlistend):
- playlistitems = list(range(playliststart, playliststart + n_entries))
- ie_result['requested_entries'] = playlistitems
+ entries = resolved_entries = list(entries)
+ n_entries = len(resolved_entries)
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
+ if not ie_result.get('playlist_count'):
+ # Better to do this after potentially exhausting entries
+ ie_result['playlist_count'] = all_entries.get_full_count()
_infojson_written = False
write_playlist_files = self.params.get('allow_playlist_files', True)
if write_playlist_files and self.params.get('list_thumbnails'):
self.list_thumbnails(ie_result)
if write_playlist_files and not self.params.get('simulate'):
- ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
+ ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
_infojson_written = self._write_info_json(
'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
if _infojson_written is None:
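
PlaylistEntries and the lockstep requested_entries/entries pairing replace the hand-rolled index slicing removed above. A minimal sketch of the transposition idiom this hunk uses twice, with strings standing in for real entry dicts:

resolved_entries = [(1, 'video-a'), (3, 'video-b')]
requested, entries = tuple(zip(*resolved_entries)) or ([], [])
assert requested == (1, 3) and entries == ('video-a', 'video-b')

# zip() of an empty list yields nothing, so the `or ([], [])` fallback
# keeps the unpacking from failing on an empty playlist.
requested, entries = tuple(zip(*[])) or ([], [])
assert (requested, entries) == ([], [])
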
@@ -1760,33 +1705,41 @@ class YoutubeDL:
# TODO: This should be passed to ThumbnailsConvertor if necessary
self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
- if self.params.get('playlistrandom', False):
+ if lazy:
+ if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
+ self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
+ elif self.params.get('playlistreverse'):
+ entries.reverse()
+ elif self.params.get('playlistrandom'):
random.shuffle(entries)
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+ self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
+ f'{format_field(ie_result, "playlist_count", " of %s")}')
- self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
- for i, entry_tuple in enumerate(entries, 1):
- playlist_index, entry = entry_tuple
- if 'playlist-index' in self.params.get('compat_opts', []):
- playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
+ for i, (playlist_index, entry) in enumerate(entries):
+ if lazy:
+ resolved_entries.append((playlist_index, entry))
+
+ # TODO: Add auto-generated fields
+ if not entry or self._match_entry(entry, incomplete=True) is not None:
+ continue
+
self.to_screen('[download] Downloading video %s of %s' % (
- self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+ self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+
+ entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+ if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
+ playlist_index = ie_result['requested_entries'][i]
+
+ entry_result = self.__process_iterable_entry(entry, download, {
+ 'n_entries': int_or_none(n_entries),
+ '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
'playlist_count': ie_result.get('playlist_count'),
'playlist_index': playlist_index,
- 'playlist_autonumber': i,
- 'playlist': playlist,
+ 'playlist_autonumber': i + 1,
+ 'playlist': title,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
@@ -1796,20 +1749,17 @@ class YoutubeDL:
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
- }
-
- if self._match_entry(entry, incomplete=True) is not None:
- continue
-
- entry_result = self.__process_iterable_entry(entry, download, extra)
+ })
if not entry_result:
failures += 1
if failures >= max_failures:
self.report_error(
- 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+ f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
+ resolved_entries[i] = (playlist_index, entry_result)
+
+ # Update with processed data
+ ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
# Write the updated info to json
if _infojson_written is True and self._write_info_json(
@@ -1818,10 +1768,10 @@ class YoutubeDL:
return
ie_result = self.run_all_pps('playlist', ie_result)
- self.to_screen(f'[download] Finished downloading playlist: {playlist}')
+ self.to_screen(f'[download] Finished downloading playlist: {title}')
return ie_result
- @__handle_extraction_exceptions
+ @_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)
@@ -1903,7 +1853,7 @@ class YoutubeDL:
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
- except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ except (DownloadError, OSError, ValueError) + network_exceptions:
success = False
finally:
if os.path.exists(temp_file.name):
@@ -1927,12 +1877,12 @@ class YoutubeDL:
and download
and (
not can_merge()
- or info_dict.get('is_live', False)
- or self.outtmpl_dict['default'] == '-'))
+ or info_dict.get('is_live') and not self.params.get('live_from_start')
+ or self.params['outtmpl']['default'] == '-'))
compat = (
prefer_best
or self.params.get('allow_multiple_audio_streams', False)
- or 'format-spec' in self.params.get('compat_opts', []))
+ or 'format-spec' in self.params['compat_opts'])
return (
'best/bestvideo+bestaudio' if prefer_best
@@ -2273,7 +2223,7 @@ class YoutubeDL:
def _calc_headers(self, info_dict):
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
- cookies = self._calc_cookies(info_dict)
+ cookies = self._calc_cookies(info_dict['url'])
if cookies:
res['Cookie'] = cookies
@@ -2284,8 +2234,8 @@ class YoutubeDL:
return res
- def _calc_cookies(self, info_dict):
- pr = sanitized_Request(info_dict['url'])
+ def _calc_cookies(self, url):
+ pr = sanitized_Request(url)
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
@@ -2383,6 +2333,11 @@ class YoutubeDL:
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+ def _raise_pending_errors(self, info):
+ err = info.pop('__pending_error', None)
+ if err:
+ self.report_error(err, tb=False)
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
self._num_videos += 1
@@ -2399,10 +2354,10 @@ class YoutubeDL:
def sanitize_string_field(info, string_field):
field = info.get(string_field)
- if field is None or isinstance(field, compat_str):
+ if field is None or isinstance(field, str):
return
report_force_conversion(string_field, 'a string', 'string')
- info[string_field] = compat_str(field)
+ info[string_field] = str(field)
def sanitize_numeric_fields(info):
for numeric_field in self._NUMERIC_FIELDS:
@@ -2414,9 +2369,20 @@ class YoutubeDL:
sanitize_string_field(info_dict, 'id')
sanitize_numeric_fields(info_dict)
+ if info_dict.get('section_end') and info_dict.get('section_start') is not None:
+ info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
self.report_warning('"duration" field is negative, there is an error in extractor')
+ chapters = info_dict.get('chapters') or []
+ dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
+ for prev, current, next_ in zip(
+ (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)):
+ if current.get('start_time') is None:
+ current['start_time'] = prev.get('end_time')
+ if not current.get('end_time'):
+ current['end_time'] = next_.get('start_time')
+
if 'playlist' not in info_dict:
# It isn't part of a playlist
info_dict['playlist'] = None
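
The chapter loop added above pads the list with a dummy at both ends so each chapter can inherit a missing start_time from its predecessor's end_time and a missing end_time from its successor's start_time. A standalone version of the same pass, assuming chapters are plain dicts:

def fill_chapter_bounds(chapters, duration=None):
    dummy = {'end_time': 0, 'start_time': duration}
    for prev, cur, nxt in zip((dummy, *chapters), chapters, (*chapters[1:], dummy)):
        if cur.get('start_time') is None:
            cur['start_time'] = prev.get('end_time')
        if not cur.get('end_time'):
            cur['end_time'] = nxt.get('start_time')
    return chapters

chs = [{'start_time': 0}, {'start_time': 30}, {'start_time': 60}]
fill_chapter_bounds(chs, duration=90)
assert chs[0]['end_time'] == 30 and chs[2]['end_time'] == 90
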
@@ -2503,7 +2469,7 @@ class YoutubeDL:
sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if not format.get('format_id'):
- format['format_id'] = compat_str(i)
+ format['format_id'] = str(i)
else:
# Sanitize format_id from characters used in format selector expression
format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
@@ -2541,7 +2507,7 @@ class YoutubeDL:
format['dynamic_range'] = 'SDR'
if (info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')):
- format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
+ format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
# Add HTTP headers, so that external programs can use them from the
# json output
@@ -2588,7 +2554,7 @@ class YoutubeDL:
if list_only:
# Without this printing, -F --print-json will not work
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
- return
+ return info_dict
format_selector = self.format_selector
if format_selector is None:
@@ -2629,20 +2595,40 @@ class YoutubeDL:
# Process what we can, even without any available formats.
formats_to_download = [{}]
- best_format = formats_to_download[-1]
+ requested_ranges = self.params.get('download_ranges')
+ if requested_ranges:
+ requested_ranges = tuple(requested_ranges(info_dict, self))
+
+ best_format, downloaded_formats = formats_to_download[-1], []
if download:
if best_format:
- self.to_screen(
- f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
- + ', '.join([f['format_id'] for f in formats_to_download]))
+ def to_screen(*msg):
+ self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
+
+ to_screen(f'Downloading {len(formats_to_download)} format(s):',
+ (f['format_id'] for f in formats_to_download))
+ if requested_ranges:
+ to_screen(f'Downloading {len(requested_ranges)} time ranges:',
+ (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
max_downloads_reached = False
- for i, fmt in enumerate(formats_to_download):
- formats_to_download[i] = new_info = self._copy_infodict(info_dict)
+
+ for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
+ new_info = self._copy_infodict(info_dict)
new_info.update(fmt)
+ offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
+ if chapter or offset:
+ new_info.update({
+ 'section_start': offset + chapter.get('start_time', 0),
+ 'section_end': offset + min(chapter.get('end_time', duration), duration),
+ 'section_title': chapter.get('title'),
+ 'section_number': chapter.get('index'),
+ })
+ downloaded_formats.append(new_info)
try:
self.process_info(new_info)
except MaxDownloadsReached:
max_downloads_reached = True
+ self._raise_pending_errors(new_info)
# Remove copied info
for key, val in tuple(new_info.items()):
if info_dict.get(key) == val:
@@ -2650,12 +2636,12 @@ class YoutubeDL:
if max_downloads_reached:
break
- write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
+ write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
assert write_archive.issubset({True, False, 'ignore'})
if True in write_archive and False not in write_archive:
self.record_download_archive(info_dict)
- info_dict['requested_downloads'] = formats_to_download
+ info_dict['requested_downloads'] = downloaded_formats
info_dict = self.run_all_pps('after_video', info_dict)
if max_downloads_reached:
raise MaxDownloadsReached()
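
The new download loop takes the Cartesian product of the selected formats and the requested time ranges (download_ranges is a callable that yields the ranges for a given info_dict), shifting each range by the clip's own section_start and clamping it to the duration. A toy run of that arithmetic with made-up values:

import itertools

formats = [{'format_id': '22'}, {'format_id': '140'}]
ranges = [{'start_time': 10, 'end_time': 25}, {'start_time': 50, 'end_time': 999}]
offset, duration = 0, 60   # section_start of the clip itself, total length

for fmt, chapter in itertools.product(formats, ranges or [{}]):
    start = offset + chapter.get('start_time', 0)
    end = offset + min(chapter.get('end_time', duration), duration)
    print(fmt['format_id'], start, end)   # the 999 end is clamped to 60
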
@@ -2877,8 +2863,13 @@ class YoutubeDL:
# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
+ def check_max_downloads():
+ if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
+ raise MaxDownloadsReached()
+
if self.params.get('simulate'):
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
+ check_max_downloads()
return
if full_filename is None:
@@ -2982,12 +2973,8 @@ class YoutubeDL:
info_dict.clear()
info_dict.update(new_info)
- try:
- new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
- replace_info_dict(new_info)
- except PostProcessingError as err:
- self.report_error('Preprocessing: %s' % str(err))
- return
+ new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ replace_info_dict(new_info)
if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename
@@ -3009,7 +2996,16 @@ class YoutubeDL:
info_dict['ext'] = os.path.splitext(file)[1][1:]
return file
- success = True
+ fd, success = None, True
+ if info_dict.get('protocol') or info_dict.get('url'):
+ fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ if fd is not FFmpegFD and (
+ info_dict.get('section_start') or info_dict.get('section_end')):
+ msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
+ else 'You have requested downloading the video partially, but ffmpeg is not installed')
+ self.report_error(f'{msg}. Aborting')
+ return
+
if info_dict.get('requested_formats') is not None:
def compatible_formats(formats):
@@ -3042,7 +3038,7 @@ class YoutubeDL:
and info_dict.get('thumbnails')
# check with type instead of pp_key, __name__, or isinstance
# since we dont want any custom PPs to trigger this
- and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
+ and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
info_dict['ext'] = 'mkv'
self.report_warning(
'webm doesn\'t support embedding a thumbnail, mkv will be used')
@@ -3064,10 +3060,8 @@ class YoutubeDL:
dl_filename = existing_video_file(full_filename, temp_filename)
info_dict['__real_download'] = False
- downloaded = []
merger = FFmpegMergerPP(self)
-
- fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
+ downloaded = []
if dl_filename is not None:
self.report_file_already_downloaded(dl_filename)
elif fd:
@@ -3147,6 +3141,7 @@ class YoutubeDL:
self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
return
+ self._raise_pending_errors(info_dict)
if success and full_filename != '-':
def fixup():
@@ -3216,15 +3211,10 @@ class YoutubeDL:
return
info_dict['__write_download_archive'] = True
+ assert info_dict is original_infodict # Make sure the info_dict was modified in-place
if self.params.get('force_write_download_archive'):
info_dict['__write_download_archive'] = True
-
- # Make sure the info_dict was modified in-place
- assert info_dict is original_infodict
-
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None and self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
+ check_max_downloads()
def __download_wrapper(self, func):
@functools.wraps(func)
@@ -3246,7 +3236,7 @@ class YoutubeDL:
def download(self, url_list):
"""Download a given list of URLs."""
url_list = variadic(url_list) # Passing a single URL is a common mistake
- outtmpl = self.outtmpl_dict['default']
+ outtmpl = self.params['outtmpl']['default']
if (len(url_list) > 1
and outtmpl != '-'
and '%' not in outtmpl
@@ -3367,7 +3357,12 @@ class YoutubeDL:
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info)
info['__files_to_move'] = files_to_move or {}
- info = self.run_all_pps(key, info)
+ try:
+ info = self.run_all_pps(key, info)
+ except PostProcessingError as err:
+ msg = f'Preprocessing: {err}'
+ info.setdefault('__pending_error', msg)
+ self.report_error(msg, is_error=False)
return info, info.pop('__files_to_move', None)
def post_process(self, filename, info, files_to_move=None):
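
pre_process now records the first PostProcessingError on the info dict instead of aborting outright, and _raise_pending_errors (added earlier in this patch) surfaces it once the remaining steps have run. A stripped-down model of that flow, with a RuntimeError standing in for yt-dlp's error reporting:

def pre_process(info, fail=False):
    if fail:
        # setdefault means only the first failure is kept
        info.setdefault('__pending_error', 'Preprocessing: boom')
    return info

def raise_pending_errors(info):
    err = info.pop('__pending_error', None)
    if err:
        raise RuntimeError(err)

info = pre_process({'id': 'x'}, fail=True)
# ... downloading and postprocessing would continue here ...
raise_pending_errors(info)   # RuntimeError: Preprocessing: boom
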
@@ -3437,7 +3432,7 @@ class YoutubeDL:
def _list_format_headers(self, *headers):
if self.params.get('listformats_table', True) is not False:
- return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
+ return [self._format_out(header, self.Styles.HEADERS) for header in headers]
return headers
def _format_note(self, fdict):
@@ -3515,10 +3510,10 @@ class YoutubeDL:
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
- delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
+ delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [
[
- self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
+ self._format_out(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'),
format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
format_field(f, 'fps', '\t%d'),
@@ -3530,15 +3525,15 @@ class YoutubeDL:
delim,
format_field(f, 'vcodec', default='unknown').replace(
'none', 'images' if f.get('acodec') == 'none'
- else self._format_screen('audio only', self.Styles.SUPPRESS)),
+ else self._format_out('audio only', self.Styles.SUPPRESS)),
format_field(f, 'vbr', '\t%dk'),
format_field(f, 'acodec', default='unknown').replace(
'none', '' if f.get('vcodec') == 'none'
- else self._format_screen('video only', self.Styles.SUPPRESS)),
+ else self._format_out('video only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk'),
format_field(f, 'asr', '\t%dHz'),
join_nonempty(
- self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
+ self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
join_nonempty(format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
@@ -3551,7 +3546,7 @@ class YoutubeDL:
return render_table(
header_line, table, hide_empty=True,
- delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
+ delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
def render_thumbnails_table(self, info_dict):
thumbnails = list(info_dict.get('thumbnails') or [])
@@ -3602,18 +3597,25 @@ class YoutubeDL:
if not self.params.get('verbose'):
return
+ # These imports can be slow. So import them only as needed
+ from .extractor.extractors import _LAZY_LOADER
+ from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
+
def get_encoding(stream):
ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
if not supports_terminal_sequences(stream):
- from .compat import WINDOWS_VT_MODE # Must be imported locally
+ from .utils import WINDOWS_VT_MODE # Must be imported locally
ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
return ret
- encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
+ encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
locale.getpreferredencoding(),
sys.getfilesystemencoding(),
- get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
- self.get_encoding())
+ self.get_encoding(),
+ ', '.join(
+ f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
+ if stream is not None and key != 'console')
+ )
logger = self.params.get('logger')
if logger:
@@ -3638,19 +3640,17 @@ class YoutubeDL:
write_debug('Plugins: %s' % [
'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
- if self.params.get('compat_opts'):
- write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
+ if self.params['compat_opts']:
+ write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
if source == 'source':
try:
- sp = Popen(
+ stdout, _, _ = Popen.run(
['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate_or_kill()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s' % out)
+ text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if re.fullmatch('[0-9a-f]+', stdout.strip()):
+ write_debug(f'Git HEAD: {stdout.strip()}')
except Exception:
with contextlib.suppress(Exception):
sys.exc_clear()
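
Popen.run here is yt-dlp's own subprocess wrapper (the three values unpacked above appear to be stdout, stderr and the return code); with the plain stdlib the same check reads roughly as below. Note also the switch from re.match to re.fullmatch, which rejects output that merely starts with hex digits:

import re
import subprocess

proc = subprocess.run(['git', 'rev-parse', '--short', 'HEAD'],
                      text=True, capture_output=True)
if re.fullmatch('[0-9a-f]+', proc.stdout.strip()):
    print(f'Git HEAD: {proc.stdout.strip()}')
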
@@ -3724,7 +3724,7 @@ class YoutubeDL:
else:
proxies = {'http': opts_proxy, 'https': opts_proxy}
else:
- proxies = compat_urllib_request.getproxies()
+ proxies = urllib.request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
@@ -3740,13 +3740,13 @@ class YoutubeDL:
# default FileHandler and allows us to disable the file protocol, which
# can be used for malicious purposes (see
# https://github.com/ytdl-org/youtube-dl/issues/8227)
- file_handler = compat_urllib_request.FileHandler()
+ file_handler = urllib.request.FileHandler()
def file_open(*args, **kwargs):
- raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
+ raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
file_handler.file_open = file_open
- opener = compat_urllib_request.build_opener(
+ opener = urllib.request.build_opener(
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
# Delete the default user-agent header, which would otherwise apply in
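
The compat_urllib_request aliases give way to urllib.request directly; the file:// lockout itself is unchanged. A self-contained stdlib sketch of the technique, using only names known to exist in urllib:

import urllib.error
import urllib.request

file_handler = urllib.request.FileHandler()

def _blocked_open(*args, **kwargs):
    raise urllib.error.URLError('file:// scheme is disabled')

# urllib dispatches via getattr(handler, 'file_open'), so an instance
# attribute shadows the method; supplying the instance to build_opener
# also suppresses the default FileHandler.
file_handler.file_open = _blocked_open
opener = urllib.request.build_opener(file_handler)

try:
    opener.open('file:///etc/hostname')
except urllib.error.URLError as err:
    print('blocked:', err.reason)
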
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 386996e16..0c68f8571 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -3,15 +3,18 @@ f'You are using an unsupported version of Python. Only Python versions 3.6 and a
__license__ = 'CC0-1.0'
+import getpass
import itertools
+import optparse
import os
import re
import sys
-from .compat import compat_getpass, compat_os_name, compat_shlex_quote
+from .compat import compat_shlex_quote
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader import FileDownloader
-from .extractor import GenericIE, list_extractor_classes
+from .downloader.external import get_external_downloader
+from .extractor import list_extractor_classes
from .extractor.adobepass import MSO_INFO
from .extractor.common import InfoExtractor
from .options import parseOpts
@@ -24,7 +27,7 @@ from .postprocessor import (
MetadataFromFieldPP,
MetadataParserPP,
)
-from .update import run_update
+from .update import Updater
from .utils import (
NO_DEFAULT,
POSTPROCESS_WHEN,
@@ -32,42 +35,47 @@ from .utils import (
DownloadCancelled,
DownloadError,
GeoUtils,
+ PlaylistEntries,
SameFileError,
decodeOption,
+ download_range_func,
expand_path,
float_or_none,
+ format_field,
int_or_none,
match_filter_func,
parse_duration,
preferredencoding,
read_batch_urls,
+ read_stdin,
render_table,
setproctitle,
std_headers,
traverse_obj,
+ variadic,
write_string,
)
from .YoutubeDL import YoutubeDL
+def _exit(status=0, *args):
+ for msg in args:
+ sys.stderr.write(msg)
+ raise SystemExit(status)
+
+
def get_urls(urls, batchfile, verbose):
# Batch file verification
batch_urls = []
if batchfile is not None:
try:
- if batchfile == '-':
- write_string('Reading URLs from stdin - EOF (%s) to end:\n' % (
- 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'))
- batchfd = sys.stdin
- else:
- batchfd = io.open(
- expand_path(batchfile),
- 'r', encoding='utf-8', errors='ignore')
- batch_urls = read_batch_urls(batchfd)
+ batch_urls = read_batch_urls(
+ read_stdin('URLs') if batchfile == '-'
+ else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
if verbose:
write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
- except IOError:
- sys.exit('ERROR: batch file %s could not be read' % batchfile)
+ except OSError:
+ _exit(f'ERROR: batch file {batchfile} could not be read')
_enc = preferredencoding()
return [
url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip()
@@ -75,6 +83,11 @@ def get_urls(urls, batchfile, verbose):
def print_extractor_information(opts, urls):
+ # Importing GenericIE is currently slow since it imports other extractors
+ # TODO: Move this back to module level after generalization of embed detection
+ from .extractor.generic import GenericIE
+
+ out = ''
if opts.list_extractors:
for ie in list_extractors(opts.age_limit):
write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout)
@@ -210,15 +223,11 @@ def validate_options(opts):
validate_regex('format sorting', f, InfoExtractor.FormatSort.regex)
# Postprocessor formats
- validate_in('audio format', opts.audioformat, ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS))
+ validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
- validate_in('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)
- if opts.recodevideo is not None:
- opts.recodevideo = opts.recodevideo.replace(' ', '')
- validate_regex('video recode format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
- if opts.remuxvideo is not None:
- opts.remuxvideo = opts.remuxvideo.replace(' ', '')
- validate_regex('video remux format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
+ validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
+ validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
+ validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
if opts.audioquality:
opts.audioquality = opts.audioquality.strip('k').strip('K')
# int_or_none prevents inf, nan
@@ -240,6 +249,28 @@ def validate_options(opts):
opts.extractor_retries = parse_retries('extractor', opts.extractor_retries)
opts.file_access_retries = parse_retries('file access', opts.file_access_retries)
+ # Retry sleep function
+ def parse_sleep_func(expr):
+ NUMBER_RE = r'\d+(?:\.\d+)?'
+ op, start, limit, step, *_ = tuple(re.fullmatch(
+ rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
+ expr.strip()).groups()) + (None, None)
+
+ if op == 'exp':
+ return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
+ else:
+ default_step = start if op or limit else 0
+ return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf'))
+
+ for key, expr in opts.retry_sleep.items():
+ if not expr:
+ del opts.retry_sleep[key]
+ continue
+ try:
+ opts.retry_sleep[key] = parse_sleep_func(expr)
+ except AttributeError:
+ raise ValueError(f'invalid {key} retry sleep expression {expr!r}')
+
# Bytes
def parse_bytes(name, value):
if value is None:
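
The sleep grammar parsed above is roughly [linear|exp=]start[:limit][:step]. Restated as a self-contained snippet so the two backoff curves can be inspected directly:

import re

def parse_sleep_func(expr):
    NUMBER_RE = r'\d+(?:\.\d+)?'
    op, start, limit, step, *_ = tuple(re.fullmatch(
        rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
        expr.strip()).groups()) + (None, None)
    if op == 'exp':
        return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
    default_step = start if op or limit else 0
    return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf'))

f = parse_sleep_func('exp=1:20')
print([f(n) for n in range(6)])      # [1.0, 2.0, 4.0, 8.0, 16.0, 20.0]
print(parse_sleep_func('3')(5))      # 3.0 - a bare number is a constant sleep
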
@@ -284,20 +315,25 @@ def validate_options(opts):
'Cannot download a video and extract audio into the same file! '
f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template')
- # Remove chapters
- remove_chapters_patterns, opts.remove_ranges = [], []
- for regex in opts.remove_chapters or []:
- if regex.startswith('*'):
- dur = list(map(parse_duration, regex[1:].split('-')))
- if len(dur) == 2 and all(t is not None for t in dur):
- opts.remove_ranges.append(tuple(dur))
+ def parse_chapters(name, value):
+ chapters, ranges = [], []
+ for regex in value or []:
+ if regex.startswith('*'):
+ for range in regex[1:].split(','):
+ dur = tuple(map(parse_duration, range.strip().split('-')))
+ if len(dur) == 2 and all(t is not None for t in dur):
+ ranges.append(dur)
+ else:
+ raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end')
continue
- raise ValueError(f'invalid --remove-chapters time range "{regex}". Must be of the form *start-end')
- try:
- remove_chapters_patterns.append(re.compile(regex))
- except re.error as err:
- raise ValueError(f'invalid --remove-chapters regex "{regex}" - {err}')
- opts.remove_chapters = remove_chapters_patterns
+ try:
+ chapters.append(re.compile(regex))
+ except re.error as err:
+ raise ValueError(f'invalid {name} regex "{regex}" - {err}')
+ return chapters, ranges
+
+ opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
+ opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
# Cookies from browser
if opts.cookiesfrombrowser:
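
parse_chapters now serves both --remove-chapters and the new --download-sections: a leading * marks comma-separated time ranges, anything else compiles as a chapter-title regex. An illustrative run, with a bare-seconds stand-in for yt-dlp's parse_duration (the real one also accepts [hh:]mm:ss and suffixed forms):

import re

def parse_duration(s):        # stand-in: plain seconds only
    try:
        return float(s)
    except ValueError:
        return None

def parse_chapters(name, value):
    chapters, ranges = [], []
    for regex in value or []:
        if regex.startswith('*'):
            for rng in regex[1:].split(','):
                dur = tuple(map(parse_duration, rng.strip().split('-')))
                if len(dur) == 2 and all(t is not None for t in dur):
                    ranges.append(dur)
                else:
                    raise ValueError(f'invalid {name} time range "{regex}"')
            continue
        chapters.append(re.compile(regex))
    return chapters, ranges

print(parse_chapters('--download-sections', ['*10-20,40-50', 'Intro']))
# ([re.compile('Intro')], [(10.0, 20.0), (40.0, 50.0)])
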
@@ -341,6 +377,12 @@ def validate_options(opts):
opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata)))
# Other options
+ if opts.playlist_items is not None:
+ try:
+ tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
+ except Exception as err:
+ raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
+
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
if geo_bypass_code is not None:
try:
@@ -361,6 +403,17 @@ def validate_options(opts):
if opts.no_sponsorblock:
opts.sponsorblock_mark = opts.sponsorblock_remove = set()
+ default_downloader = None
+ for proto, path in opts.external_downloader.items():
+ if path == 'native':
+ continue
+ ed = get_external_downloader(path)
+ if ed is None:
+ raise ValueError(
+ f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
+ elif ed and proto == 'default':
+ default_downloader = ed.get_basename()
+
warnings, deprecation_warnings = [], []
# Common mistake: -f best
@@ -371,13 +424,18 @@ def validate_options(opts):
'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
# --(postprocessor/downloader)-args without name
- def report_args_compat(name, value, key1, key2=None):
+ def report_args_compat(name, value, key1, key2=None, where=None):
if key1 in value and key2 not in value:
- warnings.append(f'{name} arguments given without specifying name. The arguments will be given to all {name}s')
+ warnings.append(f'{name.title()} arguments given without specifying name. '
+ f'The arguments will be given to {where or f"all {name}s"}')
return True
return False
- report_args_compat('external downloader', opts.external_downloader_args, 'default')
+ if report_args_compat('external downloader', opts.external_downloader_args,
+ 'default', where=default_downloader) and default_downloader:
+ # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1
+ opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default'))
+
if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'):
opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat')
opts.postprocessor_args.setdefault('sponskrub', [])
@@ -396,6 +454,9 @@ def validate_options(opts):
setattr(opts, opt1, default)
# Conflicting options
+ report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random')
+ report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist')
+ report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist')
report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl'))
@@ -470,9 +531,9 @@ def validate_options(opts):
# Ask for passwords
if opts.username is not None and opts.password is None:
- opts.password = compat_getpass('Type account password and press [Return]: ')
+ opts.password = getpass.getpass('Type account password and press [Return]: ')
if opts.ap_username is not None and opts.ap_password is None:
- opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
+ opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
return warnings, deprecation_warnings
@@ -626,7 +687,7 @@ def parse_options(argv=None):
final_ext = (
opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS
- else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best')
+ else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
else None)
return parser, opts, urls, {
@@ -682,6 +743,7 @@ def parse_options(argv=None):
'file_access_retries': opts.file_access_retries,
'fragment_retries': opts.fragment_retries,
'extractor_retries': opts.extractor_retries,
+ 'retry_sleep_functions': opts.retry_sleep,
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
'keep_fragments': opts.keep_fragments,
'concurrent_fragment_downloads': opts.concurrent_fragment_downloads,
@@ -696,6 +758,7 @@ def parse_options(argv=None):
'playlistend': opts.playlistend,
'playlistreverse': opts.playlist_reverse,
'playlistrandom': opts.playlist_random,
+ 'lazy_playlist': opts.lazy_playlist,
'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl.get('default') == '-',
'consoletitle': opts.consoletitle,
@@ -727,6 +790,7 @@ def parse_options(argv=None):
'verbose': opts.verbose,
'dump_intermediate_pages': opts.dump_intermediate_pages,
'write_pages': opts.write_pages,
+ 'load_pages': opts.load_pages,
'test': opts.test,
'keepvideo': opts.keepvideo,
'min_filesize': opts.min_filesize,
@@ -775,6 +839,8 @@ def parse_options(argv=None):
'max_sleep_interval': opts.max_sleep_interval,
'sleep_interval_subtitles': opts.sleep_interval_subtitles,
'external_downloader': opts.external_downloader,
+ 'download_ranges': opts.download_ranges,
+ 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,
'xattr_set_filesize': opts.xattr_set_filesize,
@@ -813,52 +879,66 @@ def _real_main(argv=None):
if opts.dump_user_agent:
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
write_string(f'{ua}\n', out=sys.stdout)
- sys.exit(0)
+ return
if print_extractor_information(opts, all_urls):
- sys.exit(0)
+ return
with YoutubeDL(ydl_opts) as ydl:
+ pre_process = opts.update_self or opts.rm_cachedir
actual_use = all_urls or opts.load_info_filename
- # Remove cache dir
if opts.rm_cachedir:
ydl.cache.remove()
- # Maybe do nothing
+ updater = Updater(ydl)
+ if opts.update_self and updater.update() and actual_use:
+ if updater.cmd:
+ return updater.restart()
+ # This code is reachable only for zip variant in py < 3.10
+ # It makes sense to exit here, but the old behavior is to continue
+ ydl.report_warning('Restart yt-dlp to use the updated version')
+ # return 100, 'ERROR: The program must exit for the update to complete'
+
if not actual_use:
+ if pre_process:
+ return ydl._download_retcode
+
ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
parser.error(
'You must provide at least one URL.\n'
'Type yt-dlp --help to see a list of all options.')
+ parser.destroy()
try:
if opts.load_info_filename is not None:
- retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
+ return ydl.download_with_info_file(expand_path(opts.load_info_filename))
else:
- retcode = ydl.download(all_urls)
+ return ydl.download(all_urls)
except DownloadCancelled:
ydl.to_screen('Aborting remaining downloads')
- retcode = 101
-
- sys.exit(retcode)
+ return 101
def main(argv=None):
try:
- _real_main(argv)
+ _exit(*variadic(_real_main(argv)))
except DownloadError:
- sys.exit(1)
+ _exit(1)
except SameFileError as e:
- sys.exit(f'ERROR: {e}')
+ _exit(f'ERROR: {e}')
except KeyboardInterrupt:
- sys.exit('\nERROR: Interrupted by user')
+ _exit('\nERROR: Interrupted by user')
except BrokenPipeError as e:
# https://docs.python.org/3/library/signal.html#note-on-sigpipe
devnull = os.open(os.devnull, os.O_WRONLY)
os.dup2(devnull, sys.stdout.fileno())
- sys.exit(f'\nERROR: {e}')
+ _exit(f'\nERROR: {e}')
+ except optparse.OptParseError as e:
+ _exit(2, f'\n{e}')
+
+from .extractor import gen_extractors, list_extractors
__all__ = [
'main',
diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py
index c9d275b86..ff5d71d3c 100644
--- a/yt_dlp/__main__.py
+++ b/yt_dlp/__main__.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
+
# Execute with
# $ python -m yt_dlp
diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
index d0e6d7549..b3f504977 100644
--- a/yt_dlp/aes.py
+++ b/yt_dlp/aes.py
@@ -1,6 +1,7 @@
+import base64
from math import ceil
-from .compat import compat_b64decode, compat_ord
+from .compat import compat_ord
from .dependencies import Cryptodome_AES
from .utils import bytes_to_intlist, intlist_to_bytes
@@ -264,7 +265,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
"""
NONCE_LENGTH_BYTES = 8
- data = bytes_to_intlist(compat_b64decode(data))
+ data = bytes_to_intlist(base64.b64decode(data))
password = bytes_to_intlist(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py
index e3f8a7dab..83351b797 100644
--- a/yt_dlp/cache.py
+++ b/yt_dlp/cache.py
@@ -6,7 +6,6 @@ import re
import shutil
import traceback
-from .compat import compat_getenv
from .utils import expand_path, write_json_file
@@ -17,7 +16,7 @@ class Cache:
def _get_root_dir(self):
res = self._ydl.params.get('cachedir')
if res is None:
- cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
+ cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache')
res = os.path.join(cache_root, 'yt-dlp')
return expand_path(res)
diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py
index a0cd62110..9f8e8c3e5 100644
--- a/yt_dlp/compat/__init__.py
+++ b/yt_dlp/compat/__init__.py
@@ -1,6 +1,4 @@
-import contextlib
import os
-import subprocess
import sys
import warnings
import xml.etree.ElementTree as etree
@@ -9,10 +7,14 @@ from . import re
from ._deprecated import * # noqa: F401, F403
from .compat_utils import passthrough_module
-
# XXX: Implement this the same way as other DeprecationWarnings without circular import
-passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
- DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=2))
+try:
+ passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn(
+ DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=2))
+ HAS_LEGACY = True
+except ModuleNotFoundError:
+ # Keep working even without _legacy module
+ HAS_LEGACY = False
del passthrough_module
@@ -52,7 +54,7 @@ if compat_os_name == 'nt' and sys.version_info < (3, 8):
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
- return path
+ return os.path.realpath(path)
else:
compat_realpath = os.path.realpath
@@ -74,17 +76,3 @@ if compat_os_name in ('nt', 'ce'):
return userhome + path[i:]
else:
compat_expanduser = os.path.expanduser
-
-
-WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
-
-
-def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
- if compat_os_name != 'nt':
- return
- global WINDOWS_VT_MODE
- startupinfo = subprocess.STARTUPINFO()
- startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
- with contextlib.suppress(Exception):
- subprocess.Popen('', shell=True, startupinfo=startupinfo).wait()
- WINDOWS_VT_MODE = True
diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py
index 390f76577..342f1f80d 100644
--- a/yt_dlp/compat/_deprecated.py
+++ b/yt_dlp/compat/_deprecated.py
@@ -1,52 +1,16 @@
"""Deprecated - New code should avoid these"""
import base64
-import getpass
-import html
-import html.parser
-import http
-import http.client
-import http.cookiejar
-import http.cookies
-import http.server
-import itertools
-import os
-import shutil
-import struct
-import tokenize
-import urllib
+import urllib.error
+import urllib.parse
+
+compat_str = str
compat_b64decode = base64.b64decode
-compat_chr = chr
-compat_cookiejar = http.cookiejar
-compat_cookiejar_Cookie = http.cookiejar.Cookie
-compat_cookies_SimpleCookie = http.cookies.SimpleCookie
-compat_get_terminal_size = shutil.get_terminal_size
-compat_getenv = os.getenv
-compat_getpass = getpass.getpass
-compat_html_entities = html.entities
-compat_html_entities_html5 = html.entities.html5
-compat_HTMLParser = html.parser.HTMLParser
-compat_http_client = http.client
-compat_http_server = http.server
+
compat_HTTPError = urllib.error.HTTPError
-compat_itertools_count = itertools.count
+compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
-compat_str = str
-compat_struct_pack = struct.pack
-compat_struct_unpack = struct.unpack
-compat_tokenize_tokenize = tokenize.tokenize
-compat_urllib_error = urllib.error
compat_urllib_parse_unquote = urllib.parse.unquote
-compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
-compat_urllib_request = urllib.request
-compat_urlparse = compat_urllib_parse = urllib.parse
-
-
-def compat_setenv(key, value, env=os.environ):
- env[key] = value
-
-
-__all__ = [x for x in globals() if x.startswith('compat_')]
diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py
index ce24760e5..49bb13a3c 100644
--- a/yt_dlp/compat/_legacy.py
+++ b/yt_dlp/compat/_legacy.py
@@ -2,18 +2,27 @@
import collections
import ctypes
-import http
+import getpass
+import html.entities
+import html.parser
import http.client
import http.cookiejar
import http.cookies
import http.server
+import itertools
+import os
import shlex
+import shutil
import socket
import struct
-import urllib
+import tokenize
+import urllib.error
+import urllib.parse
+import urllib.request
import xml.etree.ElementTree as etree
from subprocess import DEVNULL
+from .compat_utils import passthrough_module # isort: split
from .asyncio import run as compat_asyncio_run # noqa: F401
from .re import Pattern as compat_Pattern # noqa: F401
from .re import match as compat_Match # noqa: F401
@@ -21,6 +30,8 @@ from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401
from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401
+passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode'))
+
# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE
# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines
@@ -28,12 +39,17 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
return ctypes.WINFUNCTYPE(*args, **kwargs)
+def compat_setenv(key, value, env=os.environ):
+ env[key] = value
+
+
compat_basestring = str
compat_collections_abc = collections.abc
compat_cookies = http.cookies
compat_etree_Element = etree.Element
compat_etree_register_namespace = etree.register_namespace
compat_filter = filter
+compat_getenv = os.getenv
compat_input = input
compat_integer_types = (int, )
compat_kwargs = lambda kwargs: kwargs
@@ -49,9 +65,28 @@ compat_urllib_parse_quote_plus = urllib.parse.quote_plus
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
compat_urllib_request_DataHandler = urllib.request.DataHandler
+compat_urllib_request = urllib.request
compat_urllib_response = urllib.response
compat_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = etree.ParseError
compat_xpath = lambda xpath: xpath
compat_zip = zip
workaround_optparse_bug9161 = lambda: None
+compat_getpass = getpass.getpass
+compat_chr = chr
+compat_urllib_parse = urllib.parse
+compat_itertools_count = itertools.count
+compat_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = http.cookiejar.Cookie
+compat_cookies_SimpleCookie = http.cookies.SimpleCookie
+compat_get_terminal_size = shutil.get_terminal_size
+compat_html_entities = html.entities
+compat_html_entities_html5 = html.entities.html5
+compat_tokenize_tokenize = tokenize.tokenize
+compat_HTMLParser = html.parser.HTMLParser
+compat_http_client = http.client
+compat_http_server = http.server
+compat_struct_pack = struct.pack
+compat_struct_unpack = struct.unpack
+compat_urllib_error = urllib.error
+compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py
index b1d58f5b9..82e176281 100644
--- a/yt_dlp/compat/compat_utils.py
+++ b/yt_dlp/compat/compat_utils.py
@@ -4,7 +4,6 @@ import importlib
import sys
import types
-
_NO_ATTRIBUTE = object()
_Package = collections.namedtuple('Package', ('name', 'version'))
@@ -31,9 +30,9 @@ def _is_package(module):
return True
-def passthrough_module(parent, child, *, callback=lambda _: None):
+def passthrough_module(parent, child, allowed_attributes=None, *, callback=lambda _: None):
parent_module = importlib.import_module(parent)
- child_module = importlib.import_module(child, parent)
+ child_module = None # Import child module only as needed
class PassthroughModule(types.ModuleType):
def __getattr__(self, attr):
@@ -41,19 +40,30 @@ def passthrough_module(parent, child, *, callback=lambda _: None):
with contextlib.suppress(ImportError):
return importlib.import_module(f'.{attr}', parent)
- ret = _NO_ATTRIBUTE
+ ret = self.__from_child(attr)
+ if ret is _NO_ATTRIBUTE:
+ raise AttributeError(f'module {parent} has no attribute {attr}')
+ callback(attr)
+ return ret
+
+ def __from_child(self, attr):
+ if allowed_attributes is None:
+ if attr.startswith('__') and attr.endswith('__'):
+ return _NO_ATTRIBUTE
+ elif attr not in allowed_attributes:
+ return _NO_ATTRIBUTE
+
+ nonlocal child_module
+ child_module = child_module or importlib.import_module(child, parent)
+
with contextlib.suppress(AttributeError):
- ret = getattr(child_module, attr)
+ return getattr(child_module, attr)
if _is_package(child_module):
with contextlib.suppress(ImportError):
- ret = importlib.import_module(f'.{attr}', child)
-
- if ret is _NO_ATTRIBUTE:
- raise AttributeError(f'module {parent} has no attribute {attr}')
+ return importlib.import_module(f'.{attr}', child)
- callback(attr)
- return ret
+ return _NO_ATTRIBUTE
# Python 3.6 does not have module level __getattr__
# https://peps.python.org/pep-0562/
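
passthrough_module now defers importing the child module until an attribute lookup actually misses, and can restrict which names fall through. The underlying mechanism is the PEP 562 workaround named in the comment above: swapping the module's class for a ModuleType subclass with __getattr__. A bare-bones sketch of that trick (names are illustrative, not yt-dlp's API):

import importlib
import sys
import types

def make_passthrough(parent, child):
    class Passthrough(types.ModuleType):
        def __getattr__(self, attr):              # only called on a miss
            mod = importlib.import_module(child)  # deferred until needed
            try:
                return getattr(mod, attr)
            except AttributeError:
                raise AttributeError(f'module {parent} has no attribute {attr}')
    sys.modules[parent].__class__ = Passthrough

A module that executes make_passthrough(__name__, 'functools') resolves unknown attributes against functools on first access, which is essentially what the new compat/functools.py below does.
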
diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py
new file mode 100644
index 000000000..ec003ea90
--- /dev/null
+++ b/yt_dlp/compat/functools.py
@@ -0,0 +1,26 @@
+# flake8: noqa: F405
+from functools import * # noqa: F403
+
+from .compat_utils import passthrough_module
+
+passthrough_module(__name__, 'functools')
+del passthrough_module
+
+try:
+ cache # >= 3.9
+except NameError:
+ cache = lru_cache(maxsize=None)
+
+try:
+ cached_property # >= 3.8
+except NameError:
+ class cached_property:
+ def __init__(self, func):
+ update_wrapper(self, func)
+ self.func = func
+
+ def __get__(self, instance, _):
+ if instance is None:
+ return self
+ setattr(instance, self.func.__name__, self.func(instance))
+ return getattr(instance, self.func.__name__)
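
A quick check of the backport above, assuming the cached_property fallback defined in this file is in scope (on Python 3.8+, functools.cached_property behaves the same). Because the class defines no __set__, it is a non-data descriptor, so the setattr() in __get__ lets the instance attribute shadow it on later lookups:

class Demo:
    @cached_property              # the fallback class defined above
    def value(self):
        print('computing')
        return 42

d = Demo()
print(d.value)   # prints "computing", then 42
print(d.value)   # 42 only: the instance dict now holds the result
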
diff --git a/yt_dlp/compat/imghdr.py b/yt_dlp/compat/imghdr.py
new file mode 100644
index 000000000..734b0d876
--- /dev/null
+++ b/yt_dlp/compat/imghdr.py
@@ -0,0 +1,14 @@
+tests = {
+ 'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
+ 'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
+ 'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
+}
+
+
+def what(path):
+ """Detect format of image (Currently supports jpeg, png, webp only)
+ Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py
+ """
+ with open(path, 'rb') as f:
+ head = f.read(12)
+ return next((type_ for type_, test in tests.items() if test(head)), None)
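
With the tests table above in scope, the three supported signatures can be exercised in memory rather than from disk (the byte strings below are hand-built headers, padded to the 12 bytes that what() reads):

headers = {
    'png':  b'\211PNG\r\n\032\n' + b'\0' * 4,
    'webp': b'RIFF' + b'\0' * 4 + b'WEBP',
    'jpeg': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x00',
}
for expected, head in headers.items():
    got = next((t for t, test in tests.items() if test(head)), None)
    print(expected, got)    # each pair should agree
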
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 1598828f2..df8f97b44 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1,5 +1,7 @@
+import base64
import contextlib
import ctypes
+import http.cookiejar
import json
import os
import shutil
@@ -17,7 +19,6 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7,
)
-from .compat import compat_b64decode, compat_cookiejar_Cookie
from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage,
@@ -63,7 +64,7 @@ class YDLLogger:
# Do not print to files/pipes, loggers, or when --no-progress is used
if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
return
- file = self._ydl._out_files['error']
+ file = self._ydl._out_files.error
try:
if not file.isatty():
return
@@ -142,7 +143,7 @@ def _extract_firefox_cookies(profile, logger):
total_cookie_count = len(table)
for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
- cookie = compat_cookiejar_Cookie(
+ cookie = http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
@@ -156,30 +157,16 @@ def _extract_firefox_cookies(profile, logger):
def _firefox_browser_dir():
- if sys.platform in ('linux', 'linux2'):
- return os.path.expanduser('~/.mozilla/firefox')
- elif sys.platform == 'win32':
+ if sys.platform in ('cygwin', 'win32'):
return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
elif sys.platform == 'darwin':
return os.path.expanduser('~/Library/Application Support/Firefox')
- else:
- raise ValueError(f'unsupported platform: {sys.platform}')
+ return os.path.expanduser('~/.mozilla/firefox')
def _get_chromium_based_browser_settings(browser_name):
# https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
- if sys.platform in ('linux', 'linux2'):
- config = _config_home()
- browser_dir = {
- 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
- 'chrome': os.path.join(config, 'google-chrome'),
- 'chromium': os.path.join(config, 'chromium'),
- 'edge': os.path.join(config, 'microsoft-edge'),
- 'opera': os.path.join(config, 'opera'),
- 'vivaldi': os.path.join(config, 'vivaldi'),
- }[browser_name]
-
- elif sys.platform == 'win32':
+ if sys.platform in ('cygwin', 'win32'):
appdata_local = os.path.expandvars('%LOCALAPPDATA%')
appdata_roaming = os.path.expandvars('%APPDATA%')
browser_dir = {
@@ -203,7 +190,15 @@ def _get_chromium_based_browser_settings(browser_name):
}[browser_name]
else:
- raise ValueError(f'unsupported platform: {sys.platform}')
+ config = _config_home()
+ browser_dir = {
+ 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'),
+ 'chrome': os.path.join(config, 'google-chrome'),
+ 'chromium': os.path.join(config, 'chromium'),
+ 'edge': os.path.join(config, 'microsoft-edge'),
+ 'opera': os.path.join(config, 'opera'),
+ 'vivaldi': os.path.join(config, 'vivaldi'),
+ }[browser_name]
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
# dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
@@ -303,7 +298,7 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
if value is None:
return is_encrypted, None
- return is_encrypted, compat_cookiejar_Cookie(
+ return is_encrypted, http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
@@ -343,14 +338,11 @@ class ChromeCookieDecryptor:
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
- if sys.platform in ('linux', 'linux2'):
- return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
- elif sys.platform == 'darwin':
+ if sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger)
- elif sys.platform == 'win32':
+ elif sys.platform in ('win32', 'cygwin'):
return WindowsChromeCookieDecryptor(browser_root, logger)
- else:
- raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}')
+ return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
@@ -598,7 +590,7 @@ def _parse_safari_cookies_record(data, jar, logger):
p.skip_to(record_size, 'space at the end of the record')
- cookie = compat_cookiejar_Cookie(
+ cookie = http.cookiejar.Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
@@ -718,21 +710,19 @@ def _get_kwallet_network_wallet(logger):
"""
default_wallet = 'kdewallet'
try:
- proc = Popen([
+ stdout, _, returncode = Popen.run([
'dbus-send', '--session', '--print-reply=literal',
'--dest=org.kde.kwalletd5',
'/modules/kwalletd5',
'org.kde.KWallet.networkWallet'
- ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+ ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
- stdout, stderr = proc.communicate_or_kill()
- if proc.returncode != 0:
+ if returncode:
logger.warning('failed to read NetworkWallet')
return default_wallet
else:
- network_wallet = stdout.decode().strip()
- logger.debug(f'NetworkWallet = "{network_wallet}"')
- return network_wallet
+ logger.debug(f'NetworkWallet = "{stdout.strip()}"')
+ return stdout.strip()
except Exception as e:
logger.warning(f'exception while obtaining NetworkWallet: {e}')
return default_wallet
@@ -750,17 +740,16 @@ def _get_kwallet_password(browser_keyring_name, logger):
network_wallet = _get_kwallet_network_wallet(logger)
try:
- proc = Popen([
+ stdout, _, returncode = Popen.run([
'kwallet-query',
'--read-password', f'{browser_keyring_name} Safe Storage',
'--folder', f'{browser_keyring_name} Keys',
network_wallet
], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
- stdout, stderr = proc.communicate_or_kill()
- if proc.returncode != 0:
- logger.error(f'kwallet-query failed with return code {proc.returncode}. Please consult '
- 'the kwallet-query man page for details')
+ if returncode:
+ logger.error(f'kwallet-query failed with return code {returncode}. '
+ 'Please consult the kwallet-query man page for details')
return b''
else:
if stdout.lower().startswith(b'failed to read'):
@@ -775,9 +764,7 @@ def _get_kwallet_password(browser_keyring_name, logger):
return b''
else:
logger.debug('password found')
- if stdout[-1:] == b'\n':
- stdout = stdout[:-1]
- return stdout
+ return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
return b''
@@ -824,17 +811,13 @@ def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
def _get_mac_keyring_password(browser_keyring_name, logger):
logger.debug('using find-generic-password to obtain password from OSX keychain')
try:
- proc = Popen(
+ stdout, _, _ = Popen.run(
['security', 'find-generic-password',
'-w', # write password to stdout
'-a', browser_keyring_name, # match 'account'
'-s', f'{browser_keyring_name} Safe Storage'], # match 'service'
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
-
- stdout, stderr = proc.communicate_or_kill()
- if stdout[-1:] == b'\n':
- stdout = stdout[:-1]
- return stdout
+ return stdout.rstrip(b'\n')
except Exception as e:
logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
return None
@@ -853,7 +836,7 @@ def _get_windows_v10_key(browser_root, logger):
except KeyError:
logger.error('no encrypted key in Local State')
return None
- encrypted_key = compat_b64decode(base64_key)
+ encrypted_key = base64.b64decode(base64_key)
prefix = b'DPAPI'
if not encrypted_key.startswith(prefix):
logger.error('invalid key')
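
The cookie extractors above switch from hand-rolled Popen(...).communicate_or_kill() plumbing to Popen.run(), which, as used throughout this hunk, returns (stdout, stderr, returncode). A rough stdlib equivalent for readers outside the codebase (a sketch only; the real helper is a classmethod on yt_dlp.utils.Popen and also handles process cleanup on error):

    import subprocess

    def run(cmd, text=False, **kwargs):
        proc = subprocess.Popen(cmd, text=text, **kwargs)
        stdout, stderr = proc.communicate()
        return stdout, stderr, proc.returncode

    # Mirrors the kwallet-query call above; 'Chrome' is an illustrative
    # browser_keyring_name and 'kdewallet' the default network wallet.
    stdout, _, returncode = run(
        ['kwallet-query', '--read-password', 'Chrome Safe Storage',
         '--folder', 'Chrome Keys', 'kdewallet'],
        stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
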
diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
index 5aba303dd..a7dc6c9d0 100644
--- a/yt_dlp/downloader/__init__.py
+++ b/yt_dlp/downloader/__init__.py
@@ -1,4 +1,3 @@
-from ..compat import compat_str
from ..utils import NO_DEFAULT, determine_protocol
@@ -85,13 +84,13 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
if default is NO_DEFAULT:
default = HttpFD
- # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
- # return FFmpegFD
+ if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
+ return FFmpegFD
info_dict['protocol'] = protocol
downloaders = params.get('external_downloader')
external_downloader = (
- downloaders if isinstance(downloaders, compat_str) or downloaders is None
+ downloaders if isinstance(downloaders, str) or downloaders is None
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
if external_downloader is None:
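
Two behaviors are visible in this hunk: downloads with section_start/section_end are now routed to FFmpegFD, and external_downloader may be either a single name or a protocol-to-name mapping with a 'default' fallback. A hedged sketch of the mapping lookup (the real code first shortens the protocol via shorten_protocol_name):

    def pick_external_downloader(downloaders, protocol):
        if isinstance(downloaders, str) or downloaders is None:
            return downloaders
        return downloaders.get(protocol, downloaders.get('default'))

    assert pick_external_downloader({'m3u8': 'ffmpeg', 'default': 'aria2c'}, 'm3u8') == 'ffmpeg'
    assert pick_external_downloader({'m3u8': 'ffmpeg', 'default': 'aria2c'}, 'http') == 'aria2c'
    assert pick_external_downloader('wget', 'http') == 'wget'
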
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index 1f14ebb3a..3a0a014ef 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -15,14 +15,18 @@ from ..utils import (
NUMBER_RE,
LockingUnsupportedError,
Namespace,
+ classproperty,
decodeArgument,
encodeFilename,
error_to_compat_str,
+ float_or_none,
format_bytes,
+ join_nonempty,
sanitize_open,
shell_quote,
timeconvert,
timetuple_from_msec,
+ try_call,
)
@@ -41,6 +45,7 @@ class FileDownloader:
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
+ continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
file_access_retries: Number of times to retry on file access error
@@ -64,6 +69,7 @@ class FileDownloader:
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
progress_template: See YoutubeDL.py
+ retry_sleep_functions: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
@@ -98,12 +104,16 @@ class FileDownloader:
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
- @property
- def FD_NAME(self):
- return re.sub(r'(?<!^)(?=[A-Z])', '_', type(self).__name__[:-2]).lower()
+ __to_screen = to_screen
+
+ @classproperty
+ def FD_NAME(cls):
+ return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
@staticmethod
def format_seconds(seconds):
+ if seconds is None:
+ return ' Unknown'
time = timetuple_from_msec(seconds * 1000)
if time.hours > 99:
return '--:--:--'
@@ -111,6 +121,8 @@ class FileDownloader:
return '%02d:%02d' % time[1:-1]
return '%02d:%02d:%02d' % time[:-1]
+ format_eta = format_seconds
+
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
@@ -119,11 +131,7 @@ class FileDownloader:
@staticmethod
def format_percent(percent):
- if percent is None:
- return '---.-%'
- elif percent == 100:
- return '100%'
- return '%6s' % ('%3.1f%%' % percent)
+ return ' N/A%' if percent is None else f'{percent:>5.1f}%'
@staticmethod
def calc_eta(start, now, total, current):
@@ -138,12 +146,6 @@ class FileDownloader:
return int((float(total) - float(current)) / rate)
@staticmethod
- def format_eta(eta):
- if eta is None:
- return '--:--'
- return FileDownloader.format_seconds(eta)
-
- @staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
@@ -152,13 +154,11 @@ class FileDownloader:
@staticmethod
def format_speed(speed):
- if speed is None:
- return '%10s' % '---b/s'
- return '%10s' % ('%s/s' % format_bytes(speed))
+ return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
@staticmethod
def format_retries(retries):
- return 'inf' if retries == float('inf') else '%.0f' % retries
+ return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def best_block_size(elapsed_time, bytes):
@@ -232,7 +232,8 @@ class FileDownloader:
self.to_screen(
f'[download] Unable to {action} file due to file access error. '
f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
- time.sleep(0.01)
+ if not self.sleep_retry('file_access', retry):
+ time.sleep(0.01)
return inner
return outer
@@ -282,9 +283,9 @@ class FileDownloader:
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
- self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
+ self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
else:
- self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
+ self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
def _finish_multiline_status(self):
@@ -301,7 +302,7 @@ class FileDownloader:
)
def _report_progress_status(self, s, default_template):
- for name, style in self.ProgressStyles._asdict().items():
+ for name, style in self.ProgressStyles.items_:
name = f'_{name}_str'
if name not in s:
continue
@@ -325,63 +326,52 @@ class FileDownloader:
self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
def report_progress(self, s):
+ def with_fields(*tups, default=''):
+ for *fields, tmpl in tups:
+ if all(s.get(f) is not None for f in fields):
+ return tmpl
+ return default
+
if s['status'] == 'finished':
if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- s['_percent_str'] = self.format_percent(100)
- self._report_progress_status(s, msg_template)
- return
+ s.update({
+ '_total_bytes_str': format_bytes(s.get('total_bytes')),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ '_percent_str': self.format_percent(100),
+ })
+ self._report_progress_status(s, join_nonempty(
+ '100%%',
+ with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
+ with_fields(('elapsed', 'in %(_elapsed_str)s')),
+ delim=' '))
if s['status'] != 'downloading':
return
- if s.get('eta') is not None:
- s['_eta_str'] = self.format_eta(s['eta'])
- else:
- s['_eta_str'] = 'Unknown'
-
- if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
- elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
- else:
- if s.get('downloaded_bytes') == 0:
- s['_percent_str'] = self.format_percent(0)
- else:
- s['_percent_str'] = 'Unknown %'
-
- if s.get('speed') is not None:
- s['_speed_str'] = self.format_speed(s['speed'])
- else:
- s['_speed_str'] = 'Unknown speed'
-
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
- elif s.get('total_bytes_estimate') is not None:
- s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
- msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
- else:
- if s.get('downloaded_bytes') is not None:
- s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
- if s.get('elapsed'):
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
- else:
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
- else:
- msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
- if s.get('fragment_index') and s.get('fragment_count'):
- msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
- elif s.get('fragment_index'):
- msg_template += ' (frag %(fragment_index)s)'
+ s.update({
+ '_eta_str': self.format_eta(s.get('eta')),
+ '_speed_str': self.format_speed(s.get('speed')),
+ '_percent_str': self.format_percent(try_call(
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
+ lambda: s['downloaded_bytes'] == 0 and 0)),
+ '_total_bytes_str': format_bytes(s.get('total_bytes')),
+ '_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')),
+ '_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ })
+
+ msg_template = with_fields(
+ ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
+ ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
+ default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
+
+ msg_template += with_fields(
+ ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
+ ('fragment_index', ' (frag %(fragment_index)s)'))
self._report_progress_status(s, msg_template)
def report_resuming_byte(self, resume_len):
@@ -390,14 +380,23 @@ class FileDownloader:
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
- self.to_screen(
+ self.__to_screen(
'[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
+ self.sleep_retry('http', count)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
+ def sleep_retry(self, retry_type, count):
+ sleep_func = self.params.get('retry_sleep_functions', {}).get(retry_type)
+ delay = float_or_none(sleep_func(n=count - 1)) if sleep_func else None
+ if delay:
+ self.__to_screen(f'Sleeping {delay:.2f} seconds ...')
+ time.sleep(delay)
+ return sleep_func is not None
+
@staticmethod
def supports_manifest(manifest):
""" Whether the downloader can download the fragments from the manifest.
diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py
index e6efae485..a6da26f09 100644
--- a/yt_dlp/downloader/dash.py
+++ b/yt_dlp/downloader/dash.py
@@ -1,7 +1,7 @@
import time
+from . import get_suitable_downloader
from .fragment import FragmentFD
-from ..downloader import get_suitable_downloader
from ..utils import urljoin
@@ -73,6 +73,7 @@ class DashSegmentsFD(FragmentFD):
yield {
'frag_index': frag_index,
+ 'fragment_count': fragment.get('fragment_count'),
'index': i,
'url': fragment_url,
}
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 85c6a6977..f84a17f23 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -1,3 +1,4 @@
+import enum
import os.path
import re
import subprocess
@@ -5,7 +6,7 @@ import sys
import time
from .fragment import FragmentFD
-from ..compat import compat_setenv, compat_str
+from ..compat import functools
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
Popen,
@@ -24,9 +25,15 @@ from ..utils import (
)
+class Features(enum.Enum):
+ TO_STDOUT = enum.auto()
+ MULTIPLE_FORMATS = enum.auto()
+
+
class ExternalFD(FragmentFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
- can_download_to_stdout = False
+ SUPPORTED_FEATURES = ()
+ _CAPTURE_STDERR = True
def real_download(self, filename, info_dict):
self.report_destination(filename)
@@ -74,7 +81,7 @@ class ExternalFD(FragmentFD):
def EXE_NAME(cls):
return cls.get_basename()
- @property
+ @functools.cached_property
def exe(self):
return self.EXE_NAME
@@ -90,9 +97,11 @@ class ExternalFD(FragmentFD):
@classmethod
def supports(cls, info_dict):
- return (
- (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
- and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
+ return all((
+ not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
+ '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
+ all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
+ ))
@classmethod
def can_download(cls, info_dict, path=None):
@@ -119,29 +128,31 @@ class ExternalFD(FragmentFD):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
- p = Popen(cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate_or_kill()
- if p.returncode != 0:
- self.to_stderr(stderr.decode('utf-8', 'replace'))
- return p.returncode
+ _, stderr, returncode = Popen.run(
+ cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
+ if returncode and stderr:
+ self.to_stderr(stderr)
+ return returncode
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
count = 0
while count <= fragment_retries:
- p = Popen(cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate_or_kill()
- if p.returncode == 0:
+ _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
+ if not returncode:
break
+
# TODO: Decide whether to retry based on error code
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
- self.to_stderr(stderr.decode('utf-8', 'replace'))
+ if stderr:
+ self.to_stderr(stderr)
count += 1
if count <= fragment_retries:
self.to_screen(
'[%s] Got error. Retrying fragments (attempt %d of %s)...'
% (self.get_basename(), count, self.format_retries(fragment_retries)))
+ self.sleep_retry('fragment', count)
if count > fragment_retries:
if not skip_unavailable_fragments:
self.report_error('Giving up after %s fragment retries' % fragment_retries)
@@ -170,6 +181,7 @@ class ExternalFD(FragmentFD):
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
+ _CAPTURE_STDERR = False # curl writes the progress to stderr
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
@@ -194,16 +206,6 @@ class CurlFD(ExternalFD):
cmd += ['--', info_dict['url']]
return cmd
- def _call_downloader(self, tmpfilename, info_dict):
- cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
-
- self._debug_cmd(cmd)
-
- # curl writes the progress to stderr so don't capture it.
- p = Popen(cmd)
- p.communicate_or_kill()
- return p.returncode
-
class AxelFD(ExternalFD):
AVAILABLE_OPT = '-V'
@@ -322,7 +324,7 @@ class HttpieFD(ExternalFD):
class FFmpegFD(ExternalFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
- can_download_to_stdout = True
+ SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
@classmethod
def available(cls, path=None):
@@ -330,10 +332,6 @@ class FFmpegFD(ExternalFD):
# Fixme: This may be wrong when --ffmpeg-location is used
return FFmpegPostProcessor().available
- @classmethod
- def supports(cls, info_dict):
- return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
-
def on_process_started(self, proc, stdin):
""" Override this in subclasses """
pass
@@ -378,13 +376,6 @@ class FFmpegFD(ExternalFD):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
- # start_time = info_dict.get('start_time') or 0
- # if start_time:
- # args += ['-ss', compat_str(start_time)]
- # end_time = info_dict.get('end_time')
- # if end_time:
- # args += ['-t', compat_str(end_time - start_time)]
-
http_headers = None
if info_dict.get('http_headers'):
youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
@@ -411,8 +402,8 @@ class FFmpegFD(ExternalFD):
# We could switch to the following code if we are able to detect version properly
# args += ['-http_proxy', proxy]
env = os.environ.copy()
- compat_setenv('HTTP_PROXY', proxy, env=env)
- compat_setenv('http_proxy', proxy, env=env)
+ env['HTTP_PROXY'] = proxy
+ env['http_proxy'] = proxy
protocol = info_dict.get('protocol')
@@ -442,25 +433,31 @@ class FFmpegFD(ExternalFD):
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
- elif isinstance(conn, compat_str):
+ elif isinstance(conn, str):
args += ['-rtmp_conn', conn]
+ start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
+
for i, url in enumerate(urls):
- # We need to specify headers for each http input stream
- # otherwise, it will only be applied to the first.
- # https://github.com/yt-dlp/yt-dlp/issues/2696
if http_headers is not None and re.match(r'^https?://', url):
args += http_headers
+ if start_time:
+ args += ['-ss', str(start_time)]
+ if end_time:
+ args += ['-t', str(end_time - start_time)]
+
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
- args += ['-c', 'copy']
+ if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
+ args += ['-c', 'copy']
+
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}'])
if self.params.get('test', False):
- args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+ args += ['-fs', str(self._TEST_FILE_SIZE)]
ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
@@ -495,24 +492,23 @@ class FFmpegFD(ExternalFD):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
- proc = Popen(args, stdin=subprocess.PIPE, env=env)
- if url in ('-', 'pipe:'):
- self.on_process_started(proc, proc.stdin)
- try:
- retval = proc.wait()
- except BaseException as e:
- # subprocces.run would send the SIGKILL signal to ffmpeg and the
- # mp4 file couldn't be played, but if we ask ffmpeg to quit it
- # produces a file that is playable (this is mostly useful for live
- # streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
- if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
- proc.communicate_or_kill(b'q')
- else:
- proc.kill()
- proc.wait()
- raise
- return retval
+ with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+ if url in ('-', 'pipe:'):
+ self.on_process_started(proc, proc.stdin)
+ try:
+ retval = proc.wait()
+ except BaseException as e:
+            # subprocess.run would send the SIGKILL signal to ffmpeg and the
+ # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+ # produces a file that is playable (this is mostly useful for live
+ # streams). Note that Windows is not affected and produces playable
+ # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
+ proc.communicate_or_kill(b'q')
+ else:
+ proc.kill(timeout=None)
+ raise
+ return retval
class AVconvFD(FFmpegFD):
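
supports() now derives its answer from SUPPORTED_FEATURES: stdout targets require Features.TO_STDOUT, '+'-joined protocols (merged video+audio) require Features.MULTIPLE_FORMATS, and every component protocol must be individually supported. A worked example against the classes defined above:

    from yt_dlp.downloader.external import Features, FFmpegFD

    info_dict = {'protocol': 'm3u8+https', 'to_stdout': True}  # illustrative

    assert all((
        not info_dict.get('to_stdout') or Features.TO_STDOUT in FFmpegFD.SUPPORTED_FEATURES,
        '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in FFmpegFD.SUPPORTED_FEATURES,
        all(p in FFmpegFD.SUPPORTED_PROTOCOLS for p in info_dict['protocol'].split('+')),
    ))  # FFmpegFD passes; CurlFD, with no SUPPORTED_FEATURES, would fail the first two checks
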
diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py
index 3629d63f5..770354de7 100644
--- a/yt_dlp/downloader/f4m.py
+++ b/yt_dlp/downloader/f4m.py
@@ -1,17 +1,13 @@
+import base64
import io
import itertools
+import struct
import time
+import urllib.error
+import urllib.parse
from .fragment import FragmentFD
-from ..compat import (
- compat_b64decode,
- compat_etree_fromstring,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
from ..utils import fix_xml_ampersands, xpath_text
@@ -35,13 +31,13 @@ class FlvReader(io.BytesIO):
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
- return compat_struct_unpack('!Q', self.read_bytes(8))[0]
+ return struct.unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
- return compat_struct_unpack('!I', self.read_bytes(4))[0]
+ return struct.unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
- return compat_struct_unpack('!B', self.read_bytes(1))[0]
+ return struct.unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
@@ -203,11 +199,11 @@ def build_fragments_list(boot_info):
def write_unsigned_int(stream, val):
- stream.write(compat_struct_pack('!I', val))
+ stream.write(struct.pack('!I', val))
def write_unsigned_int_24(stream, val):
- stream.write(compat_struct_pack('!I', val)[1:])
+ stream.write(struct.pack('!I', val)[1:])
def write_flv_header(stream):
@@ -301,12 +297,12 @@ class F4mFD(FragmentFD):
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
bootstrap_url = node.get('url')
if bootstrap_url:
- bootstrap_url = compat_urlparse.urljoin(
+ bootstrap_url = urllib.parse.urljoin(
base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
- bootstrap = compat_b64decode(node.text)
+ bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
@@ -336,14 +332,14 @@ class F4mFD(FragmentFD):
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
- base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
+ base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
- metadata = compat_b64decode(metadata_node.text)
+ metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
@@ -371,7 +367,7 @@ class F4mFD(FragmentFD):
if not live:
write_metadata_tag(dest_stream, metadata)
- base_url_parsed = compat_urllib_parse_urlparse(base_url)
+ base_url_parsed = urllib.parse.urlparse(base_url)
self._start_frag_download(ctx, info_dict)
@@ -391,9 +387,10 @@ class F4mFD(FragmentFD):
query.append(info_dict['extra_param_to_segment_url'])
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
try:
- success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+ success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
if not success:
return False
+ down_data = self._read_fragment(ctx)
reader = FlvReader(down_data)
while True:
try:
@@ -410,7 +407,7 @@ class F4mFD(FragmentFD):
if box_type == b'mdat':
self._append_fragment(ctx, box_data)
break
- except compat_urllib_error.HTTPError as err:
+ except urllib.error.HTTPError as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
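
The FlvReader helpers above now call struct directly; the '!' prefix selects network (big-endian) byte order, with Q/I/B reading unsigned 64-, 32- and 8-bit integers. A quick demonstration of what each helper decodes:

    import struct

    assert struct.unpack('!I', b'\x00\x00\x00\x2a')[0] == 42   # read_unsigned_int
    assert struct.unpack('!B', b'\x07')[0] == 7                # read_unsigned_char
    assert struct.pack('!I', 42) == b'\x00\x00\x00\x2a'        # write_unsigned_int
    assert struct.pack('!I', 42)[1:] == b'\x00\x00\x2a'        # write_unsigned_int_24
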
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 4655f067f..3535e0e7d 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -4,12 +4,14 @@ import http.client
import json
import math
import os
+import struct
import time
+import urllib.error
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
-from ..compat import compat_os_name, compat_struct_pack, compat_urllib_error
+from ..compat import compat_os_name
from ..utils import (
DownloadError,
encodeFilename,
@@ -23,11 +25,7 @@ class HttpQuietDownloader(HttpFD):
def to_screen(self, *args, **kargs):
pass
- console_title = to_screen
-
- def report_retry(self, err, count, retries):
- super().to_screen(
- f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
+ to_console_title = to_screen
class FragmentFD(FileDownloader):
@@ -70,6 +68,7 @@ class FragmentFD(FileDownloader):
self.to_screen(
'\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
+ self.sleep_retry('fragment', count)
def report_skip_fragment(self, frag_index, err=None):
err = f' {err};' if err else ''
@@ -168,18 +167,11 @@ class FragmentFD(FileDownloader):
total_frags_str = 'unknown (live)'
self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
self.report_destination(ctx['filename'])
- dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': self.params.get('continuedl', True),
- 'quiet': self.params.get('quiet'),
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit'),
- 'retries': self.params.get('retries', 0),
- 'nopart': self.params.get('nopart', False),
- 'test': False,
- }
- )
+ dl = HttpQuietDownloader(self.ydl, {
+ **self.params,
+ 'noprogress': True,
+ 'test': False,
+ })
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
resume_len = 0
@@ -252,6 +244,9 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
+ if not total_frags and ctx.get('fragment_count'):
+ state['fragment_count'] = ctx['fragment_count']
+
if ctx_id is not None and s.get('ctx_id') != ctx_id:
return
@@ -355,7 +350,7 @@ class FragmentFD(FileDownloader):
decrypt_info = fragment.get('decrypt_info')
if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
return frag_content
- iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+ iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence'])
decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
@@ -460,10 +455,11 @@ class FragmentFD(FileDownloader):
fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
while count <= fragment_retries:
try:
+ ctx['fragment_count'] = fragment.get('fragment_count')
if self._download_fragment(ctx, fragment['url'], info_dict, headers):
break
return
- except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
+ except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
@@ -506,12 +502,20 @@ class FragmentFD(FileDownloader):
self.report_warning('The download speed shown is only that of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
- for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
- ctx['fragment_filename_sanitized'] = frag_filename
- ctx['fragment_index'] = frag_index
- result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
- if not result:
- return False
+ try:
+ for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ ctx.update({
+ 'fragment_filename_sanitized': frag_filename,
+ 'fragment_index': frag_index,
+ })
+ if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
+ return False
+ except KeyboardInterrupt:
+ self._finish_multiline_status()
+ self.report_error(
+                    'Interrupted by user. Waiting for all threads to shut down...', is_error=False, tb=False)

+ pool.shutdown(wait=False)
+ raise
else:
for fragment in fragments:
if not interrupt_trigger[0]:
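
One detail worth calling out from this file: when HLS decrypt info lacks an explicit IV, the fallback struct.pack('>8xq', media_sequence) builds the spec-default IV, i.e. the media sequence number as a big-endian integer left-padded to one AES block:

    import struct

    iv = struct.pack('>8xq', 5)        # 8 zero pad bytes + signed 64-bit big-endian
    assert iv == b'\x00' * 15 + b'\x05'
    assert len(iv) == 16               # exactly one AES-128 block
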
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 0bd2f121c..f54b3f473 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -1,13 +1,13 @@
import binascii
import io
import re
+import urllib.parse
+from . import get_suitable_downloader
from .external import FFmpegFD
from .fragment import FragmentFD
from .. import webvtt
-from ..compat import compat_urlparse
from ..dependencies import Cryptodome_AES
-from ..downloader import get_suitable_downloader
from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query
@@ -61,12 +61,18 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
- if can_download and not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
- if FFmpegFD.available():
+ if can_download:
+ has_ffmpeg = FFmpegFD.available()
+ no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s
+ if no_crypto and has_ffmpeg:
can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available'
- else:
+ elif no_crypto:
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; '
'Decryption will be performed natively, but will be extremely slow')
+ elif re.search(r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
+ install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
+ message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
+ f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download:
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
@@ -140,7 +146,7 @@ class HlsFD(FragmentFD):
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
- extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
@@ -162,7 +168,7 @@ class HlsFD(FragmentFD):
frag_url = (
line
if re.match(r'^https?://', line)
- else compat_urlparse.urljoin(man_url, line))
+ else urllib.parse.urljoin(man_url, line))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@@ -187,7 +193,7 @@ class HlsFD(FragmentFD):
frag_url = (
map_info.get('URI')
if re.match(r'^https?://', map_info.get('URI'))
- else compat_urlparse.urljoin(man_url, map_info.get('URI')))
+ else urllib.parse.urljoin(man_url, map_info.get('URI')))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@@ -215,7 +221,7 @@ class HlsFD(FragmentFD):
if 'IV' in decrypt_info:
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
if not re.match(r'^https?://', decrypt_info['URI']):
- decrypt_info['URI'] = compat_urlparse.urljoin(
+ decrypt_info['URI'] = urllib.parse.urljoin(
man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
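
The new warning path flags a probable livestream whenever the manifest's media sequence is anything other than a lone 0, using a negative lookahead. How that regex behaves on single header lines:

    import re

    LIVE_RE = r'#EXT-X-MEDIA-SEQUENCE:(?!0$)'
    assert not re.search(LIVE_RE, '#EXT-X-MEDIA-SEQUENCE:0')   # VOD, no warning
    assert re.search(LIVE_RE, '#EXT-X-MEDIA-SEQUENCE:42')      # likely live
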
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 12a2f0cc7..6b59320b8 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -1,11 +1,12 @@
+import http.client
import os
import random
import socket
import ssl
import time
+import urllib.error
from .common import FileDownloader
-from ..compat import compat_http_client, compat_urllib_error
from ..utils import (
ContentTooShortError,
ThrottledDownload,
@@ -24,7 +25,7 @@ RESPONSE_READ_EXCEPTIONS = (
socket.timeout, # compat: py < 3.10
ConnectionError,
ssl.SSLError,
- compat_http_client.HTTPException
+ http.client.HTTPException
)
@@ -136,20 +137,18 @@ class HttpFD(FileDownloader):
if has_range:
content_range = ctx.data.headers.get('Content-Range')
content_range_start, content_range_end, content_len = parse_http_range(content_range)
- if content_range_start is not None and range_start == content_range_start:
- # Content-Range is present and matches requested Range, resume is possible
- accept_content_len = (
+ # Content-Range is present and matches requested Range, resume is possible
+ if range_start == content_range_start and (
# Non-chunked download
not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
or content_range_end == range_end
- or content_len < range_end)
- if accept_content_len:
- ctx.content_len = content_len
- if content_len or req_end:
- ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
- return
+ or content_len < range_end):
+ ctx.content_len = content_len
+ if content_len or req_end:
+ ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
+ return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
@@ -157,7 +156,7 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
- except compat_urllib_error.HTTPError as err:
+ except urllib.error.HTTPError as err:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
@@ -165,7 +164,7 @@ class HttpFD(FileDownloader):
ctx.data = self.ydl.urlopen(
sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
- except compat_urllib_error.HTTPError as err:
+ except urllib.error.HTTPError as err:
if err.code < 500 or err.code >= 600:
raise
else:
@@ -198,7 +197,7 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
- except compat_urllib_error.URLError as err:
+ except urllib.error.URLError as err:
if isinstance(err.reason, ssl.CertificateError):
raise
raise RetryDownload(err)
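
The restructured resume check accepts a ranged response only when the Content-Range start echoes the requested start and the promised end or length covers the requested piece. parse_http_range in yt_dlp.utils does the header parsing; a hedged stand-in to show the shape of its output (this regex is an illustration, not the real implementation):

    import re

    def parse_content_range(value):
        m = re.fullmatch(r'bytes (\d+)-(\d+)/(\d+|\*)', value or '')
        if not m:
            return None, None, None
        start, end, total = m.groups()
        return int(start), int(end), None if total == '*' else int(total)

    assert parse_content_range('bytes 500-999/1000') == (500, 999, 1000)
    assert parse_content_range(None) == (None, None, None)
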
diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index 9efc5e4d9..8a0071ab3 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -2,9 +2,9 @@ import binascii
import io
import struct
import time
+import urllib.error
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
u8 = struct.Struct('>B')
u88 = struct.Struct('>Bx')
@@ -268,7 +268,7 @@ class IsmFD(FragmentFD):
extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
break
- except compat_urllib_error.HTTPError as err:
+ except urllib.error.HTTPError as err:
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py
index 5e9dda03d..77ed39e5b 100644
--- a/yt_dlp/downloader/niconico.py
+++ b/yt_dlp/downloader/niconico.py
@@ -1,8 +1,7 @@
import threading
+from . import get_suitable_downloader
from .common import FileDownloader
-from ..downloader import get_suitable_downloader
-from ..extractor.niconico import NiconicoIE
from ..utils import sanitized_Request
@@ -10,8 +9,9 @@ class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """
def real_download(self, filename, info_dict):
- self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+ from ..extractor.niconico import NiconicoIE
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py
index 3464eeef9..0e0952599 100644
--- a/yt_dlp/downloader/rtmp.py
+++ b/yt_dlp/downloader/rtmp.py
@@ -4,7 +4,6 @@ import subprocess
import time
from .common import FileDownloader
-from ..compat import compat_str
from ..utils import (
Popen,
check_executable,
@@ -92,8 +91,7 @@ class RtmpFD(FileDownloader):
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
- proc.kill()
- proc.wait()
+ proc.kill(timeout=None)
raise
url = info_dict['url']
@@ -144,7 +142,7 @@ class RtmpFD(FileDownloader):
if isinstance(conn, list):
for entry in conn:
basic_args += ['--conn', entry]
- elif isinstance(conn, compat_str):
+ elif isinstance(conn, str):
basic_args += ['--conn', conn]
if protocol is not None:
basic_args += ['--protocol', protocol]
diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py
index cc528029d..5334c6c95 100644
--- a/yt_dlp/downloader/youtube_live_chat.py
+++ b/yt_dlp/downloader/youtube_live_chat.py
@@ -1,9 +1,8 @@
import json
import time
+import urllib.error
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
@@ -26,7 +25,9 @@ class YoutubeLiveChatFD(FragmentFD):
'total_frags': None,
}
- ie = YT_BaseIE(self.ydl)
+ from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+ ie = YoutubeBaseInfoExtractor(self.ydl)
start_time = int(time.time() * 1000)
@@ -127,7 +128,7 @@ class YoutubeLiveChatFD(FragmentFD):
elif info_dict['protocol'] == 'youtube_live_chat':
continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
return True, continuation_id, offset, click_tracking_params
- except compat_urllib_error.HTTPError as err:
+ except urllib.error.HTTPError as err:
count += 1
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index afd3d05ac..6bfa4bd7b 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,32 +1,15 @@
-import contextlib
-import os
+from ..compat.compat_utils import passthrough_module
-from ..utils import load_plugins
-
-_LAZY_LOADER = False
-if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
- with contextlib.suppress(ImportError):
- from .lazy_extractors import * # noqa: F403
- from .lazy_extractors import _ALL_CLASSES
- _LAZY_LOADER = True
-
-if not _LAZY_LOADER:
- from .extractors import * # noqa: F403
- _ALL_CLASSES = [ # noqa: F811
- klass
- for name, klass in globals().items()
- if name.endswith('IE') and name != 'GenericIE'
- ]
- _ALL_CLASSES.append(GenericIE) # noqa: F405
-
-_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+passthrough_module(__name__, '.extractors')
+del passthrough_module
def gen_extractor_classes():
""" Return a list of supported extractors.
The order does matter; the first extractor matched is the one handling the URL.
"""
+ from .extractors import _ALL_CLASSES
+
return _ALL_CLASSES
@@ -39,10 +22,12 @@ def gen_extractors():
def list_extractor_classes(age_limit=None):
"""Return a list of extractors that are suitable for the given age, sorted by extractor name"""
+ from .generic import GenericIE
+
yield from sorted(filter(
- lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405
+ lambda ie: ie.is_suitable(age_limit) and ie != GenericIE,
gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower())
- yield GenericIE # noqa: F405
+ yield GenericIE
def list_extractors(age_limit=None):
@@ -52,4 +37,6 @@ def list_extractors(age_limit=None):
def get_info_extractor(ie_name):
"""Returns the info extractor class with the given ie_name"""
- return globals()[ie_name + 'IE']
+ from . import extractors
+
+ return getattr(extractors, f'{ie_name}IE')
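
With the passthrough shim in place, extractor classes resolve lazily through _extractors.py, and get_info_extractor() reduces to a getattr on that module. Usage is unchanged for callers:

    from yt_dlp.extractor import get_info_extractor

    YoutubeIE = get_info_extractor('Youtube')  # getattr(extractors, 'YoutubeIE')
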
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
new file mode 100644
index 000000000..37328dfc8
--- /dev/null
+++ b/yt_dlp/extractor/_extractors.py
@@ -0,0 +1,2198 @@
+# flake8: noqa: F401
+
+from .abc import (
+ ABCIE,
+ ABCIViewIE,
+ ABCIViewShowSeriesIE,
+)
+from .abcnews import (
+ AbcNewsIE,
+ AbcNewsVideoIE,
+)
+from .abcotvs import (
+ ABCOTVSIE,
+ ABCOTVSClipsIE,
+)
+from .abematv import (
+ AbemaTVIE,
+ AbemaTVTitleIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
+from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
+from .adobetv import (
+ AdobeTVEmbedIE,
+ AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
+ AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import (
+ AENetworksIE,
+ AENetworksCollectionIE,
+ AENetworksShowIE,
+ HistoryTopicIE,
+ HistoryPlayerIE,
+ BiographyIE,
+)
+from .afreecatv import (
+ AfreecaTVIE,
+ AfreecaTVLiveIE,
+ AfreecaTVUserIE,
+)
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .amara import AmaraIE
+from .alura import (
+ AluraIE,
+ AluraCourseIE
+)
+from .amcnetworks import AMCNetworksIE
+from .amazon import AmazonStoreIE
+from .americastestkitchen import (
+ AmericasTestKitchenIE,
+ AmericasTestKitchenSeasonIE,
+)
+from .animeondemand import AnimeOnDemandIE
+from .anvato import AnvatoIE
+from .aol import AolIE
+from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
+from .alsace20tv import (
+ Alsace20TVIE,
+ Alsace20TVEmbedIE,
+)
+from .apa import APAIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
+from .applepodcasts import ApplePodcastsIE
+from .archiveorg import (
+ ArchiveOrgIE,
+ YoutubeWebArchiveIE,
+)
+from .arcpublishing import ArcPublishingIE
+from .arkena import ArkenaIE
+from .ard import (
+ ARDBetaMediathekIE,
+ ARDIE,
+ ARDMediathekIE,
+)
+from .arte import (
+ ArteTVIE,
+ ArteTVEmbedIE,
+ ArteTVPlaylistIE,
+ ArteTVCategoryIE,
+)
+from .arnes import ArnesIE
+from .asiancrush import (
+ AsianCrushIE,
+ AsianCrushPlaylistIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atscaleconf import AtScaleConfEventIE
+from .atttechchannel import ATTTechChannelIE
+from .atvat import ATVAtIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .audius import (
+ AudiusIE,
+ AudiusTrackIE,
+ AudiusPlaylistIE,
+ AudiusProfileIE,
+)
+from .awaan import (
+ AWAANIE,
+ AWAANVideoIE,
+ AWAANLiveIE,
+ AWAANSeasonIE,
+)
+from .azmedien import AZMedienIE
+from .baidu import BaiduVideoIE
+from .banbye import (
+ BanByeIE,
+ BanByeChannelIE,
+)
+from .bandaichannel import BandaiChannelIE
+from .bandcamp import (
+ BandcampIE,
+ BandcampAlbumIE,
+ BandcampWeeklyIE,
+ BandcampUserIE,
+)
+from .bannedvideo import BannedVideoIE
+from .bbc import (
+ BBCCoUkIE,
+ BBCCoUkArticleIE,
+ BBCCoUkIPlayerEpisodesIE,
+ BBCCoUkIPlayerGroupIE,
+ BBCCoUkPlaylistIE,
+ BBCIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
+from .beatport import BeatportIE
+from .bet import BetIE
+from .bfi import BFIPlayerIE
+from .bfmtv import (
+ BFMTVIE,
+ BFMTVLiveIE,
+ BFMTVArticleIE,
+)
+from .bibeltv import BibelTVIE
+from .bigflix import BigflixIE
+from .bigo import BigoIE
+from .bild import BildIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliSearchIE,
+ BilibiliCategoryIE,
+ BiliBiliBangumiIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
+ BilibiliChannelIE,
+ BiliIntlIE,
+ BiliIntlSeriesIE,
+ BiliLiveIE,
+)
+from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+ BitChuteIE,
+ BitChuteChannelIE,
+)
+from .bitwave import (
+ BitwaveReplayIE,
+ BitwaveStreamIE,
+)
+from .biqle import BIQLEIE
+from .blackboardcollaborate import BlackboardCollaborateIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
+from .blogger import BloggerIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
+from .bostonglobe import BostonGlobeIE
+from .box import BoxIE
+from .bpb import BpbIE
+from .br import (
+ BRIE,
+ BRMediathekIE,
+)
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .breitbart import BreitBartIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .businessinsider import BusinessInsiderIE
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .cableav import CableAVIE
+from .callin import CallinIE
+from .caltrans import CaltransIE
+from .cam4 import CAM4IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
+from .cammodels import CamModelsIE
+from .camwithher import CamWithHerIE
+from .canalalpha import CanalAlphaIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import (
+ CanvasIE,
+ CanvasEenIE,
+ VrtNUIE,
+ DagelijkseKostIE,
+)
+from .carambatv import (
+ CarambaTVIE,
+ CarambaTVPageIE,
+)
+from .cartoonnetwork import CartoonNetworkIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+ CBCGemIE,
+ CBCGemPlaylistIE,
+ CBCGemLiveIE,
+)
+from .cbs import CBSIE
+from .cbslocal import (
+ CBSLocalIE,
+ CBSLocalArticleIE,
+)
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+ CBSNewsEmbedIE,
+ CBSNewsIE,
+ CBSNewsLiveVideoIE,
+)
+from .cbssports import (
+ CBSSportsEmbedIE,
+ CBSSportsIE,
+ TwentyFourSevenSportsIE,
+)
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
+from .ccma import CCMAIE
+from .cctv import CCTVIE
+from .cda import CDAIE
+from .ceskatelevize import CeskaTelevizeIE
+from .cgtn import CGTNIE
+from .channel9 import Channel9IE
+from .charlierose import CharlieRoseIE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chingari import (
+ ChingariIE,
+ ChingariUserIE,
+)
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
+from .ciscowebex import CiscoWebexIE
+from .cjsw import CJSWIE
+from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
+from .cliprs import ClipRsIE
+from .clipsyndicate import ClipsyndicateIE
+from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import (
+ CNBCIE,
+ CNBCVideoIE,
+)
+from .cnn import (
+ CNNIE,
+ CNNBlogsIE,
+ CNNArticleIE,
+)
+from .coub import CoubIE
+from .comedycentral import (
+ ComedyCentralIE,
+ ComedyCentralTVIE,
+)
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+ ViewSourceIE,
+)
+from .condenast import CondeNastIE
+from .contv import CONtvIE
+from .corus import CorusIE
+from .cpac import (
+ CPACIE,
+ CPACPlaylistIE,
+)
+from .cozytv import CozyTVIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .craftsy import CraftsyIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crowdbunker import (
+ CrowdBunkerIE,
+ CrowdBunkerChannelIE,
+)
+from .crunchyroll import (
+ CrunchyrollIE,
+ CrunchyrollShowPlaylistIE,
+ CrunchyrollBetaIE,
+ CrunchyrollBetaShowIE,
+)
+from .cspan import CSpanIE, CSpanCongressIE
+from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
+from .ctvnews import CTVNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionsIE,
+ CuriosityStreamSeriesIE,
+)
+from .cwtv import CWTVIE
+from .cybrary import (
+ CybraryIE,
+ CybraryCourseIE
+)
+from .daftsex import DaftsexIE
+from .dailymail import DailyMailIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+)
+from .dailywire import (
+ DailyWireIE,
+ DailyWirePodcastIE,
+)
+from .damtomo import (
+ DamtomoRecordIE,
+ DamtomoVideoIE,
+)
+from .daum import (
+ DaumIE,
+ DaumClipIE,
+ DaumPlaylistIE,
+ DaumUserIE,
+)
+from .daystar import DaystarClipIE
+from .dbtv import DBTVIE
+from .dctp import DctpTvIE
+from .deezer import (
+ DeezerPlaylistIE,
+ DeezerAlbumIE,
+)
+from .democracynow import DemocracynowIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digg import DiggIE
+from .dotsub import DotsubIE
+from .douyutv import (
+ DouyuShowIE,
+ DouyuTVIE,
+)
+from .dplay import (
+ DPlayIE,
+ DiscoveryPlusIE,
+ HGTVDeIE,
+ GoDiscoveryIE,
+ TravelChannelIE,
+ CookingChannelIE,
+ HGTVUsaIE,
+ FoodNetworkIE,
+ InvestigationDiscoveryIE,
+ DestinationAmericaIE,
+ AmHistoryChannelIE,
+ ScienceChannelIE,
+ DIYNetworkIE,
+ DiscoveryLifeIE,
+ AnimalPlanetIE,
+ TLCIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryNetworksDeIE,
+ DiscoveryPlusItalyIE,
+ DiscoveryPlusItalyShowIE,
+ DiscoveryPlusIndiaShowIE,
+)
+from .dreisat import DreiSatIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+)
+from .dtube import DTubeIE
+from .dvtv import DVTVIE
+from .duboku import (
+ DubokuIE,
+ DubokuPlaylistIE
+)
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .digitalconcerthall import DigitalConcertHallIE
+from .discovery import DiscoveryIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
+from .doodstream import DoodStreamIE
+from .dropbox import DropboxIE
+from .dropout import (
+ DropoutSeasonIE,
+ DropoutIE
+)
+from .dw import (
+ DWIE,
+ DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentube import (
+ EllenTubeIE,
+ EllenTubeVideoIE,
+ EllenTubePlaylistIE,
+)
+from .elonet import ElonetIE
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .epicon import (
+ EpiconIE,
+ EpiconSeriesIE,
+)
+from .eporner import EpornerIE
+from .eroprofile import (
+ EroProfileIE,
+ EroProfileAlbumIE,
+)
+from .ertgr import (
+ ERTFlixCodenameIE,
+ ERTFlixIE,
+ ERTWebtvEmbedIE,
+)
+from .escapist import EscapistIE
+from .espn import (
+ ESPNIE,
+ WatchESPNIE,
+ ESPNArticleIE,
+ FiveThirtyEightIE,
+ ESPNCricInfoIE,
+)
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .europeantour import EuropeanTourIE
+from .euscreen import EUScreenIE
+from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
+from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+ FacebookRedirectURLIE,
+)
+from .fancode import (
+ FancodeVodIE,
+ FancodeLiveIE
+)
+
+from .faz import FazIE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+ FC2LiveIE,
+)
+from .fczenit import FczenitIE
+from .fifa import FifaIE
+from .filmmodu import FilmmoduIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
+from .filmweb import FilmwebIE
+from .firsttv import FirstTVIE
+from .fivetv import FiveTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
+from .fourzerostudio import (
+ FourZeroStudioArchiveIE,
+ FourZeroStudioClipIE,
+)
+from .fox import FOXIE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
+from .foxgay import FoxgayIE
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsArticleIE,
+)
+from .foxsports import FoxSportsIE
+from .fptplay import FptplayIE
+from .franceinter import FranceInterIE
+from .francetv import (
+ FranceTVIE,
+ FranceTVSiteIE,
+ FranceTVInfoIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .frontendmasters import (
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+ FrontendMastersCourseIE
+)
+from .freetv import (
+ FreeTvIE,
+ FreeTvMoviesIE,
+)
+from .fujitv import FujiTVFODPlus7IE
+from .funimation import (
+ FunimationIE,
+ FunimationPageIE,
+ FunimationShowIE,
+)
+from .funk import FunkIE
+from .fusion import FusionIE
+from .fuyintv import FuyinTVIE
+from .gab import (
+ GabTVIE,
+ GabIE,
+)
+from .gaia import GaiaIE
+from .gameinformer import GameInformerIE
+from .gamejolt import (
+ GameJoltIE,
+ GameJoltUserIE,
+ GameJoltGameIE,
+ GameJoltGameSoundtrackIE,
+ GameJoltCommunityIE,
+ GameJoltSearchIE,
+)
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .gedidigital import GediDigitalIE
+from .generic import GenericIE
+from .gettr import (
+ GettrIE,
+ GettrStreamingIE,
+)
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+ GloboIE,
+ GloboArticleIE,
+)
+from .go import GoIE
+from .godtube import GodTubeIE
+from .gofile import GofileIE
+from .golem import GolemIE
+from .goodgame import GoodGameIE
+from .googledrive import (
+ GoogleDriveIE,
+ GoogleDriveFolderIE,
+)
+from .googlepodcasts import (
+ GooglePodcastsIE,
+ GooglePodcastsFeedIE,
+)
+from .googlesearch import GoogleSearchIE
+from .gopro import GoProIE
+from .goshgay import GoshgayIE
+from .gotostage import GoToStageIE
+from .gputechconf import GPUTechConfIE
+from .gronkh import (
+ GronkhIE,
+ GronkhFeedIE,
+ GronkhVodsIE
+)
+from .groupon import GrouponIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
+from .hidive import HiDiveIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hitrecord import HitRecordIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPrefixIE,
+ HotStarPlaylistIE,
+ HotStarSeriesIE,
+)
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
+from .hrti import (
+ HRTiIE,
+ HRTiPlaylistIE,
+)
+from .hse import (
+ HSEShowIE,
+ HSEProductIE,
+)
+from .huajiao import HuajiaoIE
+from .huya import HuyaLiveIE
+from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+ HungamaAlbumPlaylistIE,
+)
+from .hypem import HypemIE
+from .icareus import IcareusIE
+from .ichinanalive import (
+ IchinanaLiveIE,
+ IchinanaLiveClipIE,
+)
+from .ign import (
+ IGNIE,
+ IGNVideoIE,
+ IGNArticleIE,
+)
+from .iheart import (
+ IHeartRadioIE,
+ IHeartRadioPodcastIE,
+)
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
+from .imgur import (
+ ImgurIE,
+ ImgurAlbumIE,
+ ImgurGalleryIE,
+)
+from .ina import InaIE
+from .inc import IncIE
+from .indavideo import IndavideoEmbedIE
+from .infoq import InfoQIE
+from .instagram import (
+ InstagramIE,
+ InstagramIOSIE,
+ InstagramUserIE,
+ InstagramTagIE,
+ InstagramStoryIE,
+)
+from .internazionale import InternazionaleIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import (
+ IPrimaIE,
+ IPrimaCNNIE
+)
+from .iqiyi import (
+ IqiyiIE,
+ IqIE,
+ IqAlbumIE
+)
+from .itprotv import (
+ ITProTVIE,
+ ITProTVCourseIE
+)
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .iwara import (
+ IwaraIE,
+ IwaraPlaylistIE,
+ IwaraUserIE,
+)
+from .ixigua import IxiguaIE
+from .izlesene import IzleseneIE
+from .jable import (
+ JableIE,
+ JablePlaylistIE,
+)
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .joj import JojIE
+from .jwplatform import JWPlatformIE
+from .kakao import KakaoIE
+from .kaltura import KalturaIE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .kelbyone import KelbyOneIE
+from .ketnet import KetnetIE
+from .khanacademy import (
+ KhanAcademyIE,
+ KhanAcademyUnitIE,
+)
+from .kicker import KickerIE
+from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
+from .kinopoisk import KinoPoiskIE
+from .konserthusetplay import KonserthusetPlayIE
+from .koo import KooIE
+from .kth import KTHIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
+from .la7 import (
+ LA7IE,
+ LA7PodcastEpisodeIE,
+ LA7PodcastIE,
+)
+from .laola1tv import (
+ Laola1TvEmbedIE,
+ Laola1TvIE,
+ EHFTVIE,
+ ITTFIE,
+)
+from .lastfm import (
+ LastFMIE,
+ LastFMPlaylistIE,
+ LastFMUserIE,
+)
+from .lbry import (
+ LBRYIE,
+ LBRYChannelIE,
+)
+from .lci import LCIIE
+from .lcp import (
+ LcpPlayIE,
+ LcpIE,
+)
+from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
+from .leeco import (
+ LeIE,
+ LePlaylistIE,
+ LetvCloudIE,
+)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
+from .likee import (
+ LikeeIE,
+ LikeeUserIE
+)
+from .limelight import (
+ LimelightMediaIE,
+ LimelightChannelIE,
+ LimelightChannelListIE,
+)
+from .line import (
+ LineLiveIE,
+ LineLiveChannelIE,
+)
+from .linkedin import (
+ LinkedInIE,
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .linuxacademy import LinuxAcademyIE
+from .litv import LiTVIE
+from .livejournal import LiveJournalIE
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
+from .lnkgo import (
+ LnkGoIE,
+ LnkIE,
+)
+from .localnews8 import LocalNews8IE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import (
+ LRTVODIE,
+ LRTStreamIE
+)
+from .lynda import (
+ LyndaIE,
+ LyndaCourseIE
+)
+from .m6 import M6IE
+from .magentamusik360 import MagentaMusik360IE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
+from .mainstreaming import MainStreamingIE
+from .malltv import MallTVIE
+from .mangomolo import (
+ MangomoloVideoIE,
+ MangomoloLiveIE,
+)
+from .manoto import (
+ ManotoTVIE,
+ ManotoTVShowIE,
+ ManotoTVLiveIE,
+)
+from .manyvids import ManyVidsIE
+from .maoritv import MaoriTVIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
+from .massengeschmacktv import MassengeschmackTVIE
+from .masters import MastersIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .medaltv import MedalTVIE
+from .mediaite import MediaiteIE
+from .mediaklikk import MediaKlikkIE
+from .mediaset import (
+ MediasetIE,
+ MediasetShowIE,
+)
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
+from .medici import MediciIE
+from .megaphone import MegaphoneIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
+from .meta import METAIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .miaopai import MiaoPaiIE
+from .microsoftstream import MicrosoftStreamIE
+from .microsoftvirtualacademy import (
+ MicrosoftVirtualAcademyIE,
+ MicrosoftVirtualAcademyCourseIE,
+)
+from .mildom import (
+ MildomIE,
+ MildomVodIE,
+ MildomClipIE,
+ MildomUserVodIE,
+)
+from .minds import (
+ MindsIE,
+ MindsChannelIE,
+ MindsGroupIE,
+)
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mirrativ import (
+ MirrativIE,
+ MirrativUserIE,
+)
+from .mirrorcouk import MirrorCoUKIE
+from .mit import TechTVMITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixch import (
+ MixchIE,
+ MixchArchiveIE,
+)
+from .mixcloud import (
+ MixcloudIE,
+ MixcloudUserIE,
+ MixcloudPlaylistIE,
+)
+from .mlb import (
+ MLBIE,
+ MLBVideoIE,
+)
+from .mlssoccer import MLSSoccerIE
+from .mnet import MnetIE
+from .moevideo import MoeVideoIE
+from .mofosex import (
+ MofosexIE,
+ MofosexEmbedIE,
+)
+from .mojvideo import MojvideoIE
+from .morningstar import MorningstarIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE
+)
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviepilot import MoviepilotIE
+from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
+from .msn import MSNIE
+from .mtv import (
+ MTVIE,
+ MTVVideoIE,
+ MTVServicesEmbeddedIE,
+ MTVDEIE,
+ MTVJapanIE,
+ MTVItaliaIE,
+ MTVItaliaProgrammaIE,
+)
+from .muenchentv import MuenchenTVIE
+from .murrtube import MurrtubeIE, MurrtubeUserIE
+from .musescore import MuseScoreIE
+from .musicdex import (
+ MusicdexSongIE,
+ MusicdexAlbumIE,
+ MusicdexArtistIE,
+ MusicdexPlaylistIE,
+)
+from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mxplayer import (
+ MxplayerIE,
+ MxplayerShowIE,
+)
+from .mychannels import MyChannelsIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import (
+ MyviIE,
+ MyviEmbedIE,
+)
+from .myvideoge import MyVideoGeIE
+from .myvidster import MyVidsterIE
+from .n1 import (
+ N1InfoAssetIE,
+ N1InfoIIE,
+)
+from .nate import (
+ NateIE,
+ NateProgramIE,
+)
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
+from .naver import (
+ NaverIE,
+ NaverLiveIE,
+ NaverNowIE,
+)
+from .nba import (
+ NBAWatchEmbedIE,
+ NBAWatchIE,
+ NBAWatchCollectionIE,
+ NBAEmbedIE,
+ NBAIE,
+ NBAChannelIE,
+)
+from .nbc import (
+ NBCIE,
+ NBCNewsIE,
+ NBCOlympicsIE,
+ NBCOlympicsStreamIE,
+ NBCSportsIE,
+ NBCSportsStreamIE,
+ NBCSportsVPlayerIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
+ NDREmbedBaseIE,
+ NDREmbedIE,
+ NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .nebula import (
+ NebulaIE,
+ NebulaSubscriptionsIE,
+ NebulaChannelIE,
+)
+from .nerdcubed import NerdCubedFeedIE
+from .netzkino import NetzkinoIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
+from .netverse import (
+ NetverseIE,
+ NetversePlaylistIE,
+)
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+ NewgroundsUserIE,
+)
+from .newstube import NewstubeIE
+from .newsy import NewsyIE
+from .nextmedia import (
+ NextMediaIE,
+ NextMediaActionNewsIE,
+ AppleDailyIE,
+ NextTVIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nfb import NFBIE
+from .nfhsnetwork import NFHSNetworkIE
+from .nfl import (
+ NFLIE,
+ NFLArticleIE,
+)
+from .nhk import (
+ NhkVodIE,
+ NhkVodProgramIE,
+ NhkForSchoolBangumiIE,
+ NhkForSchoolSubjectIE,
+ NhkForSchoolProgramListIE,
+)
+from .nhl import NHLIE
+from .nick import (
+ NickIE,
+ NickBrIE,
+ NickDeIE,
+ NickNightIE,
+ NickRuIE,
+)
+from .niconico import (
+ NiconicoIE,
+ NiconicoPlaylistIE,
+ NiconicoUserIE,
+ NiconicoSeriesIE,
+ NiconicoHistoryIE,
+ NicovideoSearchDateIE,
+ NicovideoSearchIE,
+ NicovideoSearchURLIE,
+ NicovideoTagURLIE,
+)
+from .ninecninemedia import (
+ NineCNineMediaIE,
+ CPTwentyFourIE,
+)
+from .ninegag import NineGagIE
+from .ninenow import NineNowIE
+from .nintendo import NintendoIE
+from .nitter import NitterIE
+from .njpwworld import NJPWWorldIE
+from .nobelprize import NobelPrizeIE
+from .nonktube import NonkTubeIE
+from .noodlemagazine import NoodleMagazineIE
+from .noovo import NoovoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nova import (
+ NovaEmbedIE,
+ NovaIE,
+)
+from .novaplay import NovaPlayIE
+from .nowness import (
+ NownessIE,
+ NownessPlaylistIE,
+ NownessSeriesIE,
+)
+from .noz import NozIE
+from .npo import (
+ AndereTijdenIE,
+ NPOIE,
+ NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
+ SchoolTVIE,
+ HetKlokhuisIE,
+ VPROIE,
+ WNLIE,
+)
+from .npr import NprIE
+from .nrk import (
+ NRKIE,
+ NRKPlaylistIE,
+ NRKSkoleIE,
+ NRKTVIE,
+ NRKTVDirekteIE,
+ NRKRadioPodkastIE,
+ NRKTVEpisodeIE,
+ NRKTVEpisodesIE,
+ NRKTVSeasonIE,
+ NRKTVSeriesIE,
+)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+ NYTimesCookingIE,
+)
+from .nuvid import NuvidIE
+from .nzherald import NZHeraldIE
+from .nzz import NZZIE
+from .odatv import OdaTVIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oktoberfesttv import OktoberfestTVIE
+from .olympics import OlympicsReplayIE
+from .on24 import On24IE
+from .ondemandkorea import OnDemandKoreaIE
+from .onefootball import OneFootballIE
+from .onet import (
+ OnetIE,
+ OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
+)
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+ OoyalaIE,
+ OoyalaExternalIE,
+)
+from .opencast import (
+ OpencastIE,
+ OpencastPlaylistIE,
+)
+from .openrec import (
+ OpenRecIE,
+ OpenRecCaptureIE,
+ OpenRecMovieIE,
+)
+from .ora import OraTVIE
+from .orf import (
+ ORFTVthekIE,
+ ORFFM4IE,
+ ORFFM4StoryIE,
+ ORFOE1IE,
+ ORFOE3IE,
+ ORFNOEIE,
+ ORFWIEIE,
+ ORFBGLIE,
+ ORFOOEIE,
+ ORFSTMIE,
+ ORFKTNIE,
+ ORFSBGIE,
+ ORFTIRIE,
+ ORFVBGIE,
+ ORFIPTVIE,
+)
+from .outsidetv import OutsideTVIE
+from .packtpub import (
+ PacktPubIE,
+ PacktPubCourseIE,
+)
+from .palcomp3 import (
+ PalcoMP3IE,
+ PalcoMP3ArtistIE,
+ PalcoMP3VideoIE,
+)
+from .pandoratv import PandoraTVIE
+from .panopto import (
+ PanoptoIE,
+ PanoptoListIE,
+ PanoptoPlaylistIE
+)
+from .paramountplus import (
+ ParamountPlusIE,
+ ParamountPlusSeriesIE,
+)
+from .parliamentliveuk import ParliamentLiveUKIE
+from .parlview import ParlviewIE
+from .patreon import (
+ PatreonIE,
+ PatreonUserIE
+)
+from .pbs import PBSIE
+from .pearvideo import PearVideoIE
+from .peekvids import PeekVidsIE, PlayVidsIE
+from .peertube import (
+ PeerTubeIE,
+ PeerTubePlaylistIE,
+)
+from .peertv import PeerTVIE
+from .peloton import (
+ PelotonIE,
+ PelotonLiveIE
+)
+from .people import PeopleIE
+from .performgroup import PerformGroupIE
+from .periscope import (
+ PeriscopeIE,
+ PeriscopeUserIE,
+)
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .piapro import PiaproIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
+from .piksel import PikselIE
+from .pinkbike import PinkbikeIE
+from .pinterest import (
+ PinterestIE,
+ PinterestCollectionIE,
+)
+from .pixivsketch import (
+ PixivSketchIE,
+ PixivSketchUserIE,
+)
+from .pladform import PladformIE
+from .planetmarathi import PlanetMarathiIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
+from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
+from .plays import PlaysTVIE
+from .playstuff import PlayStuffIE
+from .playsuisse import PlaySuisseIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .plutotv import PlutoTVIE
+from .pluralsight import (
+ PluralsightIE,
+ PluralsightCourseIE,
+)
+from .podchaser import PodchaserIE
+from .podomatic import PodomaticIE
+from .pokemon import (
+ PokemonIE,
+ PokemonWatchIE,
+)
+from .pokergo import (
+ PokerGoIE,
+ PokerGoCollectionIE,
+)
+from .polsatgo import PolsatGoIE
+from .polskieradio import (
+ PolskieRadioIE,
+ PolskieRadioCategoryIE,
+ PolskieRadioPlayerIE,
+ PolskieRadioPodcastIE,
+ PolskieRadioPodcastListIE,
+ PolskieRadioRadioKierowcowIE,
+)
+from .popcorntimes import PopcorntimesIE
+from .popcorntv import PopcornTVIE
+from .porn91 import Porn91IE
+from .porncom import PornComIE
+from .pornflip import PornFlipIE
+from .pornhd import PornHdIE
+from .pornhub import (
+ PornHubIE,
+ PornHubUserIE,
+ PornHubPlaylistIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .pornez import PornezIE
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
+)
+from .premiershiprugby import PremiershipRugbyIE
+from .presstv import PressTVIE
+from .projectveritas import ProjectVeritasIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .prx import (
+ PRXStoryIE,
+ PRXSeriesIE,
+ PRXAccountIE,
+ PRXStoriesSearchIE,
+ PRXSeriesSearchIE
+)
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+ QQMusicPlaylistIE,
+)
+from .r7 import (
+ R7IE,
+ R7ArticleIE,
+)
+from .radiko import RadikoIE, RadikoRadioIE
+from .radiocanada import (
+ RadioCanadaIE,
+ RadioCanadaAudioVideoIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import FranceCultureIE, RadioFranceIE
+from .radiozet import RadioZetPodcastIE
+from .radiokapital import (
+ RadioKapitalIE,
+ RadioKapitalShowIE,
+)
+from .radlive import (
+ RadLiveIE,
+ RadLiveChannelIE,
+ RadLiveSeasonIE,
+)
+from .rai import (
+ RaiPlayIE,
+ RaiPlayLiveIE,
+ RaiPlayPlaylistIE,
+ RaiPlaySoundIE,
+ RaiPlaySoundLiveIE,
+ RaiPlaySoundPlaylistIE,
+ RaiIE,
+)
+from .raywenderlich import (
+ RayWenderlichIE,
+ RayWenderlichCourseIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rcs import (
+ RCSIE,
+ RCSEmbedsIE,
+ RCSVariousIE,
+)
+from .rcti import (
+ RCTIPlusIE,
+ RCTIPlusSeriesIE,
+ RCTIPlusTVIE,
+)
+from .rds import RDSIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullEmbedIE,
+ RedBullTVRrnContentIE,
+ RedBullIE,
+)
+from .reddit import RedditIE
+from .redgifs import (
+ RedGifsIE,
+ RedGifsSearchIE,
+ RedGifsUserIE,
+)
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .rice import RICEIE
+from .rmcdecouverte import RMCDecouverteIE
+from .rockstargames import RockstarGamesIE
+from .rokfin import (
+ RokfinIE,
+ RokfinStackIE,
+ RokfinChannelIE,
+ RokfinSearchIE,
+)
+from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
+from .rottentomatoes import RottenTomatoesIE
+from .rozhlas import RozhlasIE
+from .rtbf import RTBFIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import RtlNlIE
+from .rtl2 import (
+ RTL2IE,
+ RTL2YouIE,
+ RTL2YouSeriesIE,
+)
+from .rtnews import (
+ RTNewsIE,
+ RTDocumentryIE,
+ RTDocumentryPlaylistIE,
+ RuptlyIE,
+)
+from .rtp import RTPIE
+from .rtrfm import RTRFMIE
+from .rts import RTSIE
+from .rtve import (
+ RTVEALaCartaIE,
+ RTVEAudioIE,
+ RTVELiveIE,
+ RTVEInfantilIE,
+ RTVETelevisionIE,
+)
+from .rtvnh import RTVNHIE
+from .rtvs import RTVSIE
+from .ruhd import RUHDIE
+from .rule34video import Rule34VideoIE
+from .rumble import (
+ RumbleEmbedIE,
+ RumbleChannelIE,
+)
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeEmbedIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+ RutubePlaylistIE,
+ RutubeTagsIE,
+)
+from .glomex import (
+ GlomexIE,
+ GlomexEmbedIE,
+)
+from .megatvcom import (
+ MegaTVComIE,
+ MegaTVComEmbedIE,
+)
+from .ant1newsgr import (
+ Ant1NewsGrWatchIE,
+ Ant1NewsGrArticleIE,
+ Ant1NewsGrEmbedIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .ruv import (
+ RuvIE,
+ RuvSpilaIE
+)
+from .safari import (
+ SafariIE,
+ SafariApiIE,
+ SafariCourseIE,
+)
+from .saitosan import SaitosanIE
+from .samplefocus import SampleFocusIE
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
+from .seeker import SeekerIE
+from .senategov import SenateISVPIE, SenateGovIE
+from .sendtonews import SendtoNewsIE
+from .servus import ServusIE
+from .sevenplus import SevenPlusIE
+from .sexu import SexuIE
+from .seznamzpravy import (
+ SeznamZpravyIE,
+ SeznamZpravyArticleIE,
+)
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
+from .shared import (
+ SharedIE,
+ VivoIE,
+)
+from .shemaroome import ShemarooMeIE
+from .showroomlive import ShowRoomLiveIE
+from .simplecast import (
+ SimplecastIE,
+ SimplecastEpisodeIE,
+ SimplecastPodcastIE,
+)
+from .sina import SinaIE
+from .sixplay import SixPlayIE
+from .skeb import SkebIE
+from .skyit import (
+ SkyItPlayerIE,
+ SkyItVideoIE,
+ SkyItVideoLiveIE,
+ SkyItIE,
+ SkyItAcademyIE,
+ SkyItArteIE,
+ CieloTVItIE,
+ TV8ItIE,
+)
+from .skylinewebcams import SkylineWebcamsIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
+from .skynewsau import SkyNewsAUIE
+from .sky import (
+ SkyNewsIE,
+ SkyNewsStoryIE,
+ SkySportsIE,
+ SkySportsNewsIE,
+)
+from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
+from .slutload import SlutloadIE
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .sonyliv import (
+ SonyLIVIE,
+ SonyLIVSeriesIE,
+)
+from .soundcloud import (
+ SoundcloudEmbedIE,
+ SoundcloudIE,
+ SoundcloudSetIE,
+ SoundcloudRelatedIE,
+ SoundcloudUserIE,
+ SoundcloudTrackStationIE,
+ SoundcloudPlaylistIE,
+ SoundcloudSearchIE,
+)
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
+from .southpark import (
+ SouthParkIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkLatIE,
+ SouthParkNlIE
+)
+from .sovietscloset import (
+ SovietsClosetIE,
+ SovietsClosetPlaylistIE
+)
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
+from .stitcher import (
+ StitcherIE,
+ StitcherShowIE,
+)
+from .sport5 import Sport5IE
+from .sportbox import SportBoxIE
+from .sportdeutschland import SportDeutschlandIE
+from .spotify import (
+ SpotifyIE,
+ SpotifyShowIE,
+)
+from .spreaker import (
+ SpreakerIE,
+ SpreakerPageIE,
+ SpreakerShowIE,
+ SpreakerShowPageIE,
+)
+from .springboardplatform import SpringboardPlatformIE
+from .sprout import SproutIE
+from .srgssr import (
+ SRGSSRIE,
+ SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .startv import StarTVIE
+from .steam import SteamIE
+from .storyfire import (
+ StoryFireIE,
+ StoryFireUserIE,
+ StoryFireSeriesIE,
+)
+from .streamable import StreamableIE
+from .streamanity import StreamanityIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streamff import StreamFFIE
+from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
+from .stripchat import StripchatIE
+from .stv import STVPlayerIE
+from .substack import SubstackIE
+from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
+from .svt import (
+ SVTIE,
+ SVTPageIE,
+ SVTPlayIE,
+ SVTSeriesIE,
+)
+from .swrmediathek import SWRMediathekIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import TagesschauIE
+from .tass import TassIE
+from .tbs import TBSIE
+from .tdslifeway import TDSLifewayIE
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
+from .techtalks import TechTalksIE
+from .ted import (
+ TedEmbedIE,
+ TedPlaylistIE,
+ TedSeriesIE,
+ TedTalkIE,
+)
+from .tele5 import Tele5IE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telegram import TelegramEmbedIE
+from .telemb import TeleMBIE
+from .telemundo import TelemundoIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecSquatIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+ TeleQuebecVideoIE,
+)
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .tennistv import TennisTVIE
+from .tenplay import TenPlayIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .tfo import TFOIE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+ ThePlatformIE,
+ ThePlatformFeedIE,
+)
+from .thestar import TheStarIE
+from .thesun import TheSunIE
+from .theta import (
+ ThetaVideoIE,
+ ThetaStreamIE,
+)
+from .theweatherchannel import TheWeatherChannelIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
+from .threespeak import (
+ ThreeSpeakIE,
+ ThreeSpeakUserIE,
+)
+from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+ TikTokSoundIE,
+ TikTokEffectIE,
+ TikTokTagIE,
+ TikTokVMIE,
+ DouyinIE,
+)
+from .tinypic import TinyPicIE
+from .tmz import TMZIE
+from .tnaflix import (
+ TNAFlixNetworkEmbedIE,
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
+from .toggle import (
+ ToggleIE,
+ MeWatchIE,
+)
+from .toggo import (
+ ToggoIE,
+)
+from .tokentube import (
+ TokentubeIE,
+ TokentubeChannelIE
+)
+from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
+from .trovo import (
+ TrovoIE,
+ TrovoVodIE,
+ TrovoChannelVodIE,
+ TrovoChannelClipIE,
+)
+from .trueid import TrueIDIE
+from .trunews import TruNewsIE
+from .trutv import TruTVIE
+from .tube8 import Tube8IE
+from .tubitv import (
+ TubiTvIE,
+ TubiTvShowIE,
+)
+from .tumblr import TumblrIE
+from .tunein import (
+ TuneInClipIE,
+ TuneInStationIE,
+ TuneInProgramIE,
+ TuneInTopicIE,
+ TuneInShortenerIE,
+)
+from .tunepk import TunePkIE
+from .turbo import TurboIE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+ KatsomoIE,
+ MTVUutisetArticleIE,
+)
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
+from .tv2hu import (
+ TV2HuIE,
+ TV2HuSeriesIE,
+)
+from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
+from .tv5unis import (
+ TV5UnisVideoIE,
+ TV5UnisIE,
+)
+from .tva import (
+ TVAIE,
+ QubIE,
+)
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
+from .tver import TVerIE
+from .tvigle import TvigleIE
+from .tvland import TVLandIE
+from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
+from .tvnoe import TVNoeIE
+from .tvnow import (
+ TVNowIE,
+ TVNowFilmIE,
+ TVNowNewIE,
+ TVNowSeasonIE,
+ TVNowAnnualIE,
+ TVNowShowIE,
+)
+from .tvopengr import (
+ TVOpenGrWatchIE,
+ TVOpenGrEmbedIE,
+)
+from .tvp import (
+ TVPEmbedIE,
+ TVPIE,
+ TVPStreamIE,
+ TVPWebsiteIE,
+)
+from .tvplay import (
+ TVPlayIE,
+ ViafreeIE,
+ TVPlayHomeIE,
+)
+from .tvplayer import TVPlayerIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import (
+ TwitCastingIE,
+ TwitCastingLiveIE,
+ TwitCastingUserIE,
+)
+from .twitch import (
+ TwitchVodIE,
+ TwitchCollectionIE,
+ TwitchVideosIE,
+ TwitchVideosClipsIE,
+ TwitchVideosCollectionsIE,
+ TwitchStreamIE,
+ TwitchClipsIE,
+)
+from .twitter import (
+ TwitterCardIE,
+ TwitterIE,
+ TwitterAmplifyIE,
+ TwitterBroadcastIE,
+ TwitterShortenerIE,
+)
+from .udemy import (
+ UdemyIE,
+ UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
+from .ukcolumn import UkColumnIE
+from .uktvplay import UKTVPlayIE
+from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
+from .drooble import DroobleIE
+from .umg import UMGDeIE
+from .unistra import UnistraIE
+from .unity import UnityIE
+from .uol import UOLIE
+from .uplynk import (
+ UplynkIE,
+ UplynkPreplayIE,
+)
+from .urort import UrortIE
+from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+ UstudioIE,
+ UstudioEmbedIE,
+)
+from .utreon import UtreonIE
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veo import VeoIE
+from .veoh import VeohIE
+from .vesti import VestiIE
+from .vevo import (
+ VevoIE,
+ VevoPlaylistIE,
+)
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+ ViceIE,
+ ViceArticleIE,
+ ViceShowIE,
+)
+from .vidbit import VidbitIE
+from .viddler import ViddlerIE
+from .videa import VideaIE
+from .videocampus_sachsen import VideocampusSachsenIE
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomore import (
+ VideomoreIE,
+ VideomoreVideoIE,
+ VideomoreSeasonIE,
+)
+from .videopress import VideoPressIE
+from .vidio import (
+ VidioIE,
+ VidioPremierIE,
+ VidioLiveIE
+)
+from .vidlii import VidLiiIE
+from .vier import VierIE, VierVideosIE
+from .viewlift import (
+ ViewLiftIE,
+ ViewLiftEmbedIE,
+)
+from .viidea import ViideaIE
+from .vimeo import (
+ VimeoIE,
+ VimeoAlbumIE,
+ VimeoChannelIE,
+ VimeoGroupsIE,
+ VimeoLikesIE,
+ VimeoOndemandIE,
+ VimeoReviewIE,
+ VimeoUserIE,
+ VimeoWatchLaterIE,
+ VHXEmbedIE,
+)
+from .vimm import (
+ VimmIE,
+ VimmRecordingIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
+from .viqeo import ViqeoIE
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+)
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+ VKWallPostIE,
+)
+from .vlive import (
+ VLiveIE,
+ VLivePostIE,
+ VLiveChannelIE,
+)
+from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
+from .vodplatform import VODPlatformIE
+from .voicerepublic import VoiceRepublicIE
+from .voicy import (
+ VoicyIE,
+ VoicyChannelIE,
+)
+from .voot import (
+ VootIE,
+ VootSeriesIE,
+)
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
+from .vrt import VRTIE
+from .vrak import VrakIE
+from .vrv import (
+ VRVIE,
+ VRVSeriesIE,
+)
+from .vshare import VShareIE
+from .vtm import VTMIE
+from .medialaan import MedialaanIE
+from .vuclip import VuClipIE
+from .vupload import VuploadIE
+from .vvvvid import (
+ VVVVIDIE,
+ VVVVIDShowIE,
+)
+from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
+from .wakanim import WakanimIE
+from .walla import WallaIE
+from .washingtonpost import (
+ WashingtonPostIE,
+ WashingtonPostArticleIE,
+)
+from .wasdtv import (
+ WASDTVStreamIE,
+ WASDTVRecordIE,
+ WASDTVClipIE,
+)
+from .wat import WatIE
+from .watchbox import WatchBoxIE
+from .watchindianporn import WatchIndianPornIE
+from .wdr import (
+ WDRIE,
+ WDRPageIE,
+ WDRElefantIE,
+ WDRMobileIE,
+)
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
+from .weibo import (
+ WeiboIE,
+ WeiboMobileIE
+)
+from .weiqitv import WeiqiTVIE
+from .willow import WillowIE
+from .wimtv import WimTVIE
+from .whowatch import WhoWatchIE
+from .wistia import (
+ WistiaIE,
+ WistiaPlaylistIE,
+)
+from .worldstarhiphop import WorldStarHipHopIE
+from .wppilot import (
+ WPPilotIE,
+ WPPilotChannelsIE,
+)
+from .wsj import (
+ WSJIE,
+ WSJArticleIE,
+)
+from .wwe import WWEIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+ XHamsterUserIE,
+)
+from .xiami import (
+ XiamiSongIE,
+ XiamiAlbumIE,
+ XiamiArtistIE,
+ XiamiCollectionIE
+)
+from .ximalaya import (
+ XimalayaIE,
+ XimalayaAlbumIE
+)
+from .xinpianchang import XinpianchangIE
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+ YahooIE,
+ YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
+ YahooJapanNewsIE,
+)
+from .yandexdisk import YandexDiskIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+ YandexMusicArtistTracksIE,
+ YandexMusicArtistAlbumsIE,
+)
+from .yandexvideo import (
+ YandexVideoIE,
+ YandexVideoPreviewIE,
+ ZenYandexIE,
+ ZenYandexChannelIE,
+)
+from .yapfiles import YapFilesIE
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import (
+ YoukuIE,
+ YoukuShowIE,
+)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
+from .youporn import YouPornIE
+from .yourporn import YourPornIE
+from .yourupload import YourUploadIE
+from .youtube import (
+ YoutubeIE,
+ YoutubeClipIE,
+ YoutubeFavouritesIE,
+ YoutubeNotificationsIE,
+ YoutubeHistoryIE,
+ YoutubeTabIE,
+ YoutubeLivestreamEmbedIE,
+ YoutubePlaylistIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
+ YoutubeMusicSearchURLIE,
+ YoutubeSubscriptionsIE,
+ YoutubeStoriesIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeYtBeIE,
+ YoutubeYtUserIE,
+ YoutubeWatchLaterIE,
+)
+from .zapiks import ZapiksIE
+from .zattoo import (
+ BBVTVIE,
+ EinsUndEinsTVIE,
+ EWETVIE,
+ GlattvisionTVIE,
+ MNetTVIE,
+ NetPlusIE,
+ OsnatelTVIE,
+ QuantumTVIE,
+ SaltTVIE,
+ SAKTVIE,
+ VTXTVIE,
+ WalyTVIE,
+ ZattooIE,
+ ZattooLiveIE,
+ ZattooMoviesIE,
+ ZattooRecordingsIE,
+)
+from .zdf import ZDFIE, ZDFChannelIE
+from .zee5 import (
+ Zee5IE,
+ Zee5SeriesIE,
+)
+from .zhihu import ZhihuIE
+from .zingmp3 import (
+ ZingMp3IE,
+ ZingMp3AlbumIE,
+ ZingMp3ChartHomeIE,
+ ZingMp3WeekChartIE,
+ ZingMp3ChartMusicVideoIE,
+ ZingMp3UserIE,
+)
+from .zoom import ZoomIE
+from .zype import ZypeIE
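
The 2,198-line _extractors.py listing above is the flat registry that the lazy-extractor tooling consumes. As a rough sketch of how such a module is typically enumerated, assuming (as in youtube-dl's old extractors.py) that classes are discovered by their IE suffix; the real machinery in yt_dlp/extractor/__init__.py may differ:

    # Hypothetical consumer of the registry module; names are illustrative, not yt-dlp's API.
    import yt_dlp.extractor._extractors as _extractors

    _ALL_CLASSES = [
        klass for name, klass in vars(_extractors).items()
        if name.endswith('IE') and name != 'GenericIE'
    ]
    _ALL_CLASSES.append(_extractors.GenericIE)  # the catch-all extractor must be tried last

    def find_suitable(url):
        # Return the first extractor class whose _VALID_URL pattern accepts the URL.
        return next(ie for ie in _ALL_CLASSES if ie.suitable(url))
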
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 1b9deeae8..a75efdd0f 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -7,16 +7,17 @@ import json
import re
import struct
import time
+import urllib.parse
+import urllib.request
import urllib.response
import uuid
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
-from ..compat import compat_urllib_parse_urlparse, compat_urllib_request
from ..utils import (
ExtractorError,
bytes_to_intlist,
- decode_base,
+ decode_base_n,
int_or_none,
intlist_to_bytes,
request_to_url,
@@ -33,7 +34,7 @@ def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
ydl._opener.add_handler(handler)
@@ -46,7 +47,7 @@ def remove_opener(ydl, handler):
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
opener = ydl._opener
- assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ assert isinstance(ydl._opener, urllib.request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
@@ -96,7 +97,7 @@ def remove_opener(ydl, handler):
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
-class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
+class AbemaLicenseHandler(urllib.request.BaseHandler):
handler_order = 499
STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
HKEY = b'3AF0298C219469522A313570E8583005A642E73EDD58E3EA2FB7339D3DF1597E'
@@ -109,7 +110,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
self.ie = ie
def _get_videokey_from_ticket(self, ticket):
- to_show = self.ie._downloader.params.get('verbose', False)
+ to_show = self.ie.get_param('verbose', False)
media_token = self.ie._get_media_token(to_show=to_show)
license_response = self.ie._download_json(
@@ -123,7 +124,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
'Content-Type': 'application/json',
})
- res = decode_base(license_response['k'], self.STRTABLE)
+ res = decode_base_n(license_response['k'], table=self.STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff))
h = hmac.new(
@@ -136,7 +137,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler):
def abematv_license_open(self, url):
url = request_to_url(url)
- ticket = compat_urllib_parse_urlparse(url).netloc
+ ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data),
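
In the abematv hunks above, decode_base is renamed to decode_base_n and gains an explicit table argument: it turns the license server's base-58-style response string into one big integer, which struct.pack('>QQ', ...) then splits into the 16-byte video key. A minimal stand-in with the same contract, assuming the helper performs plain positional base-N decoding over the supplied symbol table (the real one lives in yt_dlp.utils):

    import struct

    STRTABLE = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'  # 58 symbols

    def decode_base_n(string, table):
        # Interpret `string` as a number written in base len(table),
        # with `table` as the digit alphabet.
        base = len(table)
        result = 0
        for char in string:
            result = result * base + table.index(char)
        return result

    res = decode_base_n('2kT9', STRTABLE)  # toy input, not a real license response
    key = struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)  # 16 raw key bytes
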
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index a8e6c4363..a2666c2b8 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1,3 +1,4 @@
+import getpass
import json
import re
import time
@@ -5,19 +6,15 @@ import urllib.error
import xml.etree.ElementTree as etree
from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_getpass
-)
+from ..compat import compat_urlparse
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
unescapeHTML,
- urlencode_postdata,
unified_timestamp,
- ExtractorError,
- NO_DEFAULT,
+ urlencode_postdata,
)
-
MSO_INFO = {
'DTV': {
'name': 'DIRECTV',
@@ -1431,7 +1428,7 @@ class AdobePassIE(InfoExtractor):
guid = xml_text(resource, 'guid') if '<' in resource else resource
count = 0
while count < 2:
- requestor_info = self._downloader.cache.load(self._MVPD_CACHE, requestor_id) or {}
+ requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
authn_token = requestor_info.get('authn_token')
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
authn_token = None
@@ -1506,7 +1503,7 @@ class AdobePassIE(InfoExtractor):
'send_confirm_link': False,
'send_token': True
}))
- philo_code = compat_getpass('Type auth code you have received [Return]: ')
+ philo_code = getpass.getpass('Type auth code you have received [Return]: ')
self._download_webpage(
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
'token': philo_code
@@ -1726,12 +1723,12 @@ class AdobePassIE(InfoExtractor):
raise_mvpd_required()
raise
if '<pendingLogout' in session:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
requestor_info['authn_token'] = authn_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
authz_token = requestor_info.get(guid)
if authz_token and is_expired(authz_token, 'simpleTokenTTL'):
@@ -1747,14 +1744,14 @@ class AdobePassIE(InfoExtractor):
'userMeta': '1',
}), headers=mvpd_headers)
if '<pendingLogout' in authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
if '<error' in authorize:
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
authz_token = unescapeHTML(xml_text(authorize, 'authzToken'))
requestor_info[guid] = authz_token
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
+ self.cache.store(self._MVPD_CACHE, requestor_id, requestor_info)
mvpd_headers.update({
'ap_19': xml_text(authn_token, 'simpleSamlNameID'),
@@ -1770,7 +1767,7 @@ class AdobePassIE(InfoExtractor):
'hashed_guid': 'false',
}), headers=mvpd_headers)
if '<pendingLogout' in short_authorize:
- self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {})
+ self.cache.store(self._MVPD_CACHE, requestor_id, {})
count += 1
continue
return short_authorize
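
The recurring adobepass edits above swap self._downloader.cache for the self.cache accessor that InfoExtractor now exposes directly; the load/expire/store round-trip itself is unchanged. The pattern in isolation, as a hedged sketch (fetch_token and is_expired stand in for the provider login flow and the expiry check in the hunks):

    def load_or_refresh_token(ie, cache_section, requestor_id, fetch_token, is_expired):
        # `ie` is an InfoExtractor instance; cache entries persist across runs.
        info = ie.cache.load(cache_section, requestor_id) or {}
        token = info.get('authn_token')
        if token is None or is_expired(token):
            token = fetch_token()  # e.g. the MVPD authentication dance
            info['authn_token'] = token
            ie.cache.store(cache_section, requestor_id, info)
        return token
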
diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py
deleted file mode 100644
index fe2b70aed..000000000
--- a/yt_dlp/extractor/animelab.py
+++ /dev/null
@@ -1,270 +0,0 @@
-from .common import InfoExtractor
-
-from ..utils import (
- ExtractorError,
- urlencode_postdata,
- int_or_none,
- str_or_none,
- determine_ext,
-)
-
-from ..compat import compat_HTTPError
-
-
-class AnimeLabBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://www.animelab.com/login'
- _NETRC_MACHINE = 'animelab'
- _LOGGED_IN = False
-
- def _is_logged_in(self, login_page=None):
- if not self._LOGGED_IN:
- if not login_page:
- login_page = self._download_webpage(self._LOGIN_URL, None, 'Downloading login page')
- AnimeLabBaseIE._LOGGED_IN = 'Sign In' not in login_page
- return self._LOGGED_IN
-
- def _perform_login(self, username, password):
- if self._is_logged_in():
- return
-
- login_form = {
- 'email': username,
- 'password': password,
- }
-
- try:
- response = self._download_webpage(
- self._LOGIN_URL, None, 'Logging in', 'Wrong login info',
- data=urlencode_postdata(login_form),
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- raise ExtractorError('Unable to log in (wrong credentials?)', expected=True)
- raise
-
- if not self._is_logged_in(response):
- raise ExtractorError('Unable to login (cannot verify if logged in)')
-
- def _real_initialize(self):
- if not self._is_logged_in():
- self.raise_login_required('Login is required to access any AnimeLab content')
-
-
-class AnimeLabIE(AnimeLabBaseIE):
- _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42',
- 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f',
- 'info_dict': {
- 'id': '383',
- 'ext': 'mp4',
- 'display_id': 'fullmetal-alchemist-brotherhood-episode-42',
- 'title': 'Fullmetal Alchemist: Brotherhood - Episode 42 - Signs of a Counteroffensive',
- 'description': 'md5:103eb61dd0a56d3dfc5dbf748e5e83f4',
- 'series': 'Fullmetal Alchemist: Brotherhood',
- 'episode': 'Signs of a Counteroffensive',
- 'episode_number': 42,
- 'duration': 1469,
- 'season': 'Season 1',
- 'season_number': 1,
- 'season_id': '38',
- },
- 'params': {
- # Ensure the same video is downloaded whether the user is premium or not
- 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]',
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- # unfortunately we can get different URLs for the same formats
- # e.g. if we are using a "free" account so no dubs available
- # (so _remove_duplicate_formats is not effective)
- # so we use a dictionary as a workaround
- formats = {}
- for language_option_url in ('https://www.animelab.com/player/%s/subtitles',
- 'https://www.animelab.com/player/%s/dubbed'):
- actual_url = language_option_url % display_id
- webpage = self._download_webpage(actual_url, display_id, 'Downloading URL ' + actual_url)
-
- video_collection = self._parse_json(self._search_regex(r'new\s+?AnimeLabApp\.VideoCollection\s*?\((.*?)\);', webpage, 'AnimeLab VideoCollection'), display_id)
- position = int_or_none(self._search_regex(r'playlistPosition\s*?=\s*?(\d+)', webpage, 'Playlist Position'))
-
- raw_data = video_collection[position]['videoEntry']
-
- video_id = str_or_none(raw_data['id'])
-
- # create a title from many sources (while grabbing other info)
- # TODO use more fallback sources to get some of these
- series = raw_data.get('showTitle')
- video_type = raw_data.get('videoEntryType', {}).get('name')
- episode_number = raw_data.get('episodeNumber')
- episode_name = raw_data.get('name')
-
- title_parts = (series, video_type, episode_number, episode_name)
- if None not in title_parts:
- title = '%s - %s %s - %s' % title_parts
- else:
- title = episode_name
-
- description = raw_data.get('synopsis') or self._og_search_description(webpage, default=None)
-
- duration = int_or_none(raw_data.get('duration'))
-
- thumbnail_data = raw_data.get('images', [])
- thumbnails = []
- for thumbnail in thumbnail_data:
- for instance in thumbnail['imageInstances']:
- image_data = instance.get('imageInfo', {})
- thumbnails.append({
- 'id': str_or_none(image_data.get('id')),
- 'url': image_data.get('fullPath'),
- 'width': image_data.get('width'),
- 'height': image_data.get('height'),
- })
-
- season_data = raw_data.get('season', {}) or {}
- season = str_or_none(season_data.get('name'))
- season_number = int_or_none(season_data.get('seasonNumber'))
- season_id = str_or_none(season_data.get('id'))
-
- for video_data in raw_data['videoList']:
- current_video_list = {}
- current_video_list['language'] = video_data.get('language', {}).get('languageCode')
-
- is_hardsubbed = video_data.get('hardSubbed')
-
- for video_instance in video_data['videoInstances']:
- httpurl = video_instance.get('httpUrl')
- url = httpurl if httpurl else video_instance.get('rtmpUrl')
- if url is None:
- # this video format is unavailable to the user (not premium etc.)
- continue
-
- current_format = current_video_list.copy()
-
- format_id_parts = []
-
- format_id_parts.append(str_or_none(video_instance.get('id')))
-
- if is_hardsubbed is not None:
- if is_hardsubbed:
- format_id_parts.append('yeshardsubbed')
- else:
- format_id_parts.append('nothardsubbed')
-
- format_id_parts.append(current_format['language'])
-
- format_id = '_'.join([x for x in format_id_parts if x is not None])
-
- ext = determine_ext(url)
- if ext == 'm3u8':
- for format_ in self._extract_m3u8_formats(
- url, video_id, m3u8_id=format_id, fatal=False):
- formats[format_['format_id']] = format_
- continue
- elif ext == 'mpd':
- for format_ in self._extract_mpd_formats(
- url, video_id, mpd_id=format_id, fatal=False):
- formats[format_['format_id']] = format_
- continue
-
- current_format['url'] = url
- quality_data = video_instance.get('videoQuality')
- if quality_data:
- quality = quality_data.get('name') or quality_data.get('description')
- else:
- quality = None
-
- height = None
- if quality:
- height = int_or_none(self._search_regex(r'(\d+)p?$', quality, 'Video format height', default=None))
-
- if height is None:
- self.report_warning('Could not get height of video')
- else:
- current_format['height'] = height
- current_format['format_id'] = format_id
-
- formats[current_format['format_id']] = current_format
-
- formats = list(formats.values())
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'series': series,
- 'episode': episode_name,
- 'episode_number': int_or_none(episode_number),
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'formats': formats,
- 'season': season,
- 'season_number': season_number,
- 'season_id': season_id,
- }
-
-
-class AnimeLabShowsIE(AnimeLabBaseIE):
- _VALID_URL = r'https?://(?:www\.)?animelab\.com/shows/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'https://www.animelab.com/shows/attack-on-titan',
- 'info_dict': {
- 'id': '45',
- 'title': 'Attack on Titan',
- 'description': 'md5:989d95a2677e9309368d5cf39ba91469',
- },
- 'playlist_count': 59,
- 'skip': 'All AnimeLab content requires authentication',
- }
-
- def _real_extract(self, url):
- _BASE_URL = 'http://www.animelab.com'
- _SHOWS_API_URL = '/api/videoentries/show/videos/'
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id, 'Downloading requested URL')
-
- show_data_str = self._search_regex(r'({"id":.*}),\svideoEntry', webpage, 'AnimeLab show data')
- show_data = self._parse_json(show_data_str, display_id)
-
- show_id = str_or_none(show_data.get('id'))
- title = show_data.get('name')
- description = show_data.get('shortSynopsis') or show_data.get('longSynopsis')
-
- entries = []
- for season in show_data['seasons']:
- season_id = season['id']
- get_data = urlencode_postdata({
- 'seasonId': season_id,
- 'limit': 1000,
- })
- # despite using urlencode_postdata, we are sending a GET request
- target_url = _BASE_URL + _SHOWS_API_URL + show_id + "?" + get_data.decode('utf-8')
- response = self._download_webpage(
- target_url,
- None, 'Season id %s' % season_id)
-
- season_data = self._parse_json(response, display_id)
-
- for video_data in season_data['list']:
- entries.append(self.url_result(
- _BASE_URL + '/player/' + video_data['slug'], 'AnimeLab',
- str_or_none(video_data.get('id')), video_data.get('name')
- ))
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': title,
- 'description': description,
- 'entries': entries,
- }
-
-# TODO implement myqueue
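
One detail worth noting from the deleted extractor: because the same format could surface under different URLs (for instance on a free account with no dubs available), it deduplicated by keying formats on format_id in a dict rather than relying on _remove_duplicate_formats. That trick in isolation:

    def dedupe_formats(candidates):
        # Keep the last format dict seen for each format_id, as the removed code did.
        by_id = {fmt['format_id']: fmt for fmt in candidates}
        return list(by_id.values())
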
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index c85d5297d..1ca6ddc4d 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -1,36 +1,34 @@
-import re
import json
+import re
+import urllib.parse
+
from .common import InfoExtractor
-from .youtube import YoutubeIE, YoutubeBaseInfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_HTTPError
-)
+from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
+from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..utils import (
+ KNOWN_EXTENSIONS,
+ ExtractorError,
+ HEADRequest,
bug_reports_message,
clean_html,
dict_get,
extract_attributes,
- ExtractorError,
get_element_by_id,
- HEADRequest,
int_or_none,
join_nonempty,
- KNOWN_EXTENSIONS,
merge_dicts,
mimetype2ext,
orderedSet,
parse_duration,
parse_qs,
- str_to_int,
str_or_none,
+ str_to_int,
traverse_obj,
try_get,
unified_strdate,
unified_timestamp,
+ url_or_none,
urlhandle_detect_ext,
- url_or_none
)
@@ -143,7 +141,7 @@ class ArchiveOrgIE(InfoExtractor):
return json.loads(extract_attributes(element)['value'])
def _real_extract(self, url):
- video_id = compat_urllib_parse_unquote_plus(self._match_id(url))
+ video_id = urllib.parse.unquote_plus(self._match_id(url))
identifier, entry_id = (video_id.split('/', 1) + [None])[:2]
# Archive.org metadata API doesn't clearly demarcate playlist entries
@@ -442,9 +440,10 @@ class YoutubeWebArchiveIE(InfoExtractor):
'only_matching': True
},
]
- _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
- _YT_INITIAL_BOUNDARY_RE = r'(?:(?:var\s+meta|</script|\n)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_BOUNDARY_RE
+ _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
+ _YT_INITIAL_PLAYER_RESPONSE_RE = fr'''(?x)
+ (?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*|
+ {YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE}'''
_YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers
_YT_ALL_THUMB_SERVERS = orderedSet(
@@ -474,11 +473,6 @@ class YoutubeWebArchiveIE(InfoExtractor):
elif not isinstance(res, list) or len(res) != 0:
self.report_warning('Error while parsing CDX API response' + bug_reports_message())
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- regex), webpage, name, default='{}'), video_id, fatal=False)
-
def _extract_webpage_title(self, webpage):
page_title = self._html_extract_title(webpage, default='')
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
@@ -488,10 +482,11 @@ class YoutubeWebArchiveIE(InfoExtractor):
def _extract_metadata(self, video_id, webpage):
search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
- player_response = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id, 'initial player response') or {}
+ player_response = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
+ video_id, default={})
+ initial_data = self._search_json(
+ self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
initial_data_video = traverse_obj(
initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
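
The archiveorg hunks retire the local _extract_yt_initial_variable helper (a regex plus _parse_json) in favour of the newer _search_json, which locates a JSON object after a start pattern and parses it. A toy standalone equivalent, assuming a non-greedy match and plain json.loads suffice for illustration; the real helper balances braces and tolerates JavaScript-flavoured syntax:

    import json
    import re

    def search_json_after(start_pattern, text, default=None):
        # Find `start_pattern`, then parse the {...} object that follows it.
        m = re.search(start_pattern + r'\s*(\{.+?\})\s*[;<\n]', text, re.DOTALL)
        if not m:
            return default
        try:
            return json.loads(m.group(1))
        except ValueError:
            return default

    page = 'var meta = 1; ytInitialData = {"a": 1};</script>'
    print(search_json_after(r'ytInitialData\s*=', page))  # {'a': 1}
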
diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py
index 96b134fa0..c80ce2233 100644
--- a/yt_dlp/extractor/arnes.py
+++ b/yt_dlp/extractor/arnes.py
@@ -90,7 +90,7 @@ class ArnesIE(InfoExtractor):
'timestamp': parse_iso8601(video.get('creationTime')),
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template=f'{self._BASE_URL}/?channel=%s'),
+ 'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'),
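
The one-line arnes change tracks format_field's new positional signature, format_field(obj, field, template), replacing the old template= keyword. Its contract as used here, in a deliberately simplified stand-in (the real helper is yt_dlp.utils.format_field and accepts further options):

    def format_field(obj, field=None, template='%s', default=''):
        # Render obj (or obj[field]) through template; return `default` when the value is missing.
        value = obj if field is None else obj.get(field)
        return template % value if value not in (None, '') else default

    print(format_field('abc123', None, 'https://video.arnes.si/?channel=%s'))  # filled URL
    print(format_field(None, None, 'https://video.arnes.si/?channel=%s'))      # '' (treated as absent)
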
diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py
new file mode 100644
index 000000000..3f7b1e9f8
--- /dev/null
+++ b/yt_dlp/extractor/atscaleconf.py
@@ -0,0 +1,34 @@
+import re
+
+from .common import InfoExtractor
+
+
+class AtScaleConfEventIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atscaleconference\.com/events/(?P<id>[^/&$?]+)'
+
+ _TESTS = [{
+ 'url': 'https://atscaleconference.com/events/data-scale-spring-2022/',
+ 'playlist_mincount': 13,
+ 'info_dict': {
+ 'id': 'data-scale-spring-2022',
+ 'title': 'Data @Scale Spring 2022',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }, {
+ 'url': 'https://atscaleconference.com/events/video-scale-2021/',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'video-scale-2021',
+ 'title': 'Video @Scale 2021',
+ 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55'
+ },
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+
+ return self.playlist_from_matches(
+ re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage),
+ ie='Generic', playlist_id=id,
+ title=self._og_search_title(webpage), description=self._og_search_description(webpage))
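
The new extractor hands every matched data-url off to the generic extractor through playlist_from_matches. A hedged sketch of what that convenience wraps; argument names beyond those visible above are assumptions, while url_result and playlist_result are real InfoExtractor helpers:

    # Inside an InfoExtractor subclass:
    def playlist_from_matches(self, matches, playlist_id=None, title=None, description=None, ie=None):
        entries = [self.url_result(url, ie) for url in matches]
        return self.playlist_result(entries, playlist_id, title, description)
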
diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py
index 189d1224f..0105d9db8 100644
--- a/yt_dlp/extractor/audius.py
+++ b/yt_dlp/extractor/audius.py
@@ -1,8 +1,8 @@
import random
from .common import InfoExtractor
-from ..utils import ExtractorError, try_get, compat_str, str_or_none
-from ..compat import compat_urllib_parse_unquote
+from ..compat import compat_str, compat_urllib_parse_unquote
+from ..utils import ExtractorError, str_or_none, try_get
class AudiusBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py
index d289f6be3..6fc938de9 100644
--- a/yt_dlp/extractor/awaan.py
+++ b/yt_dlp/extractor/awaan.py
@@ -41,7 +41,7 @@ class AWAANBaseIE(InfoExtractor):
'id': video_id,
'title': title,
'description': video_data.get('description_en') or video_data.get('description_ar'),
- 'thumbnail': format_field(img, template='http://admin.mangomolo.com/analytics/%s'),
+ 'thumbnail': format_field(img, None, 'http://admin.mangomolo.com/analytics/%s'),
'duration': int_or_none(video_data.get('duration')),
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
'is_live': is_live,
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 9cb019a49..5ddeef7b5 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1,16 +1,12 @@
-import xml.etree.ElementTree
import functools
import itertools
import json
import re
+import urllib.error
+import xml.etree.ElementTree
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_error,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..utils import (
ExtractorError,
OnDemandPagedList,
@@ -391,7 +387,7 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False)
except ExtractorError as e:
- if not (isinstance(e.exc_info[1], compat_urllib_error.HTTPError)
+ if not (isinstance(e.exc_info[1], urllib.error.HTTPError)
and e.exc_info[1].code in (403, 404)):
raise
fmts = []
diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py
index 8f9849d9b..5ae4b917a 100644
--- a/yt_dlp/extractor/bellmedia.py
+++ b/yt_dlp/extractor/bellmedia.py
@@ -24,7 +24,7 @@ class BellMediaIE(InfoExtractor):
)/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
_TESTS = [{
'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
- 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
+ 'md5': '3e5b8e38370741d5089da79161646635',
'info_dict': {
'id': '1403070',
'ext': 'flv',
@@ -32,6 +32,14 @@ class BellMediaIE(InfoExtractor):
'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
'upload_date': '20180525',
'timestamp': 1527288600,
+ 'season_id': 73997,
+ 'season': '2018',
+ 'thumbnail': 'http://images2.9c9media.com/image_asset/2018_5_25_baf30cbd-b28d-4a18-9903-4bb8713b00f5_PNG_956x536.jpg',
+ 'tags': [],
+ 'categories': ['ETFs'],
+ 'season_number': 8,
+ 'duration': 272.038,
+ 'series': 'Market Call Tonight',
},
}, {
'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index ead0dd88b..d695d9b49 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -677,6 +677,11 @@ class BilibiliAudioIE(BilibiliAudioBaseIE):
'vcodec': 'none'
}]
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
song = self._call_api('song/info', au_id)
title = song['title']
statistic = song.get('statistic') or {}
@@ -784,7 +789,8 @@ class BiliIntlBaseIE(InfoExtractor):
def json2srt(self, json):
data = '\n\n'.join(
f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
- for i, line in enumerate(json['body']) if line.get('content'))
+ for i, line in enumerate(traverse_obj(json, (
+ 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
return data
def _get_subtitles(self, *, ep_id=None, aid=None):
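
The rewritten json2srt filters subtitle entries with a traverse_obj predicate instead of checking only 'content', so malformed lines missing a timestamp are skipped too. The same filtering spelled out as a plain comprehension (sketch of what the lambda admits):

    body = [
        {'from': 1, 'to': 2, 'content': 'hello'},
        {'from': 2, 'to': 4, 'content': ''},  # dropped: falsy content
    ]
    lines = [l for l in body
             if l.get('content') and l.get('from') and l.get('to')]
    # only the first entry survives, mirroring the lambda above
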
@@ -947,12 +953,11 @@ class BiliIntlIE(BiliIntlBaseIE):
video_id = ep_id or aid
webpage = self._download_webpage(url, video_id)
# Bstation layout
- initial_data = self._parse_json(self._search_regex(
- r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), video_id, fatal=False) or {}
- video_data = (
- traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
- or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {})
+ initial_data = (
+ self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
+ or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
+ video_data = traverse_obj(
+ initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict)
if season_id and not video_data:
# Non-Bstation layout, read through episode list
@@ -960,7 +965,7 @@ class BiliIntlIE(BiliIntlBaseIE):
video_data = traverse_obj(season_json,
('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id),
expected_type=dict, get_all=False)
- return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
+ return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid)
class BiliIntlSeriesIE(BiliIntlBaseIE):
diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py
index c0aaeae02..c842c342c 100644
--- a/yt_dlp/extractor/bloomberg.py
+++ b/yt_dlp/extractor/bloomberg.py
@@ -7,13 +7,11 @@ class BloombergIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
_TESTS = [{
- 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2',
- # The md5 checksum changes
+ 'url': 'https://www.bloomberg.com/news/videos/2021-09-14/apple-unveils-the-new-iphone-13-stock-doesn-t-move-much-video',
'info_dict': {
- 'id': 'qurhIVlJSB6hzkVi229d8g',
+ 'id': 'V8cFcYMxTHaMcEiiYVr39A',
'ext': 'flv',
- 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies',
- 'description': 'md5:a8ba0302912d03d246979735c17d2761',
+ 'title': 'Apple Unveils the New IPhone 13, Stock Doesn\'t Move Much',
},
'params': {
'format': 'best[format_id^=hds]',
@@ -57,7 +55,7 @@ class BloombergIE(InfoExtractor):
title = re.sub(': Video$', '', self._og_search_title(webpage))
embed_info = self._download_json(
- 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id)
+ 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id)
formats = []
for stream in embed_info['streams']:
stream_url = stream.get('url')
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index 936c34e15..a5412897d 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -600,9 +600,9 @@ class BrightcoveNewIE(AdobePassIE):
account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
policy_key_id = '%s_%s' % (account_id, player_id)
- policy_key = self._downloader.cache.load('brightcove', policy_key_id)
+ policy_key = self.cache.load('brightcove', policy_key_id)
policy_key_extracted = False
- store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
+ store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
def extract_policy_key():
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index cac3f1e9d..999b7bc53 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -304,13 +304,13 @@ class CBCGemIE(InfoExtractor):
def _get_claims_token(self, email, password):
if not self.claims_token_valid():
self._claims_token = self._new_claims_token(email, password)
- self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
+ self.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
return self._claims_token
def _real_initialize(self):
if self.claims_token_valid():
return
- self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token')
+ self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
""" Find a valid video url and convert it to the secret variant """
diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py
index b11e1f74e..1bc0f07f2 100644
--- a/yt_dlp/extractor/ccc.py
+++ b/yt_dlp/extractor/ccc.py
@@ -75,6 +75,7 @@ class CCCIE(InfoExtractor):
'thumbnail': event_data.get('thumb_url'),
'timestamp': parse_iso8601(event_data.get('date')),
'duration': int_or_none(event_data.get('length')),
+ 'view_count': int_or_none(event_data.get('view_count')),
'tags': event_data.get('tags'),
'formats': formats,
}
diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py
index 9b257bee9..6d01c60d5 100644
--- a/yt_dlp/extractor/cda.py
+++ b/yt_dlp/extractor/cda.py
@@ -1,13 +1,9 @@
import codecs
-import re
import json
+import re
from .common import InfoExtractor
-from ..compat import (
- compat_chr,
- compat_ord,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_ord, compat_urllib_parse_unquote
from ..utils import (
ExtractorError,
float_or_none,
@@ -16,8 +12,8 @@ from ..utils import (
multipart_encode,
parse_duration,
random_birthday,
- urljoin,
try_get,
+ urljoin,
)
@@ -144,7 +140,7 @@ class CDAIE(InfoExtractor):
b = []
for c in a:
f = compat_ord(c)
- b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f))
+ b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py
index 7e8c0bfc9..e54d92a86 100644
--- a/yt_dlp/extractor/chingari.py
+++ b/yt_dlp/extractor/chingari.py
@@ -1,11 +1,11 @@
import itertools
import json
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
str_to_int,
url_or_none,
@@ -47,8 +47,8 @@ class ChingariBaseIE(InfoExtractor):
'id': id,
'extractor_key': ChingariIE.ie_key(),
'extractor': 'Chingari',
- 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
- 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))),
+ 'title': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
+ 'description': urllib.parse.unquote_plus(clean_html(post_data.get('caption'))),
'duration': media_data.get('duration'),
'thumbnail': url_or_none(thumbnail),
'like_count': post_data.get('likeCount'),
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index ebeca4395..4fbcfe203 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1,6 +1,10 @@
import base64
import collections
+import getpass
import hashlib
+import http.client
+import http.cookiejar
+import http.cookies
import itertools
import json
import math
@@ -9,24 +13,12 @@ import os
import random
import sys
import time
+import urllib.parse
+import urllib.request
import xml.etree.ElementTree
-from ..compat import (
- compat_cookiejar_Cookie,
- compat_cookies_SimpleCookie,
- compat_etree_fromstring,
- compat_expanduser,
- compat_getpass,
- compat_http_client,
- compat_os_name,
- compat_str,
- compat_urllib_error,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
- re,
-)
+from ..compat import functools, re # isort: split
+from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
@@ -35,6 +27,7 @@ from ..utils import (
ExtractorError,
GeoRestrictedError,
GeoUtils,
+ LenientJSONDecoder,
RegexNotFoundError,
UnsupportedError,
age_restricted,
@@ -384,6 +377,11 @@ class InfoExtractor:
release_year: Year (YYYY) when the album was released.
composer: Composer of the piece
+ The following fields should only be set for clips that should be cut from the original video:
+
+ section_start: Start time of the section in seconds
+ section_end: End time of the section in seconds
+
Unless mentioned otherwise, the fields should be Unicode strings.
Unless mentioned otherwise, None is equivalent to absence of information.
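
The two clip fields are plain info-dict keys; an extractor that yields a cut of a longer video would return something like the following (illustrative values only):

    info = {
        'id': 'clip123',
        'title': 'Some clip',
        'section_start': 30,  # seconds into the original video
        'section_end': 75,
    }
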
@@ -610,8 +608,7 @@ class InfoExtractor:
if ip_block:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- self._downloader.write_debug(
- '[debug] Using fake IP %s as X-Forwarded-For' % self._x_forwarded_for_ip)
+ self.write_debug(f'Using fake IP {self._x_forwarded_for_ip} as X-Forwarded-For')
return
# Path 2: bypassing based on country code
@@ -666,7 +663,7 @@ class InfoExtractor:
if hasattr(e, 'countries'):
kwargs['countries'] = e.countries
raise type(e)(e.orig_msg, **kwargs)
- except compat_http_client.IncompleteRead as e:
+ except http.client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@@ -690,6 +687,14 @@ class InfoExtractor:
"""Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
+ @property
+ def cache(self):
+ return self._downloader.cache
+
+ @property
+ def cookiejar(self):
+ return self._downloader.cookiejar
+
def _initialize_pre_login(self):
""" Intialization before login. Redefine in subclasses."""
pass
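
The new cache and cookiejar properties let extractors drop the self._downloader indirection, which is exactly what the brightcove.py and cbc.py hunks above switch to. Typical use inside an extractor (sketch; SomeSiteIE and _fetch_fresh_token are hypothetical):

    from yt_dlp.extractor.common import InfoExtractor

    class SomeSiteIE(InfoExtractor):
        def _real_initialize(self):
            token = self.cache.load('somesite', 'token')
            if not token:
                token = self._fetch_fresh_token()  # assumed helper
                self.cache.store('somesite', 'token', token)
            self._token = token
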
@@ -717,7 +722,7 @@ class InfoExtractor:
@staticmethod
def __can_accept_status_code(err, expected_status):
- assert isinstance(err, compat_urllib_error.HTTPError)
+ assert isinstance(err, urllib.error.HTTPError)
if expected_status is None:
return False
elif callable(expected_status):
@@ -725,7 +730,14 @@ class InfoExtractor:
else:
return err.code in variadic(expected_status)
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+ def _create_request(self, url_or_request, data=None, headers=None, query=None):
+ if isinstance(url_or_request, urllib.request.Request):
+ return update_Request(url_or_request, data=data, headers=headers, query=query)
+ if query:
+ url_or_request = update_url_query(url_or_request, query)
+ return sanitized_Request(url_or_request, data, headers or {})
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
"""
Return the response handle.
@@ -753,21 +765,13 @@ class InfoExtractor:
# geo unrestricted country. We will do so once we encounter any
# geo restriction error.
if self._x_forwarded_for_ip:
- if 'X-Forwarded-For' not in headers:
- headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+ headers = (headers or {}).copy()
+ headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
- if isinstance(url_or_request, compat_urllib_request.Request):
- url_or_request = update_Request(
- url_or_request, data=data, headers=headers, query=query)
- else:
- if query:
- url_or_request = update_url_query(url_or_request, query)
- if data is not None or headers:
- url_or_request = sanitized_Request(url_or_request, data, headers)
try:
- return self._downloader.urlopen(url_or_request)
+ return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
except network_exceptions as err:
- if isinstance(err, compat_urllib_error.HTTPError):
+ if isinstance(err, urllib.error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
# being closed before it can be read. Works around the
@@ -788,14 +792,42 @@ class InfoExtractor:
self.report_warning(errmsg)
return False
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
+ encoding=None, data=None, headers={}, query={}, expected_status=None):
"""
Return a tuple (page content as string, URL handle).
- See _download_webpage docstring for arguments specification.
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a urllib.request.Request object
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether error should be considered fatal,
+ i.e. whether it should cause ExtractionError to be raised,
+ otherwise a warning will be reported and extraction continued
+ encoding -- encoding for a page content decoding, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+ expected_status -- allows accepting failed HTTP requests (non-2xx
+ status codes) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx),
+ which are always accepted.
"""
+
# Strip hashes from the URL (#1038)
- if isinstance(url_or_request, (compat_str, str)):
+ if isinstance(url_or_request, str):
url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
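
With request construction factored into _create_request(), headers and query now default to None and the X-Forwarded-For header is merged via setdefault instead of mutating the caller's dict. A typical call inside an extractor method, tolerating a 404 (hedged sketch of the calling convention):

    webpage, urlh = self._download_webpage_handle(
        'https://example.com/video/123', '123',
        headers={'Referer': 'https://example.com/'},
        query={'page': 1},
        expected_status=404)  # the 404 body is returned instead of raising
    if urlh.getcode() == 404:
        ...  # inspect the "not found" page content
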
@@ -850,140 +882,48 @@ class InfoExtractor:
'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
expected=True)
+ def _request_dump_filename(self, url, video_id):
+ basen = f'{video_id}_{url}'
+ trim_length = self.get_param('trim_file_name') or 240
+ if len(basen) > trim_length:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:trim_length - len(h)] + h
+ filename = sanitize_filename(f'{basen}.dump', restricted=True)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = fR'\\?\{absfilepath}'
+ return filename
+
+ def __decode_webpage(self, webpage_bytes, encoding, headers):
+ if not encoding:
+ encoding = self._guess_encoding_from_content(headers.get('Content-Type', ''), webpage_bytes)
+ try:
+ return webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ return webpage_bytes.decode('utf-8', 'replace')
+
def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
- content_type = urlh.headers.get('Content-Type', '')
webpage_bytes = urlh.read()
if prefix is not None:
webpage_bytes = prefix + webpage_bytes
- if not encoding:
- encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl())
dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump)
- if self.get_param('write_pages', False):
- basen = f'{video_id}_{urlh.geturl()}'
- trim_length = self.get_param('trim_file_name') or 240
- if len(basen) > trim_length:
- h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
- basen = basen[:trim_length - len(h)] + h
- raw_filename = basen + '.dump'
- filename = sanitize_filename(raw_filename, restricted=True)
- self.to_screen('Saving request to ' + filename)
- # Working around MAX_PATH limitation on Windows (see
- # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
- if compat_os_name == 'nt':
- absfilepath = os.path.abspath(filename)
- if len(absfilepath) > 259:
- filename = '\\\\?\\' + absfilepath
+ if self.get_param('write_pages'):
+ filename = self._request_dump_filename(urlh.geturl(), video_id)
+ self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf:
outf.write(webpage_bytes)
- try:
- content = webpage_bytes.decode(encoding, 'replace')
- except LookupError:
- content = webpage_bytes.decode('utf-8', 'replace')
-
+ content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
self.__check_blocked(content)
return content
- def _download_webpage(
- self, url_or_request, video_id, note=None, errnote=None,
- fatal=True, tries=1, timeout=5, encoding=None, data=None,
- headers={}, query={}, expected_status=None):
- """
- Return the data of the page as a string.
-
- Arguments:
- url_or_request -- plain text URL as a string or
- a compat_urllib_request.Requestobject
- video_id -- Video/playlist/item identifier (string)
-
- Keyword arguments:
- note -- note printed before downloading (string)
- errnote -- note printed in case of an error (string)
- fatal -- flag denoting whether error should be considered fatal,
- i.e. whether it should cause ExtractionError to be raised,
- otherwise a warning will be reported and extraction continued
- tries -- number of tries
- timeout -- sleep interval between tries
- encoding -- encoding for a page content decoding, guessed automatically
- when not explicitly specified
- data -- POST data (bytes)
- headers -- HTTP headers (dict)
- query -- URL query (dict)
- expected_status -- allows to accept failed HTTP requests (non 2xx
- status code) by explicitly specifying a set of accepted status
- codes. Can be any of the following entities:
- - an integer type specifying an exact failed status code to
- accept
- - a list or a tuple of integer types specifying a list of
- failed status codes to accept
- - a callable accepting an actual failed status code and
- returning True if it should be accepted
- Note that this argument does not affect success status codes (2xx)
- which are always accepted.
- """
-
- success = False
- try_count = 0
- while success is False:
- try:
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- success = True
- except compat_http_client.IncompleteRead as e:
- try_count += 1
- if try_count >= tries:
- raise e
- self._sleep(timeout, video_id)
- if res is False:
- return res
- else:
- content, _ = res
- return content
-
- def _download_xml_handle(
- self, url_or_request, video_id, note='Downloading XML',
- errnote='Unable to download XML', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- xml_string, urlh = res
- return self._parse_xml(
- xml_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_xml(
- self, url_or_request, video_id,
- note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None,
- data=None, headers={}, query={}, expected_status=None):
- """
- Return the xml as an xml.etree.ElementTree.Element.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_xml_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
if transform_source:
xml_string = transform_source(xml_string)
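
Splitting out _request_dump_filename means the same filename derivation serves both the write_pages dump (above) and the new load_pages replay (in download_content below). The naming scheme reduced to its essentials (sketch; sanitization and the Windows \\?\ long-path prefix are omitted):

    import hashlib

    def dump_filename(url, video_id, trim_length=240):
        basen = f'{video_id}_{url}'
        if len(basen) > trim_length:
            h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
            basen = basen[:trim_length - len(h)] + h
        return basen + '.dump'
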
@@ -996,101 +936,126 @@ class InfoExtractor:
else:
self.report_warning(errmsg + str(ve))
- def _download_json_handle(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- json_string, urlh = res
- return self._parse_json(
- json_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_json(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return the JSON object as a dict.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- json_string = transform_source(json_string)
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, **parser_kwargs):
try:
- return json.loads(json_string, strict=False)
+ return json.loads(
+ json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs)
except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % video_id
+ errmsg = f'{video_id}: Failed to parse JSON'
if fatal:
raise ExtractorError(errmsg, cause=ve)
else:
- self.report_warning(errmsg + str(ve))
+ self.report_warning(f'{errmsg}: {ve}')
def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True):
return self._parse_json(
data[data.find('{'):data.rfind('}') + 1],
video_id, transform_source, fatal)
- def _download_socket_json_handle(
- self, url_or_request, video_id, note='Polling socket',
- errnote='Unable to poll socket', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
+ def __create_download_methods(name, parser, note, errnote, return_value):
+
+ def parse(ie, content, *args, **kwargs):
+ if parser is None:
+ return content
+ # parser is fetched by name so subclasses can override it
+ return getattr(ie, parser)(content, *args, **kwargs)
+
+ def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query, expected_status=expected_status)
+ if res is False:
+ return res
+ content, urlh = res
+ return parse(self, content, video_id, transform_source=transform_source, fatal=fatal), urlh
+
+ def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ if self.get_param('load_pages'):
+ url_or_request = self._create_request(url_or_request, data, headers, query)
+ filename = self._request_dump_filename(url_or_request.full_url, video_id)
+ self.to_screen(f'Loading request from {filename}')
+ try:
+ with open(filename, 'rb') as dumpf:
+ webpage_bytes = dumpf.read()
+ except OSError as e:
+ self.report_warning(f'Unable to load request from disk: {e}')
+ else:
+ content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers)
+ return parse(self, content, video_id, transform_source, fatal)
+ kwargs = {
+ 'note': note,
+ 'errnote': errnote,
+ 'transform_source': transform_source,
+ 'fatal': fatal,
+ 'encoding': encoding,
+ 'data': data,
+ 'headers': headers,
+ 'query': query,
+ 'expected_status': expected_status,
+ }
+ if parser is None:
+ kwargs.pop('transform_source')
+ # The method is fetched by name so subclasses can override _download_..._handle
+ res = getattr(self, download_handle.__name__)(url_or_request, video_id, **kwargs)
+ return res if res is False else res[0]
+
+ def impersonate(func, name, return_value):
+ func.__name__, func.__qualname__ = name, f'InfoExtractor.{name}'
+ func.__doc__ = f'''
+ @param transform_source Apply this transformation before parsing
+ @returns {return_value}
+
+ See the _download_webpage_handle docstring for the specification of the other arguments
+ '''
+
+ impersonate(download_handle, f'_download_{name}_handle', f'({return_value}, URL handle)')
+ impersonate(download_content, f'_download_{name}', f'{return_value}')
+ return download_handle, download_content
+
+ _download_xml_handle, _download_xml = __create_download_methods(
+ 'xml', '_parse_xml', 'Downloading XML', 'Unable to download XML', 'xml as an xml.etree.ElementTree.Element')
+ _download_json_handle, _download_json = __create_download_methods(
+ 'json', '_parse_json', 'Downloading JSON metadata', 'Unable to download JSON metadata', 'JSON object as a dict')
+ _download_socket_json_handle, _download_socket_json = __create_download_methods(
+ 'socket_json', '_parse_socket_response_as_json', 'Polling socket', 'Unable to poll socket', 'JSON object as a dict')
+ __download_webpage = __create_download_methods('webpage', None, None, None, 'data of the page as a string')[1]
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- webpage, urlh = res
- return self._parse_socket_response_as_json(
- webpage, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_socket_json(
- self, url_or_request, video_id, note='Polling socket',
- errnote='Unable to poll socket', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
+ def _download_webpage(
+ self, url_or_request, video_id, note=None, errnote=None,
+ fatal=True, tries=1, timeout=NO_DEFAULT, *args, **kwargs):
"""
- Return the JSON object as a dict.
+ Return the data of the page as a string.
- See _download_webpage docstring for arguments specification.
+ Keyword arguments:
+ tries -- number of tries
+ timeout -- sleep interval between tries
+
+ See the _download_webpage_handle docstring for the specification of the other arguments.
"""
- res = self._download_socket_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
+
+ R''' # NB: These are unused; should they be deprecated?
+ if tries != 1:
+ self._downloader.deprecation_warning('tries argument is deprecated in InfoExtractor._download_webpage')
+ if timeout is NO_DEFAULT:
+ timeout = 5
+ else:
+ self._downloader.deprecation_warning('timeout argument is deprecated in InfoExtractor._download_webpage')
+ '''
+
+ try_count = 0
+ while True:
+ try:
+ return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
+ except http.client.IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
def report_warning(self, msg, video_id=None, *args, only_once=False, **kwargs):
- idstr = format_field(video_id, template='%s: ')
+ idstr = format_field(video_id, None, '%s: ')
msg = f'[{self.IE_NAME}] {idstr}{msg}'
if only_once:
if f'WARNING: {msg}' in self._printed_messages:
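
The structural change in this hunk: _download_xml/_download_json/_download_socket_json and their *_handle variants are no longer written by hand; __create_download_methods stamps them out from one template, and impersonate() patches __name__/__qualname__/__doc__ so that the by-name getattr dispatch and subclass overrides keep working. The pattern in miniature (self-contained sketch, not the yt-dlp code itself):

    import json

    def _make_fetcher(name, parser):
        def fetch(self, raw):
            content = self._get(raw)  # assumed transport method
            return parser(content) if parser else content
        fetch.__name__ = f'_fetch_{name}'
        fetch.__qualname__ = f'Client.{fetch.__name__}'
        return fetch

    class Client:
        def _get(self, raw):
            return raw
        _fetch_text = _make_fetcher('text', None)
        _fetch_json = _make_fetcher('json', json.loads)

    print(Client()._fetch_json('{"a": 1}'))  # {'a': 1}
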
@@ -1136,7 +1101,7 @@ class InfoExtractor:
self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
return
- msg += format_field(self._login_hint(method), template='. %s')
+ msg += format_field(self._login_hint(method), None, '. %s')
raise ExtractorError(msg, expected=True)
def raise_geo_restricted(
@@ -1228,6 +1193,33 @@ class InfoExtractor:
self.report_warning('unable to extract %s' % _name + bug_reports_message())
return None
+ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
+ contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
+ """Searches string for the JSON object specified by start_pattern"""
+ # NB: end_pattern is only used to reduce the size of the initial match
+ if default is NO_DEFAULT:
+ default, has_default = {}, False
+ else:
+ fatal, has_default = False, True
+
+ json_string = self._search_regex(
+ rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}',
+ string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
+ if not json_string:
+ return default
+
+ _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+ try:
+ return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
+ except ExtractorError as e:
+ if fatal:
+ raise ExtractorError(
+ f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
+ elif not has_default:
+ self.report_warning(
+ f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
+ return default
+
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
"""
Like _search_regex, but strips HTML tags and unescapes entities.
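
_search_json collapses the old two-step _search_regex plus _parse_json dance into one helper, which is what the web-archive and Bilibili hunks earlier in this commit are rewritten on top of. Calling convention, with a made-up page snippet (inside an extractor):

    webpage = '<script>window.__DATA__ = {"video": {"id": "x1"}};</script>'
    data = self._search_json(
        r'window\.__DATA__\s*=', webpage, 'initial data', video_id, default={})
    # -> {'video': {'id': 'x1'}}; because a default is given, a parse
    #    failure only warns instead of raising
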
@@ -1292,7 +1284,7 @@ class InfoExtractor:
if tfa is not None:
return tfa
- return compat_getpass('Type %s and press [Return]: ' % note)
+ return getpass.getpass('Type %s and press [Return]: ' % note)
# Helper functions for extracting OpenGraph info
@staticmethod
@@ -1343,7 +1335,7 @@ class InfoExtractor:
return self._og_search_property('url', html, **kargs)
def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
- return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+ return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
name = variadic(name)
@@ -1400,27 +1392,25 @@ class InfoExtractor:
return self._html_search_meta('twitter:player', html,
'twitter card player')
- def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
- json_ld_list = list(re.finditer(JSON_LD_RE, html))
- default = kwargs.get('default', NO_DEFAULT)
- # JSON-LD may be malformed and thus `fatal` should be respected.
- # At the same time `default` may be passed that assumes `fatal=False`
- # for _search_regex. Let's simulate the same behavior here as well.
- fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
- json_ld = []
- for mobj in json_ld_list:
- json_ld_item = self._parse_json(
- mobj.group('json_ld'), video_id, fatal=fatal)
- if not json_ld_item:
- continue
- if isinstance(json_ld_item, dict):
- json_ld.append(json_ld_item)
- elif isinstance(json_ld_item, (list, tuple)):
- json_ld.extend(json_ld_item)
- if json_ld:
- json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
- if json_ld:
- return json_ld
+ def _yield_json_ld(self, html, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Yield all json ld objects in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ for mobj in re.finditer(JSON_LD_RE, html):
+ json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal)
+ for json_ld in variadic(json_ld_item):
+ if isinstance(json_ld, dict):
+ yield json_ld
+
+ def _search_json_ld(self, html, video_id, expected_type=None, *, fatal=True, default=NO_DEFAULT):
+ """Search for a video in any json ld in the html"""
+ if default is not NO_DEFAULT:
+ fatal = False
+ info = self._json_ld(
+ list(self._yield_json_ld(html, video_id, fatal=fatal, default=default)),
+ video_id, fatal=fatal, expected_type=expected_type)
+ if info:
+ return info
if default is not NO_DEFAULT:
return default
elif fatal:
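
JSON-LD handling is now split in two: _yield_json_ld lazily yields every ld+json object (flattening top-level arrays via variadic), while _search_json_ld feeds the collected list to _json_ld. The input shape it walks, for reference (illustrative snippet inside an extractor):

    html = '''<script type="application/ld+json">
    {"@context": "https://schema.org", "@type": "VideoObject",
     "name": "A title", "duration": "PT1M30S"}
    </script>'''
    info = self._search_json_ld(html, video_id, expected_type='VideoObject')
    # -> roughly {'title': 'A title', 'duration': 90.0, ...}
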
@@ -1430,7 +1420,7 @@ class InfoExtractor:
return {}
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
- if isinstance(json_ld, compat_str):
+ if isinstance(json_ld, str):
json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
if not json_ld:
return {}
@@ -1451,6 +1441,10 @@ class InfoExtractor:
'ViewAction': 'view',
}
+ def is_type(e, *expected_types):
+ type = variadic(traverse_obj(e, '@type'))
+ return any(x in type for x in expected_types)
+
def extract_interaction_type(e):
interaction_type = e.get('interactionType')
if isinstance(interaction_type, dict):
@@ -1464,9 +1458,7 @@ class InfoExtractor:
if not isinstance(interaction_statistic, list):
return
for is_e in interaction_statistic:
- if not isinstance(is_e, dict):
- continue
- if is_e.get('@type') != 'InteractionCounter':
+ if not is_type(is_e, 'InteractionCounter'):
continue
interaction_type = extract_interaction_type(is_e)
if not interaction_type:
@@ -1503,22 +1495,23 @@ class InfoExtractor:
info['chapters'] = chapters
def extract_video_object(e):
- assert e['@type'] == 'VideoObject'
+ assert is_type(e, 'VideoObject')
author = e.get('author')
info.update({
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnails': [{'url': url_or_none(url)}
- for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
+ 'thumbnails': [{'url': url}
+ for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+ if url_or_none(url)],
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
# author can be an instance of 'Organization' or 'Person' types.
# both types can have 'name' property(inherited from 'Thing' type). [1]
# however some websites are using 'Text' type instead.
# 1. https://schema.org/VideoObject
- 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
- 'filesize': float_or_none(e.get('contentSize')),
+ 'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, str) else None,
+ 'filesize': int_or_none(float_or_none(e.get('contentSize'))),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),
@@ -1534,13 +1527,12 @@ class InfoExtractor:
if at_top_level and set(e.keys()) == {'@context', '@graph'}:
traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
break
- item_type = e.get('@type')
- if expected_type is not None and expected_type != item_type:
+ if expected_type is not None and not is_type(e, expected_type):
continue
rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none)
if rating is not None:
info['average_rating'] = rating
- if item_type in ('TVEpisode', 'Episode'):
+ if is_type(e, 'TVEpisode', 'Episode'):
episode_name = unescapeHTML(e.get('name'))
info.update({
'episode': episode_name,
@@ -1550,37 +1542,39 @@ class InfoExtractor:
if not info.get('title') and episode_name:
info['title'] = episode_name
part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
+ if is_type(part_of_season, 'TVSeason', 'Season', 'CreativeWorkSeason'):
info.update({
'season': unescapeHTML(part_of_season.get('name')),
'season_number': int_or_none(part_of_season.get('seasonNumber')),
})
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+ if is_type(part_of_series, 'TVSeries', 'Series', 'CreativeWorkSeries'):
info['series'] = unescapeHTML(part_of_series.get('name'))
- elif item_type == 'Movie':
+ elif is_type(e, 'Movie'):
info.update({
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('dateCreated')),
})
- elif item_type in ('Article', 'NewsArticle'):
+ elif is_type(e, 'Article', 'NewsArticle'):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
- if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':
+ if is_type(traverse_obj(e, ('video', 0)), 'VideoObject'):
extract_video_object(e['video'][0])
- elif item_type == 'VideoObject':
+ elif is_type(traverse_obj(e, ('subjectOf', 0)), 'VideoObject'):
+ extract_video_object(e['subjectOf'][0])
+ elif is_type(e, 'VideoObject'):
extract_video_object(e)
if expected_type is None:
continue
else:
break
video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ if is_type(video, 'VideoObject'):
extract_video_object(video)
if expected_type is None:
continue
@@ -1597,15 +1591,13 @@ class InfoExtractor:
webpage, 'next.js data', fatal=fatal, **kw),
video_id, transform_source=transform_source, fatal=fatal)
- def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
- ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
- # not all website do this, but it can be changed
- # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
+ """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
rectx = re.escape(context_name)
+ FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)'
js, arg_keys, arg_vals = self._search_regex(
- (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
- r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
- webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+ (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'),
+ webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal)
args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
@@ -1613,7 +1605,8 @@ class InfoExtractor:
if val in ('undefined', 'void 0'):
args[key] = 'null'
- return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+ ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
+ return traverse_obj(ret, traverse) or {}
@staticmethod
def _hidden_inputs(html):
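
What _search_nuxt_data matches, concretely: Nuxt pages serialize state as an immediately-invoked function whose parameters carry the non-JSON-safe values. A toy input (made up) showing what the three named groups capture:

    webpage = ('<script>window.__NUXT__=(function(a,b){return '
               '{data:[{title:a,views:b}]};}("My video",1000));</script>')
    # arg_keys -> 'a,b'
    # js       -> '{data:[{title:a,views:b}]}'
    # arg_vals -> '"My video",1000'
    # js_to_json(js, vars=...) then substitutes a -> "My video", b -> 1000
    # before JSON parsing; traverse defaults to ('data', 0).
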
@@ -2166,7 +2159,7 @@ class InfoExtractor:
]), m3u8_doc)
def format_url(url):
- return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url)
+ return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
if self.get_param('hls_split_discontinuity', False):
def _extract_m3u8_playlist_indices(manifest_url=None, m3u8_doc=None):
@@ -2539,7 +2532,7 @@ class InfoExtractor:
})
continue
- src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src)
src_url = src_url.strip()
if proto == 'm3u8' or src_ext == 'm3u8':
@@ -2562,7 +2555,7 @@ class InfoExtractor:
'plugin': 'flowplayer-3.2.0.1',
}
f4m_url += '&' if '?' in f4m_url else '?'
- f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ f4m_url += urllib.parse.urlencode(f4m_params)
formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
elif src_ext == 'mpd':
formats.extend(self._extract_mpd_formats(
@@ -2803,13 +2796,18 @@ class InfoExtractor:
mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
- codecs = parse_codecs(representation_attrib.get('codecs', ''))
+ codec_str = representation_attrib.get('codecs', '')
+ # Some kind of binary subtitle found in some youtube livestreams
+ if mime_type == 'application/x-rawcc':
+ codecs = {'scodec': codec_str}
+ else:
+ codecs = parse_codecs(codec_str)
if content_type not in ('video', 'audio', 'text'):
if mime_type == 'image/jpeg':
content_type = mime_type
- elif codecs['vcodec'] != 'none':
+ elif codecs.get('vcodec', 'none') != 'none':
content_type = 'video'
- elif codecs['acodec'] != 'none':
+ elif codecs.get('acodec', 'none') != 'none':
content_type = 'audio'
elif codecs.get('scodec', 'none') != 'none':
content_type = 'text'
@@ -2827,7 +2825,7 @@ class InfoExtractor:
if re.match(r'^https?://', base_url):
break
if mpd_base_url and base_url.startswith('/'):
- base_url = compat_urlparse.urljoin(mpd_base_url, base_url)
+ base_url = urllib.parse.urljoin(mpd_base_url, base_url)
elif mpd_base_url and not re.match(r'^https?://', base_url):
if not mpd_base_url.endswith('/'):
mpd_base_url += '/'
@@ -3097,7 +3095,7 @@ class InfoExtractor:
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
- track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+ track_url_pattern = urllib.parse.urljoin(ism_url, track_url_pattern)
fragments = []
fragment_ctx = {
@@ -3116,7 +3114,7 @@ class InfoExtractor:
fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
for _ in range(fragment_repeat):
fragments.append({
- 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'url': re.sub(r'{start[ _]time}', str(fragment_ctx['time']), track_url_pattern),
'duration': fragment_ctx['duration'] / stream_timescale,
})
fragment_ctx['time'] += fragment_ctx['duration']
@@ -3184,7 +3182,8 @@ class InfoExtractor:
return f
return {}
- def _media_formats(src, cur_media_type, type_info={}):
+ def _media_formats(src, cur_media_type, type_info=None):
+ type_info = type_info or {}
full_url = absolute_url(src)
ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
@@ -3202,6 +3201,7 @@ class InfoExtractor:
formats = [{
'url': full_url,
'vcodec': 'none' if cur_media_type == 'audio' else None,
+ 'ext': ext,
}]
return is_plain_url, formats
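
The type_info={} to type_info=None change above fixes Python's mutable-default-argument trap: a dict default is created once at definition time and shared across every call. The hazard in general form (not yt-dlp code):

    def bad(item, acc=[]):  # one shared list for every call
        acc.append(item)
        return acc

    def good(item, acc=None):  # fresh list per call
        acc = acc or []
        acc.append(item)
        return acc

    print(bad(1), bad(2))    # [1, 2] [1, 2]  -- state leaks between calls
    print(good(1), good(2))  # [1] [2]
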
@@ -3228,7 +3228,8 @@ class InfoExtractor:
media_attributes = extract_attributes(media_tag)
src = strip_or_none(media_attributes.get('src'))
if src:
- _, formats = _media_formats(src, media_type)
+ f = parse_content_type(media_attributes.get('type'))
+ _, formats = _media_formats(src, media_type, f)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content:
@@ -3357,7 +3358,7 @@ class InfoExtractor:
return formats, subtitles
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
- query = compat_urlparse.urlparse(url).query
+ query = urllib.parse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
mobj = re.search(
r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
@@ -3463,7 +3464,7 @@ class InfoExtractor:
if not isinstance(track, dict):
continue
track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
+ if not track_kind or not isinstance(track_kind, str):
continue
if track_kind.lower() not in ('captions', 'subtitles'):
continue
@@ -3536,7 +3537,7 @@ class InfoExtractor:
# Often no height is provided but there is a label in
# format like "1080p", "720p SD", or 1080.
height = int_or_none(self._search_regex(
- r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''),
'height', default=None))
a_format = {
'url': source_url,
@@ -3588,17 +3589,15 @@ class InfoExtractor:
def _set_cookie(self, domain, name, value, expire_time=None, port=None,
path='/', secure=False, discard=False, rest={}, **kwargs):
- cookie = compat_cookiejar_Cookie(
+ cookie = http.cookiejar.Cookie(
0, name, value, port, port is not None, domain, True,
domain.startswith('.'), path, True, secure, expire_time,
discard, None, None, rest)
- self._downloader.cookiejar.set_cookie(cookie)
+ self.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
- """ Return a compat_cookies_SimpleCookie with the cookies for the url """
- req = sanitized_Request(url)
- self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies_SimpleCookie(req.get_header('Cookie'))
+ """ Return a http.cookies.SimpleCookie with the cookies for the url """
+ return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
@@ -3742,7 +3741,7 @@ class InfoExtractor:
def _get_automatic_captions(self, *args, **kwargs):
raise NotImplementedError('This method must be implemented by subclasses')
- @property
+ @functools.cached_property
def _cookies_passed(self):
"""Whether cookies have been passed to YoutubeDL"""
return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None
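
Moving _cookies_passed to functools.cached_property (imported through the compat shim in the import hunk above) means the two get_param lookups run once per extractor instance rather than on every access. The behavioural difference, generically:

    import functools

    class C:
        @functools.cached_property
        def expensive(self):
            print('computed')
            return 42

    c = C()
    c.expensive  # prints 'computed' once
    c.expensive  # served from the cache; no print
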
@@ -3764,10 +3763,10 @@ class InfoExtractor:
return headers
def _generic_id(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ return urllib.parse.unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
def _generic_title(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+ return urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py
index e8f19b9e0..2f93e8ea5 100644
--- a/yt_dlp/extractor/commonprotocols.py
+++ b/yt_dlp/extractor/commonprotocols.py
@@ -1,5 +1,6 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import compat_urlparse
class RtmpIE(InfoExtractor):
@@ -23,7 +24,7 @@ class RtmpIE(InfoExtractor):
'formats': [{
'url': url,
'ext': 'flv',
- 'format_id': compat_urlparse.urlparse(url).scheme,
+ 'format_id': urllib.parse.urlparse(url).scheme,
}],
}
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index bb1dbbaad..6877e1a3f 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -1,19 +1,20 @@
import base64
-import re
import json
-import zlib
-
+import re
+import urllib.request
import xml.etree.ElementTree
+import zlib
from hashlib import sha1
-from math import pow, sqrt, floor
+from math import floor, pow, sqrt
+
from .common import InfoExtractor
from .vrv import VRVBaseIE
+from ..aes import aes_cbc_decrypt
from ..compat import (
compat_b64decode,
compat_etree_fromstring,
compat_str,
compat_urllib_parse_urlencode,
- compat_urllib_request,
compat_urlparse,
)
from ..utils import (
@@ -22,8 +23,8 @@ from ..utils import (
extract_attributes,
float_or_none,
format_field,
- intlist_to_bytes,
int_or_none,
+ intlist_to_bytes,
join_nonempty,
lowercase_escape,
merge_dicts,
@@ -34,9 +35,6 @@ from ..utils import (
try_get,
xpath_text,
)
-from ..aes import (
- aes_cbc_decrypt,
-)
class CrunchyrollBaseIE(InfoExtractor):
@@ -259,7 +257,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVBaseIE):
}
def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ request = (url_or_request if isinstance(url_or_request, urllib.request.Request)
else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues
# similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
@@ -728,11 +726,12 @@ class CrunchyrollBetaBaseIE(CrunchyrollBaseIE):
headers={
'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
})
- bucket = policy_response['cms']['bucket']
+ cms = traverse_obj(policy_response, 'cms_beta', 'cms')
+ bucket = cms['bucket']
params = {
- 'Policy': policy_response['cms']['policy'],
- 'Signature': policy_response['cms']['signature'],
- 'Key-Pair-Id': policy_response['cms']['key_pair_id']
+ 'Policy': cms['policy'],
+ 'Signature': cms['signature'],
+ 'Key-Pair-Id': cms['key_pair_id']
}
locale = traverse_obj(initial_state, ('localization', 'locale'))
if locale:
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index 5b76b29ff..a105b6ce2 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -1,12 +1,8 @@
import re
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- urlencode_postdata,
- compat_str,
- ExtractorError,
-)
+from ..compat import compat_str
+from ..utils import ExtractorError, int_or_none, urlencode_postdata
class CuriosityStreamBaseIE(InfoExtractor):
@@ -23,6 +19,11 @@ class CuriosityStreamBaseIE(InfoExtractor):
def _call_api(self, path, video_id, query=None):
headers = {}
+ if not self._auth_token:
+ auth_cookie = self._get_cookies('https://curiositystream.com').get('auth_token')
+ if auth_cookie:
+ self.write_debug('Obtained auth_token cookie')
+ self._auth_token = auth_cookie.value
if self._auth_token:
headers['X-Auth-Token'] = self._auth_token
result = self._download_json(
@@ -45,7 +46,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'https://app.curiositystream.com/video/2',
+ 'url': 'http://app.curiositystream.com/video/2',
'info_dict': {
'id': '2',
'ext': 'mp4',
diff --git a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py
index 07239f39c..9b83264ee 100644
--- a/yt_dlp/extractor/cwtv.py
+++ b/yt_dlp/extractor/cwtv.py
@@ -91,4 +91,5 @@ class CWTVIE(InfoExtractor):
'timestamp': parse_iso8601(video_data.get('start_time')),
'age_limit': parse_age_limit(video_data.get('rating')),
'ie_key': 'ThePlatform',
+ 'thumbnail': video_data.get('large_thumbnail')
}
diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py
index 3b090d5e0..46438891f 100644
--- a/yt_dlp/extractor/dailymotion.py
+++ b/yt_dlp/extractor/dailymotion.py
@@ -5,13 +5,15 @@ import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
+ ExtractorError,
+ OnDemandPagedList,
age_restricted,
clean_html,
- ExtractorError,
int_or_none,
- OnDemandPagedList,
+ traverse_obj,
try_get,
unescapeHTML,
+ unsmuggle_url,
urlencode_postdata,
)
@@ -220,6 +222,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
return urls
def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url)
video_id, playlist_id = self._match_valid_url(url).groups()
if playlist_id:
@@ -252,7 +255,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
metadata = self._download_json(
'https://www.dailymotion.com/player/metadata/video/' + xid,
xid, 'Downloading metadata JSON',
- query={'app': 'com.dailymotion.neon'})
+ query=traverse_obj(smuggled_data, 'query') or {'app': 'com.dailymotion.neon'})
error = metadata.get('error')
if error:
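
unsmuggle_url lets whoever built the Dailymotion URL piggyback a custom metadata query onto it via a URL fragment, which the extractor strips back off here. The round trip, schematically (hedged sketch of yt_dlp.utils.smuggle_url/unsmuggle_url; the payload is hypothetical):

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    url = smuggle_url('https://www.dailymotion.com/video/x7xyz',
                      {'query': {'app': 'some.embedder'}})
    clean_url, data = unsmuggle_url(url)
    # clean_url == 'https://www.dailymotion.com/video/x7xyz'
    # data == {'query': {'app': 'some.embedder'}}
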
diff --git a/yt_dlp/extractor/dailywire.py b/yt_dlp/extractor/dailywire.py
new file mode 100644
index 000000000..1f27797ad
--- /dev/null
+++ b/yt_dlp/extractor/dailywire.py
@@ -0,0 +1,114 @@
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ join_nonempty,
+ traverse_obj,
+ url_or_none,
+)
+
+
+class DailyWireBaseIE(InfoExtractor):
+ _JSON_PATH = {
+ 'episode': ('props', 'pageProps', 'episodeData', 'episode'),
+ 'videos': ('props', 'pageProps', 'videoData', 'video'),
+ 'podcasts': ('props', 'pageProps', 'episode'),
+ }
+
+ def _get_json(self, url):
+ sites_type, slug = self._match_valid_url(url).group('sites_type', 'id')
+ json_data = self._search_nextjs_data(self._download_webpage(url, slug), slug)
+ return slug, traverse_obj(json_data, self._JSON_PATH[sites_type])
+
+
+class DailyWireIE(DailyWireBaseIE):
+ _VALID_URL = r'https?://(?:www\.)dailywire(?:\.com)/(?P<sites_type>episode|videos)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/episode/1-fauci',
+ 'info_dict': {
+ 'id': 'ckzsl50xnqpy30850in3v4bu7',
+ 'ext': 'mp4',
+ 'display_id': '1-fauci',
+ 'title': '1. Fauci',
+ 'description': 'md5:9df630347ef85081b7e97dd30bc22853',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/ckzsl50xnqpy30850in3v4bu7/ckzsl50xnqpy30850in3v4bu7-1648237399554.jpg',
+ 'creator': 'Caroline Roberts',
+ 'series_id': 'ckzplm0a097fn0826r2vc3j7h',
+ 'series': 'China: The Enemy Within',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/episode/ep-124-bill-maher',
+ 'info_dict': {
+ 'id': 'cl0ngbaalplc80894sfdo9edf',
+ 'ext': 'mp3',
+ 'display_id': 'ep-124-bill-maher',
+ 'title': 'Ep. 124 - Bill Maher',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/episodes/cl0ngbaalplc80894sfdo9edf/cl0ngbaalplc80894sfdo9edf-1647065568518.jpg',
+ 'creator': 'Caroline Roberts',
+ 'description': 'md5:adb0de584bcfa9c41374999d9e324e98',
+ 'series_id': 'cjzvep7270hp00786l9hwccob',
+ 'series': 'The Sunday Special',
+ }
+ }, {
+ 'url': 'https://www.dailywire.com/videos/the-hyperions',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ urls = traverse_obj(
+ episode_info, (('segments', 'videoUrl'), ..., ('video', 'audio')), expected_type=url_or_none)
+
+ formats, subtitles = [], {}
+ for url in urls:
+ if determine_ext(url) != 'm3u8':
+ formats.append({'url': url})
+ continue
+ format_, subs_ = self._extract_m3u8_formats_and_subtitles(url, slug)
+ formats.extend(format_)
+ self._merge_subtitles(subs_, target=subtitles)
+ self._sort_formats(formats)
+ return {
+ 'id': episode_info['id'],
+ 'display_id': slug,
+ 'title': traverse_obj(episode_info, 'title', 'name'),
+ 'description': episode_info.get('description'),
+ 'creator': join_nonempty(('createdBy', 'firstName'), ('createdBy', 'lastName'), from_dict=episode_info, delim=' '),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'is_live': episode_info.get('isLive'),
+ 'thumbnail': traverse_obj(episode_info, 'thumbnail', 'image', expected_type=url_or_none),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series_id': traverse_obj(episode_info, ('show', 'id')),
+ 'series': traverse_obj(episode_info, ('show', 'name')),
+ }
+
+
+class DailyWirePodcastIE(DailyWireBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?dailywire\.com/(?P<sites_type>podcasts)/(?P<podcaster>[\w-]+)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailywire.com/podcasts/morning-wire/get-ready-for-recession-6-15-22',
+ 'info_dict': {
+ 'id': 'cl4f01d0w8pbe0a98ydd0cfn1',
+ 'ext': 'm4a',
+ 'display_id': 'get-ready-for-recession-6-15-22',
+ 'title': 'Get Ready for Recession | 6.15.22',
+ 'description': 'md5:c4afbadda4e1c38a4496f6d62be55634',
+ 'thumbnail': 'https://daily-wire-production.imgix.net/podcasts/ckx4otgd71jm508699tzb6hf4-1639506575562.jpg',
+ 'duration': 900.117667,
+ }
+ }]
+
+ def _real_extract(self, url):
+ slug, episode_info = self._get_json(url)
+ audio_id = traverse_obj(episode_info, 'audioMuxPlaybackId', default='VUsAipTrBVSgzw73SpC2DAJD401TYYwEp')
+
+ return {
+ 'id': episode_info['id'],
+ 'url': f'https://stream.media.dailywire.com/{audio_id}/audio.m4a',
+ 'display_id': slug,
+ 'title': episode_info.get('title'),
+ 'duration': float_or_none(episode_info.get('duration')),
+ 'thumbnail': episode_info.get('thumbnail'),
+ 'description': episode_info.get('description'),
+ }
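
Both new extractors share _get_json, which grabs the page's __NEXT_DATA__ blob via _search_nextjs_data and then selects a per-page-type path with traverse_obj. A toy illustration of the _JSON_PATH dispatch (traverse_obj is the real helper, the blob is fabricated from the first test above):

    from yt_dlp.utils import traverse_obj

    next_data = {'props': {'pageProps': {'episodeData': {'episode': {
        'id': 'ckzsl50xnqpy30850in3v4bu7', 'title': '1. Fauci'}}}}}
    path = ('props', 'pageProps', 'episodeData', 'episode')  # _JSON_PATH['episode']
    assert traverse_obj(next_data, path)['title'] == '1. Fauci'
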
diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py
index c891ad0a6..3813a51fe 100644
--- a/yt_dlp/extractor/digitalconcerthall.py
+++ b/yt_dlp/extractor/digitalconcerthall.py
@@ -86,7 +86,7 @@ class DigitalConcertHallIE(InfoExtractor):
})
m3u8_url = traverse_obj(
- stream_info, ('channel', lambda x: x.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
+ stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False)
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False)
self._sort_formats(formats)
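
This one-line fix exists because traverse_obj invokes a callable path segment as f(key, value) when branching over a dict; the old one-argument lambda raised a (swallowed) TypeError and so matched nothing. The corrected two-argument form, run against fabricated data:

    from yt_dlp.utils import traverse_obj

    stream_info = {'channel': {'vod_mixed_720': {
        'stream': [{'url': 'https://cdn.example/playlist.m3u8'}]}}}
    m3u8_url = traverse_obj(
        stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'),
        get_all=False)
    assert m3u8_url == 'https://cdn.example/playlist.m3u8'
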
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 6ac0c713a..0d12513b2 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -53,8 +53,8 @@ class DropboxIE(InfoExtractor):
else:
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
- json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON')
- info_json = self._parse_json(json_string, video_id).get('props')
+ info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
+ contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props']
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
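
The dropbox change swaps a one-shot regex capture plus _parse_json for the _search_json helper, which keeps the "find the JSON argument, then parse it" dance in one place. A rough standalone approximation of what that call does (nested braces can defeat this simplified pattern):

    import json
    import re

    def search_mount_json(webpage):
        m = re.search(
            r'InitReact\.mountComponent\(.*?,\s*(?P<json>\{.+?"preview".+?\})\s*\)',
            webpage, re.DOTALL)
        return json.loads(m.group('json'))['props'] if m else None
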
diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py
index 475825eb8..e280b1c9f 100644
--- a/yt_dlp/extractor/dropout.py
+++ b/yt_dlp/extractor/dropout.py
@@ -1,8 +1,8 @@
from .common import InfoExtractor
from .vimeo import VHXEmbedIE
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
get_element_by_class,
get_element_by_id,
get_elements_by_class,
@@ -96,11 +96,12 @@ class DropoutIE(InfoExtractor):
def _login(self, display_id):
username, password = self._get_login_info()
- if not (username and password):
- self.raise_login_required(method='password')
+ if not username:
+ return True
response = self._download_webpage(
- self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({
+ self._LOGIN_URL, display_id, note='Logging in', fatal=False,
+ data=urlencode_postdata({
'email': username,
'password': password,
'authenticity_token': self._get_authenticity_token(display_id),
@@ -110,19 +111,25 @@ class DropoutIE(InfoExtractor):
user_has_subscription = self._search_regex(
r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
if user_has_subscription.lower() == 'true':
- return response
+ return
elif user_has_subscription.lower() == 'false':
- raise ExtractorError('Account is not subscribed')
+ return 'Account is not subscribed'
else:
- raise ExtractorError('Incorrect username/password')
+ return 'Incorrect username/password'
def _real_extract(self, url):
display_id = self._match_id(url)
- try:
- self._login(display_id)
- webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
- finally:
- self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False)
+
+ webpage = None
+ if self._get_cookies('https://www.dropout.tv').get('_session'):
+ webpage = self._download_webpage(url, display_id)
+ if not webpage or '<div id="watch-unauthorized"' in webpage:
+ login_err = self._login(display_id)
+ webpage = self._download_webpage(url, display_id)
+ if login_err and '<div id="watch-unauthorized"' in webpage:
+ if login_err is True:
+ self.raise_login_required(method='any')
+ raise ExtractorError(login_err, expected=True)
embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
thumbnail = self._og_search_thumbnail(webpage)
@@ -137,7 +144,7 @@ class DropoutIE(InfoExtractor):
return {
'_type': 'url_transparent',
'ie_key': VHXEmbedIE.ie_key(),
- 'url': embed_url,
+ 'url': VHXEmbedIE._smuggle_referrer(embed_url, 'https://www.dropout.tv'),
'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
'display_id': display_id,
'title': title,
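
The dropout rework inverts the old always-login flow: reuse an existing _session cookie when present, attempt a login only if the fetched page carries the watch-unauthorized marker, and let _login report failure through its return value instead of raising mid-flow. A standalone control-flow sketch (the callables stand in for yt-dlp's downloader and _login):

    UNAUTHORIZED = '<div id="watch-unauthorized"'

    def fetch_watch_page(download, has_session_cookie, login):
        webpage = download() if has_session_cookie() else None
        if not webpage or UNAUTHORIZED in webpage:
            login_err = login()  # None on success, True if no credentials, else a message
            webpage = download()
            if login_err and UNAUTHORIZED in webpage:
                raise PermissionError('login required' if login_err is True else login_err)
        return webpage
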
diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py
index 24403842d..fb0546cae 100644
--- a/yt_dlp/extractor/duboku.py
+++ b/yt_dlp/extractor/duboku.py
@@ -51,31 +51,39 @@ def _get_element_by_tag_and_attrib(html, tag=None, attribute=None, value=None, e
class DubokuIE(InfoExtractor):
IE_NAME = 'duboku'
- IE_DESC = 'www.duboku.co'
+ IE_DESC = 'www.duboku.io'
- _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/vodplay/)(?P<id>[0-9]+-[0-9-]+)\.html.*'
_TESTS = [{
- 'url': 'https://www.duboku.co/vodplay/1575-1-1.html',
+ 'url': 'https://w.duboku.io/vodplay/1575-1-1.html',
'info_dict': {
'id': '1575-1-1',
- 'ext': 'ts',
+ 'ext': 'mp4',
'series': '白色月光',
'title': 'contains:白色月光',
'season_number': 1,
'episode_number': 1,
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
+ 'episode': 'Episode 1',
},
'params': {
'skip_download': 'm3u8 download',
},
}, {
- 'url': 'https://www.duboku.co/vodplay/1588-1-1.html',
+ 'url': 'https://w.duboku.io/vodplay/1588-1-1.html',
'info_dict': {
'id': '1588-1-1',
- 'ext': 'ts',
+ 'ext': 'mp4',
'series': '亲爱的自己',
- 'title': 'contains:预告片',
+ 'title': 'contains:第1集',
'season_number': 1,
'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
+ 'episode_id': '1',
+ 'season_id': '1',
},
'params': {
'skip_download': 'm3u8 download',
@@ -91,7 +99,7 @@ class DubokuIE(InfoExtractor):
season_id = temp[1]
episode_id = temp[2]
- webpage_url = 'https://www.duboku.co/vodplay/%s.html' % video_id
+ webpage_url = 'https://w.duboku.io/vodplay/%s.html' % video_id
webpage_html = self._download_webpage(webpage_url, video_id)
# extract video url
@@ -124,12 +132,13 @@ class DubokuIE(InfoExtractor):
data_from = player_data.get('from')
# if it is an embedded iframe, maybe it's an external source
+ headers = {'Referer': webpage_url}
if data_from == 'iframe':
# use _type url_transparent to retain the meaningful details
# of the video.
return {
'_type': 'url_transparent',
- 'url': smuggle_url(data_url, {'http_headers': {'Referer': webpage_url}}),
+ 'url': smuggle_url(data_url, {'http_headers': headers}),
'id': video_id,
'title': title,
'series': series_title,
@@ -139,7 +148,7 @@ class DubokuIE(InfoExtractor):
'episode_id': episode_id,
}
- formats = self._extract_m3u8_formats(data_url, video_id, 'mp4')
+ formats = self._extract_m3u8_formats(data_url, video_id, 'mp4', headers=headers)
return {
'id': video_id,
@@ -150,36 +159,29 @@ class DubokuIE(InfoExtractor):
'episode_number': int_or_none(episode_id),
'episode_id': episode_id,
'formats': formats,
- 'http_headers': {'Referer': 'https://www.duboku.co/static/player/videojs.html'}
+ 'http_headers': headers
}
class DubokuPlaylistIE(InfoExtractor):
IE_NAME = 'duboku:list'
- IE_DESC = 'www.duboku.co entire series'
+ IE_DESC = 'www.duboku.io entire series'
- _VALID_URL = r'(?:https?://[^/]+\.duboku\.co/voddetail/)(?P<id>[0-9]+)\.html.*'
+ _VALID_URL = r'(?:https?://[^/]+\.duboku\.io/voddetail/)(?P<id>[0-9]+)\.html.*'
_TESTS = [{
- 'url': 'https://www.duboku.co/voddetail/1575.html',
+ 'url': 'https://w.duboku.io/voddetail/1575.html',
'info_dict': {
'id': 'startswith:1575',
'title': '白色月光',
},
'playlist_count': 12,
}, {
- 'url': 'https://www.duboku.co/voddetail/1554.html',
+ 'url': 'https://w.duboku.io/voddetail/1554.html',
'info_dict': {
'id': 'startswith:1554',
'title': '以家人之名',
},
'playlist_mincount': 30,
- }, {
- 'url': 'https://www.duboku.co/voddetail/1554.html#playlist2',
- 'info_dict': {
- 'id': '1554#playlist2',
- 'title': '以家人之名',
- },
- 'playlist_mincount': 27,
}]
def _real_extract(self, url):
@@ -189,7 +191,7 @@ class DubokuPlaylistIE(InfoExtractor):
series_id = mobj.group('id')
fragment = compat_urlparse.urlparse(url).fragment
- webpage_url = 'https://www.duboku.co/voddetail/%s.html' % series_id
+ webpage_url = 'https://w.duboku.io/voddetail/%s.html' % series_id
webpage_html = self._download_webpage(webpage_url, series_id)
# extract title
@@ -234,6 +236,6 @@ class DubokuPlaylistIE(InfoExtractor):
# return url results
return self.playlist_result([
self.url_result(
- compat_urlparse.urljoin('https://www.duboku.co', x['href']),
+ compat_urlparse.urljoin('https://w.duboku.io', x['href']),
ie=DubokuIE.ie_key(), video_title=x.get('title'))
for x in playlist], series_id + '#' + playlist_id, title)
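
Besides the duboku.co to duboku.io domain move, the extractor now threads one Referer header through all three exits: smuggled into iframe URLs, passed to the HLS probe, and attached to the returned formats. The smuggle step with the real helper (the embed URL is a placeholder):

    from yt_dlp.utils import smuggle_url

    webpage_url = 'https://w.duboku.io/vodplay/1575-1-1.html'
    headers = {'Referer': webpage_url}
    iframe_url = smuggle_url('https://embed-host.example/player/xyz', {'http_headers': headers})
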
diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py
index 507f0a5c1..276543653 100644
--- a/yt_dlp/extractor/ertgr.py
+++ b/yt_dlp/extractor/ertgr.py
@@ -119,7 +119,7 @@ class ERTFlixCodenameIE(ERTFlixBaseIE):
class ERTFlixIE(ERTFlixBaseIE):
IE_NAME = 'ertflix'
IE_DESC = 'ERTFLIX videos'
- _VALID_URL = r'https?://www\.ertflix\.gr/(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
+ _VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
_TESTS = [{
'url': 'https://www.ertflix.gr/vod/vod.173258-aoratoi-ergates',
'md5': '6479d5e60fd7e520b07ba5411dcdd6e7',
@@ -171,6 +171,9 @@ class ERTFlixIE(ERTFlixBaseIE):
'title': 'Το δίκτυο',
},
'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari',
+ 'only_matching': True,
}]
def _extract_episode(self, episode):
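
The widened ertflix pattern tolerates an optional language segment such as /en/ before series|vod. A quick check against the new only_matching URL:

    import re

    VALID_URL = r'https?://www\.ertflix\.gr/(?:[^/]+/)?(?:series|vod)/(?P<id>[a-z]{3}\.\d+)'
    m = re.match(VALID_URL, 'https://www.ertflix.gr/en/vod/vod.127652-ta-kalytera-mas-chronia-ep1-mia-volta-sto-feggari')
    assert m.group('id') == 'vod.127652'
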
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
index 8fad70e6b..451148636 100644
--- a/yt_dlp/extractor/espn.py
+++ b/yt_dlp/extractor/espn.py
@@ -1,8 +1,11 @@
+import base64
+import json
import re
+import urllib.parse
+from .adobepass import AdobePassIE
from .common import InfoExtractor
from .once import OnceIE
-from ..compat import compat_str
from ..utils import (
determine_ext,
dict_get,
@@ -24,7 +27,6 @@ class ESPNIE(OnceIE):
(?:
(?:
video/(?:clip|iframe/twitter)|
- watch/player
)
(?:
.*?\?.*?\bid=|
@@ -47,6 +49,8 @@ class ESPNIE(OnceIE):
'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
'timestamp': 1390936111,
'upload_date': '20140128',
+ 'duration': 1302,
+ 'thumbnail': r're:https://.+\.jpg',
},
'params': {
'skip_download': True,
@@ -72,15 +76,6 @@ class ESPNIE(OnceIE):
'url': 'https://cdn.espn.go.com/video/clip/_/id/19771774',
'only_matching': True,
}, {
- 'url': 'http://www.espn.com/watch/player?id=19141491',
- 'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
- 'only_matching': True,
- }, {
- 'url': 'http://www.espn.com/watch/player/_/id/19141491',
- 'only_matching': True,
- }, {
'url': 'http://www.espn.com/video/clip?id=10365079',
'only_matching': True,
}, {
@@ -98,7 +93,13 @@ class ESPNIE(OnceIE):
}, {
'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings',
'only_matching': True,
- }]
+ }, {
+ 'url': 'http://www.espn.com/watch/player?id=19141491',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.espn.com/watch/player?bucketId=257&id=19505875',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -116,7 +117,7 @@ class ESPNIE(OnceIE):
for source_id, source in source.items():
if source_id == 'alert':
continue
- elif isinstance(source, compat_str):
+ elif isinstance(source, str):
extract_source(source, base_source_id)
elif isinstance(source, dict):
traverse_source(
@@ -196,7 +197,7 @@ class ESPNArticleIE(InfoExtractor):
@classmethod
def suitable(cls, url):
- return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
+ return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -277,3 +278,119 @@ class ESPNCricInfoIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
}
+
+
+class WatchESPNIE(AdobePassIE):
+ _VALID_URL = r'https://www.espn.com/watch/player/_/id/(?P<id>[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})'
+ _TESTS = [{
+ 'url': 'https://www.espn.com/watch/player/_/id/ba7d17da-453b-4697-bf92-76a99f61642b',
+ 'info_dict': {
+ 'id': 'ba7d17da-453b-4697-bf92-76a99f61642b',
+ 'ext': 'mp4',
+ 'title': 'Serbia vs. Turkey',
+ 'thumbnail': 'https://artwork.api.espn.com/artwork/collections/media/ba7d17da-453b-4697-bf92-76a99f61642b/default?width=640&apikey=1ngjw23osgcis1i1vbj96lmfqs',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.espn.com/watch/player/_/id/4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34',
+ 'info_dict': {
+ 'id': '4e9b5bd1-4ceb-4482-9d28-1dd5f30d2f34',
+ 'ext': 'mp4',
+ 'title': 'Real Madrid vs. Real Betis (LaLiga)',
+ 'thumbnail': 'https://s.secure.espncdn.com/stitcher/artwork/collections/media/bd1f3d12-0654-47d9-852e-71b85ea695c7/16x9.jpg?timestamp=202201112217&showBadge=true&cb=12&package=ESPN_PLUS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ _API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
+
+ def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
+ if 'Authorization' not in headers:
+ headers['Authorization'] = f'Bearer {self._API_KEY}'
+ parse = urllib.parse.urlencode if path == 'token' else json.dumps
+ return self._download_json(
+ f'https://espn.api.edge.bamgrid.com/{path}', video_id, headers=headers, data=parse(payload).encode())
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ f'https://watch-cdn.product.api.espn.com/api/product/v3/watchespn/web/playback/event?id={video_id}',
+ video_id)['playbackState']
+
+ # ESPN+ subscription required, through cookies
+ if 'DTC' in video_data.get('sourceId'):
+ cookie = self._get_cookies(url).get('ESPN-ONESITE.WEB-PROD.token')
+ if not cookie:
+ self.raise_login_required(method='cookies')
+
+ assertion = self._call_bamgrid_api(
+ 'devices', video_id,
+ headers={'Content-Type': 'application/json; charset=UTF-8'},
+ payload={
+ 'deviceFamily': 'android',
+ 'applicationRuntime': 'android',
+ 'deviceProfile': 'tv',
+ 'attributes': {},
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:device',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ assertion = self._call_bamgrid_api(
+ 'accounts/grant', video_id, payload={'id_token': cookie.value.split('|')[1]},
+ headers={
+ 'Authorization': token,
+ 'Content-Type': 'application/json; charset=UTF-8'
+ })['assertion']
+ token = self._call_bamgrid_api(
+ 'token', video_id, payload={
+ 'subject_token': assertion,
+ 'subject_token_type': 'urn:bamtech:params:oauth:token-type:account',
+ 'platform': 'android',
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:token-exchange'
+ })['access_token']
+
+ playback = self._download_json(
+ video_data['videoHref'].format(scenario='browser~ssai'), video_id,
+ headers={
+ 'Accept': 'application/vnd.media-service+json; version=5',
+ 'Authorization': token
+ })
+ m3u8_url, headers = playback['stream']['complete'][0]['url'], {'authorization': token}
+
+ # No login required
+ elif video_data.get('sourceId') == 'ESPN_FREE':
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id)
+ m3u8_url, headers = asset['stream'], {}
+
+ # TV Provider required
+ else:
+ resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
+ auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
+
+ asset = self._download_json(
+ f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
+ video_id, data=f'adobeToken={urllib.parse.quote_plus(base64.b64encode(auth))}&drmSupport=HLS'.encode())
+ m3u8_url, headers = asset['stream'], {}
+
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data.get('name'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': video_data.get('posterHref'),
+ 'http_headers': headers,
+ }
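
For ESPN+ ('DTC') streams the new WatchESPNIE exchanges the ESPN-ONESITE.WEB-PROD.token cookie for a BAM access token in four POSTs: device assertion, device token, account grant, account token. The helper's one subtlety is its encoding switch, sketched standalone below (API key copied from the hunk, error handling omitted):

    import json
    import urllib.parse
    import urllib.request

    API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'

    def call_bamgrid(path, payload, headers=None):
        headers = dict(headers or {})
        headers.setdefault('Authorization', f'Bearer {API_KEY}')
        # The /token grant is form-encoded; every other endpoint takes JSON
        encode = urllib.parse.urlencode if path == 'token' else json.dumps
        req = urllib.request.Request(
            f'https://espn.api.edge.bamgrid.com/{path}',
            data=encode(payload).encode(), headers=headers)
        return json.load(urllib.request.urlopen(req))
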
diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py
index a1b8e9bc9..5aba21ba7 100644
--- a/yt_dlp/extractor/expressen.py
+++ b/yt_dlp/extractor/expressen.py
@@ -19,9 +19,10 @@ class ExpressenIE(InfoExtractor):
'''
_TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
- 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+ 'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
'info_dict': {
- 'id': '8690962',
+ 'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
+ 'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
'ext': 'mp4',
'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
@@ -64,7 +65,7 @@ class ExpressenIE(InfoExtractor):
display_id, transform_source=unescapeHTML)
info = extract_data('video-tracking-info')
- video_id = info['videoId']
+ video_id = info['contentId']
data = extract_data('article-data')
stream = data['stream']
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 9c5a5f482..32818a024 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1,2175 +1,23 @@
-# flake8: noqa: F401
+import contextlib
+import os
-from .abc import (
- ABCIE,
- ABCIViewIE,
- ABCIViewShowSeriesIE,
-)
-from .abcnews import (
- AbcNewsIE,
- AbcNewsVideoIE,
-)
-from .abcotvs import (
- ABCOTVSIE,
- ABCOTVSClipsIE,
-)
-from .abematv import (
- AbemaTVIE,
- AbemaTVTitleIE,
-)
-from .academicearth import AcademicEarthCourseIE
-from .acast import (
- ACastIE,
- ACastChannelIE,
-)
-from .adn import ADNIE
-from .adobeconnect import AdobeConnectIE
-from .adobetv import (
- AdobeTVEmbedIE,
- AdobeTVIE,
- AdobeTVShowIE,
- AdobeTVChannelIE,
- AdobeTVVideoIE,
-)
-from .adultswim import AdultSwimIE
-from .aenetworks import (
- AENetworksIE,
- AENetworksCollectionIE,
- AENetworksShowIE,
- HistoryTopicIE,
- HistoryPlayerIE,
- BiographyIE,
-)
-from .afreecatv import (
- AfreecaTVIE,
- AfreecaTVLiveIE,
- AfreecaTVUserIE,
-)
-from .airmozilla import AirMozillaIE
-from .aljazeera import AlJazeeraIE
-from .alphaporno import AlphaPornoIE
-from .amara import AmaraIE
-from .alura import (
- AluraIE,
- AluraCourseIE
-)
-from .amcnetworks import AMCNetworksIE
-from .animelab import (
- AnimeLabIE,
- AnimeLabShowsIE,
-)
-from .amazon import AmazonStoreIE
-from .americastestkitchen import (
- AmericasTestKitchenIE,
- AmericasTestKitchenSeasonIE,
-)
-from .animeondemand import AnimeOnDemandIE
-from .anvato import AnvatoIE
-from .aol import AolIE
-from .allocine import AllocineIE
-from .aliexpress import AliExpressLiveIE
-from .alsace20tv import (
- Alsace20TVIE,
- Alsace20TVEmbedIE,
-)
-from .apa import APAIE
-from .aparat import AparatIE
-from .appleconnect import AppleConnectIE
-from .appletrailers import (
- AppleTrailersIE,
- AppleTrailersSectionIE,
-)
-from .applepodcasts import ApplePodcastsIE
-from .archiveorg import (
- ArchiveOrgIE,
- YoutubeWebArchiveIE,
-)
-from .arcpublishing import ArcPublishingIE
-from .arkena import ArkenaIE
-from .ard import (
- ARDBetaMediathekIE,
- ARDIE,
- ARDMediathekIE,
-)
-from .arte import (
- ArteTVIE,
- ArteTVEmbedIE,
- ArteTVPlaylistIE,
- ArteTVCategoryIE,
-)
-from .arnes import ArnesIE
-from .asiancrush import (
- AsianCrushIE,
- AsianCrushPlaylistIE,
-)
-from .atresplayer import AtresPlayerIE
-from .atttechchannel import ATTTechChannelIE
-from .atvat import ATVAtIE
-from .audimedia import AudiMediaIE
-from .audioboom import AudioBoomIE
-from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .audius import (
- AudiusIE,
- AudiusTrackIE,
- AudiusPlaylistIE,
- AudiusProfileIE,
-)
-from .awaan import (
- AWAANIE,
- AWAANVideoIE,
- AWAANLiveIE,
- AWAANSeasonIE,
-)
-from .azmedien import AZMedienIE
-from .baidu import BaiduVideoIE
-from .banbye import (
- BanByeIE,
- BanByeChannelIE,
-)
-from .bandaichannel import BandaiChannelIE
-from .bandcamp import (
- BandcampIE,
- BandcampAlbumIE,
- BandcampWeeklyIE,
- BandcampUserIE,
-)
-from .bannedvideo import BannedVideoIE
-from .bbc import (
- BBCCoUkIE,
- BBCCoUkArticleIE,
- BBCCoUkIPlayerEpisodesIE,
- BBCCoUkIPlayerGroupIE,
- BBCCoUkPlaylistIE,
- BBCIE,
-)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .bellmedia import BellMediaIE
-from .beatport import BeatportIE
-from .bet import BetIE
-from .bfi import BFIPlayerIE
-from .bfmtv import (
- BFMTVIE,
- BFMTVLiveIE,
- BFMTVArticleIE,
-)
-from .bibeltv import BibelTVIE
-from .bigflix import BigflixIE
-from .bigo import BigoIE
-from .bild import BildIE
-from .bilibili import (
- BiliBiliIE,
- BiliBiliSearchIE,
- BilibiliCategoryIE,
- BiliBiliBangumiIE,
- BilibiliAudioIE,
- BilibiliAudioAlbumIE,
- BiliBiliPlayerIE,
- BilibiliChannelIE,
- BiliIntlIE,
- BiliIntlSeriesIE,
- BiliLiveIE,
-)
-from .biobiochiletv import BioBioChileTVIE
-from .bitchute import (
- BitChuteIE,
- BitChuteChannelIE,
-)
-from .bitwave import (
- BitwaveReplayIE,
- BitwaveStreamIE,
-)
-from .biqle import BIQLEIE
-from .blackboardcollaborate import BlackboardCollaborateIE
-from .bleacherreport import (
- BleacherReportIE,
- BleacherReportCMSIE,
-)
-from .blogger import BloggerIE
-from .bloomberg import BloombergIE
-from .bokecc import BokeCCIE
-from .bongacams import BongaCamsIE
-from .bostonglobe import BostonGlobeIE
-from .box import BoxIE
-from .bpb import BpbIE
-from .br import (
- BRIE,
- BRMediathekIE,
-)
-from .bravotv import BravoTVIE
-from .breakcom import BreakIE
-from .breitbart import BreitBartIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .businessinsider import BusinessInsiderIE
-from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
-from .c56 import C56IE
-from .cableav import CableAVIE
-from .callin import CallinIE
-from .caltrans import CaltransIE
-from .cam4 import CAM4IE
-from .camdemy import (
- CamdemyIE,
- CamdemyFolderIE
-)
-from .cammodels import CamModelsIE
-from .camwithher import CamWithHerIE
-from .canalalpha import CanalAlphaIE
-from .canalplus import CanalplusIE
-from .canalc2 import Canalc2IE
-from .canvas import (
- CanvasIE,
- CanvasEenIE,
- VrtNUIE,
- DagelijkseKostIE,
-)
-from .carambatv import (
- CarambaTVIE,
- CarambaTVPageIE,
-)
-from .cartoonnetwork import CartoonNetworkIE
-from .cbc import (
- CBCIE,
- CBCPlayerIE,
- CBCGemIE,
- CBCGemPlaylistIE,
- CBCGemLiveIE,
-)
-from .cbs import CBSIE
-from .cbslocal import (
- CBSLocalIE,
- CBSLocalArticleIE,
-)
-from .cbsinteractive import CBSInteractiveIE
-from .cbsnews import (
- CBSNewsEmbedIE,
- CBSNewsIE,
- CBSNewsLiveVideoIE,
-)
-from .cbssports import (
- CBSSportsEmbedIE,
- CBSSportsIE,
- TwentyFourSevenSportsIE,
-)
-from .ccc import (
- CCCIE,
- CCCPlaylistIE,
-)
-from .ccma import CCMAIE
-from .cctv import CCTVIE
-from .cda import CDAIE
-from .ceskatelevize import CeskaTelevizeIE
-from .cgtn import CGTNIE
-from .channel9 import Channel9IE
-from .charlierose import CharlieRoseIE
-from .chaturbate import ChaturbateIE
-from .chilloutzone import ChilloutzoneIE
-from .chingari import (
- ChingariIE,
- ChingariUserIE,
-)
-from .chirbit import (
- ChirbitIE,
- ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
-from .cinemax import CinemaxIE
-from .ciscolive import (
- CiscoLiveSessionIE,
- CiscoLiveSearchIE,
-)
-from .ciscowebex import CiscoWebexIE
-from .cjsw import CJSWIE
-from .cliphunter import CliphunterIE
-from .clippit import ClippitIE
-from .cliprs import ClipRsIE
-from .clipsyndicate import ClipsyndicateIE
-from .closertotruth import CloserToTruthIE
-from .cloudflarestream import CloudflareStreamIE
-from .cloudy import CloudyIE
-from .clubic import ClubicIE
-from .clyp import ClypIE
-from .cmt import CMTIE
-from .cnbc import (
- CNBCIE,
- CNBCVideoIE,
-)
-from .cnn import (
- CNNIE,
- CNNBlogsIE,
- CNNArticleIE,
-)
-from .coub import CoubIE
-from .comedycentral import (
- ComedyCentralIE,
- ComedyCentralTVIE,
-)
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import (
- MmsIE,
- RtmpIE,
- ViewSourceIE,
-)
-from .condenast import CondeNastIE
-from .contv import CONtvIE
-from .corus import CorusIE
-from .cpac import (
- CPACIE,
- CPACPlaylistIE,
-)
-from .cozytv import CozyTVIE
-from .cracked import CrackedIE
-from .crackle import CrackleIE
-from .craftsy import CraftsyIE
-from .crooksandliars import CrooksAndLiarsIE
-from .crowdbunker import (
- CrowdBunkerIE,
- CrowdBunkerChannelIE,
-)
-from .crunchyroll import (
- CrunchyrollIE,
- CrunchyrollShowPlaylistIE,
- CrunchyrollBetaIE,
- CrunchyrollBetaShowIE,
-)
-from .cspan import CSpanIE, CSpanCongressIE
-from .ctsnews import CtsNewsIE
-from .ctv import CTVIE
-from .ctvnews import CTVNewsIE
-from .cultureunplugged import CultureUnpluggedIE
-from .curiositystream import (
- CuriosityStreamIE,
- CuriosityStreamCollectionsIE,
- CuriosityStreamSeriesIE,
-)
-from .cwtv import CWTVIE
-from .cybrary import (
- CybraryIE,
- CybraryCourseIE
-)
-from .daftsex import DaftsexIE
-from .dailymail import DailyMailIE
-from .dailymotion import (
- DailymotionIE,
- DailymotionPlaylistIE,
- DailymotionUserIE,
-)
-from .damtomo import (
- DamtomoRecordIE,
- DamtomoVideoIE,
-)
-from .daum import (
- DaumIE,
- DaumClipIE,
- DaumPlaylistIE,
- DaumUserIE,
-)
-from .daystar import DaystarClipIE
-from .dbtv import DBTVIE
-from .dctp import DctpTvIE
-from .deezer import (
- DeezerPlaylistIE,
- DeezerAlbumIE,
-)
-from .democracynow import DemocracynowIE
-from .dfb import DFBIE
-from .dhm import DHMIE
-from .digg import DiggIE
-from .dotsub import DotsubIE
-from .douyutv import (
- DouyuShowIE,
- DouyuTVIE,
-)
-from .dplay import (
- DPlayIE,
- DiscoveryPlusIE,
- HGTVDeIE,
- GoDiscoveryIE,
- TravelChannelIE,
- CookingChannelIE,
- HGTVUsaIE,
- FoodNetworkIE,
- InvestigationDiscoveryIE,
- DestinationAmericaIE,
- AmHistoryChannelIE,
- ScienceChannelIE,
- DIYNetworkIE,
- DiscoveryLifeIE,
- AnimalPlanetIE,
- TLCIE,
- DiscoveryPlusIndiaIE,
- DiscoveryNetworksDeIE,
- DiscoveryPlusItalyIE,
- DiscoveryPlusItalyShowIE,
- DiscoveryPlusIndiaShowIE,
-)
-from .dreisat import DreiSatIE
-from .drbonanza import DRBonanzaIE
-from .drtuber import DrTuberIE
-from .drtv import (
- DRTVIE,
- DRTVLiveIE,
-)
-from .dtube import DTubeIE
-from .dvtv import DVTVIE
-from .duboku import (
- DubokuIE,
- DubokuPlaylistIE
-)
-from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
-from .digitalconcerthall import DigitalConcertHallIE
-from .discovery import DiscoveryIE
-from .disney import DisneyIE
-from .dispeak import DigitallySpeakingIE
-from .doodstream import DoodStreamIE
-from .dropbox import DropboxIE
-from .dropout import (
- DropoutSeasonIE,
- DropoutIE
-)
-from .dw import (
- DWIE,
- DWArticleIE,
-)
-from .eagleplatform import EaglePlatformIE
-from .ebaumsworld import EbaumsWorldIE
-from .echomsk import EchoMskIE
-from .egghead import (
- EggheadCourseIE,
- EggheadLessonIE,
-)
-from .ehow import EHowIE
-from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
-from .eitb import EitbIE
-from .ellentube import (
- EllenTubeIE,
- EllenTubeVideoIE,
- EllenTubePlaylistIE,
-)
-from .elonet import ElonetIE
-from .elpais import ElPaisIE
-from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
-from .epicon import (
- EpiconIE,
- EpiconSeriesIE,
-)
-from .eporner import EpornerIE
-from .eroprofile import (
- EroProfileIE,
- EroProfileAlbumIE,
-)
-from .ertgr import (
- ERTFlixCodenameIE,
- ERTFlixIE,
- ERTWebtvEmbedIE,
-)
-from .escapist import EscapistIE
-from .espn import (
- ESPNIE,
- ESPNArticleIE,
- FiveThirtyEightIE,
- ESPNCricInfoIE,
-)
-from .esri import EsriVideoIE
-from .europa import EuropaIE
-from .europeantour import EuropeanTourIE
-from .euscreen import EUScreenIE
-from .expotv import ExpoTVIE
-from .expressen import ExpressenIE
-from .extremetube import ExtremeTubeIE
-from .eyedotv import EyedoTVIE
-from .facebook import (
- FacebookIE,
- FacebookPluginsVideoIE,
- FacebookRedirectURLIE,
-)
-from .fancode import (
- FancodeVodIE,
- FancodeLiveIE
-)
+from ..utils import load_plugins
-from .faz import FazIE
-from .fc2 import (
- FC2IE,
- FC2EmbedIE,
- FC2LiveIE,
-)
-from .fczenit import FczenitIE
-from .fifa import FifaIE
-from .filmmodu import FilmmoduIE
-from .filmon import (
- FilmOnIE,
- FilmOnChannelIE,
-)
-from .filmweb import FilmwebIE
-from .firsttv import FirstTVIE
-from .fivetv import FiveTVIE
-from .flickr import FlickrIE
-from .folketinget import FolketingetIE
-from .footyroom import FootyRoomIE
-from .formula1 import Formula1IE
-from .fourtube import (
- FourTubeIE,
- PornTubeIE,
- PornerBrosIE,
- FuxIE,
-)
-from .fox import FOXIE
-from .fox9 import (
- FOX9IE,
- FOX9NewsIE,
-)
-from .foxgay import FoxgayIE
-from .foxnews import (
- FoxNewsIE,
- FoxNewsArticleIE,
-)
-from .foxsports import FoxSportsIE
-from .fptplay import FptplayIE
-from .franceculture import FranceCultureIE
-from .franceinter import FranceInterIE
-from .francetv import (
- FranceTVIE,
- FranceTVSiteIE,
- FranceTVInfoIE,
-)
-from .freesound import FreesoundIE
-from .freespeech import FreespeechIE
-from .frontendmasters import (
- FrontendMastersIE,
- FrontendMastersLessonIE,
- FrontendMastersCourseIE
-)
-from .fujitv import FujiTVFODPlus7IE
-from .funimation import (
- FunimationIE,
- FunimationPageIE,
- FunimationShowIE,
-)
-from .funk import FunkIE
-from .fusion import FusionIE
-from .gab import (
- GabTVIE,
- GabIE,
-)
-from .gaia import GaiaIE
-from .gameinformer import GameInformerIE
-from .gamejolt import (
- GameJoltIE,
- GameJoltUserIE,
- GameJoltGameIE,
- GameJoltGameSoundtrackIE,
- GameJoltCommunityIE,
- GameJoltSearchIE,
-)
-from .gamespot import GameSpotIE
-from .gamestar import GameStarIE
-from .gaskrank import GaskrankIE
-from .gazeta import GazetaIE
-from .gdcvault import GDCVaultIE
-from .gedidigital import GediDigitalIE
-from .generic import GenericIE
-from .gettr import (
- GettrIE,
- GettrStreamingIE,
-)
-from .gfycat import GfycatIE
-from .giantbomb import GiantBombIE
-from .giga import GigaIE
-from .glide import GlideIE
-from .globo import (
- GloboIE,
- GloboArticleIE,
-)
-from .go import GoIE
-from .godtube import GodTubeIE
-from .gofile import GofileIE
-from .golem import GolemIE
-from .goodgame import GoodGameIE
-from .googledrive import GoogleDriveIE
-from .googlepodcasts import (
- GooglePodcastsIE,
- GooglePodcastsFeedIE,
-)
-from .googlesearch import GoogleSearchIE
-from .gopro import GoProIE
-from .goshgay import GoshgayIE
-from .gotostage import GoToStageIE
-from .gputechconf import GPUTechConfIE
-from .gronkh import (
- GronkhIE,
- GronkhFeedIE,
- GronkhVodsIE
-)
-from .groupon import GrouponIE
-from .hbo import HBOIE
-from .hearthisat import HearThisAtIE
-from .heise import HeiseIE
-from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
-from .hgtv import HGTVComShowIE
-from .hketv import HKETVIE
-from .hidive import HiDiveIE
-from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
-from .hitrecord import HitRecordIE
-from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import (
- HotStarIE,
- HotStarPrefixIE,
- HotStarPlaylistIE,
- HotStarSeriesIE,
-)
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
-from .hrfensehen import HRFernsehenIE
-from .hrti import (
- HRTiIE,
- HRTiPlaylistIE,
-)
-from .hse import (
- HSEShowIE,
- HSEProductIE,
-)
-from .huajiao import HuajiaoIE
-from .huya import HuyaLiveIE
-from .huffpost import HuffPostIE
-from .hungama import (
- HungamaIE,
- HungamaSongIE,
- HungamaAlbumPlaylistIE,
-)
-from .hypem import HypemIE
-from .icareus import IcareusIE
-from .ichinanalive import (
- IchinanaLiveIE,
- IchinanaLiveClipIE,
-)
-from .ign import (
- IGNIE,
- IGNVideoIE,
- IGNArticleIE,
-)
-from .iheart import (
- IHeartRadioIE,
- IHeartRadioPodcastIE,
-)
-from .imdb import (
- ImdbIE,
- ImdbListIE
-)
-from .imgur import (
- ImgurIE,
- ImgurAlbumIE,
- ImgurGalleryIE,
-)
-from .ina import InaIE
-from .inc import IncIE
-from .indavideo import IndavideoEmbedIE
-from .infoq import InfoQIE
-from .instagram import (
- InstagramIE,
- InstagramIOSIE,
- InstagramUserIE,
- InstagramTagIE,
- InstagramStoryIE,
-)
-from .internazionale import InternazionaleIE
-from .internetvideoarchive import InternetVideoArchiveIE
-from .iprima import (
- IPrimaIE,
- IPrimaCNNIE
-)
-from .iqiyi import (
- IqiyiIE,
- IqIE,
- IqAlbumIE
-)
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ with contextlib.suppress(ImportError):
+ from .lazy_extractors import * # noqa: F403
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
-from .itprotv import (
- ITProTVIE,
- ITProTVCourseIE
-)
+if not _LAZY_LOADER:
+ from ._extractors import * # noqa: F403
+ _ALL_CLASSES = [ # noqa: F811
+ klass
+ for name, klass in globals().items()
+ if name.endswith('IE') and name != 'GenericIE'
+ ]
+ _ALL_CLASSES.append(GenericIE) # noqa: F405
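
The added block replaces a couple thousand lines of eager imports: the generated lazy_extractors module is preferred unless YTDLP_NO_LAZY_EXTRACTORS is set, and the eager fallback rebuilds _ALL_CLASSES by scanning module globals, keeping GenericIE last so it is only tried after every specific extractor. The ordering trick in miniature (class names are placeholders):

    class FooIE: pass
    class BarIE: pass
    class GenericIE: pass

    _ALL_CLASSES = [klass for name, klass in list(globals().items())
                    if name.endswith('IE') and name != 'GenericIE']
    _ALL_CLASSES.append(GenericIE)
    assert _ALL_CLASSES[-1] is GenericIE
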
-from .itv import (
- ITVIE,
- ITVBTCCIE,
-)
-from .ivi import (
- IviIE,
- IviCompilationIE
-)
-from .ivideon import IvideonIE
-from .iwara import (
- IwaraIE,
- IwaraPlaylistIE,
- IwaraUserIE,
-)
-from .izlesene import IzleseneIE
-from .jable import (
- JableIE,
- JablePlaylistIE,
-)
-from .jamendo import (
- JamendoIE,
- JamendoAlbumIE,
-)
-from .jeuxvideo import JeuxVideoIE
-from .jove import JoveIE
-from .joj import JojIE
-from .jwplatform import JWPlatformIE
-from .kakao import KakaoIE
-from .kaltura import KalturaIE
-from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
-from .kelbyone import KelbyOneIE
-from .ketnet import KetnetIE
-from .khanacademy import (
- KhanAcademyIE,
- KhanAcademyUnitIE,
-)
-from .kickstarter import KickStarterIE
-from .kinja import KinjaEmbedIE
-from .kinopoisk import KinoPoiskIE
-from .konserthusetplay import KonserthusetPlayIE
-from .koo import KooIE
-from .krasview import KrasViewIE
-from .ku6 import Ku6IE
-from .kusi import KUSIIE
-from .kuwo import (
- KuwoIE,
- KuwoAlbumIE,
- KuwoChartIE,
- KuwoSingerIE,
- KuwoCategoryIE,
- KuwoMvIE,
-)
-from .la7 import (
- LA7IE,
- LA7PodcastEpisodeIE,
- LA7PodcastIE,
-)
-from .laola1tv import (
- Laola1TvEmbedIE,
- Laola1TvIE,
- EHFTVIE,
- ITTFIE,
-)
-from .lastfm import (
- LastFMIE,
- LastFMPlaylistIE,
- LastFMUserIE,
-)
-from .lbry import (
- LBRYIE,
- LBRYChannelIE,
-)
-from .lci import LCIIE
-from .lcp import (
- LcpPlayIE,
- LcpIE,
-)
-from .lecture2go import Lecture2GoIE
-from .lecturio import (
- LecturioIE,
- LecturioCourseIE,
- LecturioDeCourseIE,
-)
-from .leeco import (
- LeIE,
- LePlaylistIE,
- LetvCloudIE,
-)
-from .lego import LEGOIE
-from .lemonde import LemondeIE
-from .lenta import LentaIE
-from .libraryofcongress import LibraryOfCongressIE
-from .libsyn import LibsynIE
-from .lifenews import (
- LifeNewsIE,
- LifeEmbedIE,
-)
-from .likee import (
- LikeeIE,
- LikeeUserIE
-)
-from .limelight import (
- LimelightMediaIE,
- LimelightChannelIE,
- LimelightChannelListIE,
-)
-from .line import (
- LineLiveIE,
- LineLiveChannelIE,
-)
-from .linkedin import (
- LinkedInIE,
- LinkedInLearningIE,
- LinkedInLearningCourseIE,
-)
-from .linuxacademy import LinuxAcademyIE
-from .litv import LiTVIE
-from .livejournal import LiveJournalIE
-from .livestream import (
- LivestreamIE,
- LivestreamOriginalIE,
- LivestreamShortenerIE,
-)
-from .lnkgo import (
- LnkGoIE,
- LnkIE,
-)
-from .localnews8 import LocalNews8IE
-from .lovehomeporn import LoveHomePornIE
-from .lrt import (
- LRTVODIE,
- LRTStreamIE
-)
-from .lynda import (
- LyndaIE,
- LyndaCourseIE
-)
-from .m6 import M6IE
-from .magentamusik360 import MagentaMusik360IE
-from .mailru import (
- MailRuIE,
- MailRuMusicIE,
- MailRuMusicSearchIE,
-)
-from .mainstreaming import MainStreamingIE
-from .malltv import MallTVIE
-from .mangomolo import (
- MangomoloVideoIE,
- MangomoloLiveIE,
-)
-from .manoto import (
- ManotoTVIE,
- ManotoTVShowIE,
- ManotoTVLiveIE,
-)
-from .manyvids import ManyVidsIE
-from .maoritv import MaoriTVIE
-from .markiza import (
- MarkizaIE,
- MarkizaPageIE,
-)
-from .massengeschmacktv import MassengeschmackTVIE
-from .masters import MastersIE
-from .matchtv import MatchTVIE
-from .mdr import MDRIE
-from .medaltv import MedalTVIE
-from .mediaite import MediaiteIE
-from .mediaklikk import MediaKlikkIE
-from .mediaset import (
- MediasetIE,
- MediasetShowIE,
-)
-from .mediasite import (
- MediasiteIE,
- MediasiteCatalogIE,
- MediasiteNamedCatalogIE,
-)
-from .medici import MediciIE
-from .megaphone import MegaphoneIE
-from .meipai import MeipaiIE
-from .melonvod import MelonVODIE
-from .meta import METAIE
-from .metacafe import MetacafeIE
-from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
-from .mgtv import MGTVIE
-from .miaopai import MiaoPaiIE
-from .microsoftstream import MicrosoftStreamIE
-from .microsoftvirtualacademy import (
- MicrosoftVirtualAcademyIE,
- MicrosoftVirtualAcademyCourseIE,
-)
-from .mildom import (
- MildomIE,
- MildomVodIE,
- MildomClipIE,
- MildomUserVodIE,
-)
-from .minds import (
- MindsIE,
- MindsChannelIE,
- MindsGroupIE,
-)
-from .ministrygrid import MinistryGridIE
-from .minoto import MinotoIE
-from .miomio import MioMioIE
-from .mirrativ import (
- MirrativIE,
- MirrativUserIE,
-)
-from .mit import TechTVMITIE, OCWMITIE
-from .mitele import MiTeleIE
-from .mixch import (
- MixchIE,
- MixchArchiveIE,
-)
-from .mixcloud import (
- MixcloudIE,
- MixcloudUserIE,
- MixcloudPlaylistIE,
-)
-from .mlb import (
- MLBIE,
- MLBVideoIE,
-)
-from .mlssoccer import MLSSoccerIE
-from .mnet import MnetIE
-from .moevideo import MoeVideoIE
-from .mofosex import (
- MofosexIE,
- MofosexEmbedIE,
-)
-from .mojvideo import MojvideoIE
-from .morningstar import MorningstarIE
-from .motherless import (
- MotherlessIE,
- MotherlessGroupIE
-)
-from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
-from .moviepilot import MoviepilotIE
-from .moviezine import MoviezineIE
-from .movingimage import MovingImageIE
-from .msn import MSNIE
-from .mtv import (
- MTVIE,
- MTVVideoIE,
- MTVServicesEmbeddedIE,
- MTVDEIE,
- MTVJapanIE,
- MTVItaliaIE,
- MTVItaliaProgrammaIE,
-)
-from .muenchentv import MuenchenTVIE
-from .murrtube import MurrtubeIE, MurrtubeUserIE
-from .musescore import MuseScoreIE
-from .musicdex import (
- MusicdexSongIE,
- MusicdexAlbumIE,
- MusicdexArtistIE,
- MusicdexPlaylistIE,
-)
-from .mwave import MwaveIE, MwaveMeetGreetIE
-from .mxplayer import (
- MxplayerIE,
- MxplayerShowIE,
-)
-from .mychannels import MyChannelsIE
-from .myspace import MySpaceIE, MySpaceAlbumIE
-from .myspass import MySpassIE
-from .myvi import (
- MyviIE,
- MyviEmbedIE,
-)
-from .myvideoge import MyVideoGeIE
-from .myvidster import MyVidsterIE
-from .n1 import (
- N1InfoAssetIE,
- N1InfoIIE,
-)
-from .nate import (
- NateIE,
- NateProgramIE,
-)
-from .nationalgeographic import (
- NationalGeographicVideoIE,
- NationalGeographicTVIE,
-)
-from .naver import (
- NaverIE,
- NaverLiveIE,
-)
-from .nba import (
- NBAWatchEmbedIE,
- NBAWatchIE,
- NBAWatchCollectionIE,
- NBAEmbedIE,
- NBAIE,
- NBAChannelIE,
-)
-from .nbc import (
- NBCIE,
- NBCNewsIE,
- NBCOlympicsIE,
- NBCOlympicsStreamIE,
- NBCSportsIE,
- NBCSportsStreamIE,
- NBCSportsVPlayerIE,
-)
-from .ndr import (
- NDRIE,
- NJoyIE,
- NDREmbedBaseIE,
- NDREmbedIE,
- NJoyEmbedIE,
-)
-from .ndtv import NDTVIE
-from .nebula import (
- NebulaIE,
- NebulaSubscriptionsIE,
- NebulaChannelIE,
-)
-from .nerdcubed import NerdCubedFeedIE
-from .netzkino import NetzkinoIE
-from .neteasemusic import (
- NetEaseMusicIE,
- NetEaseMusicAlbumIE,
- NetEaseMusicSingerIE,
- NetEaseMusicListIE,
- NetEaseMusicMvIE,
- NetEaseMusicProgramIE,
- NetEaseMusicDjRadioIE,
-)
-from .newgrounds import (
- NewgroundsIE,
- NewgroundsPlaylistIE,
- NewgroundsUserIE,
-)
-from .newstube import NewstubeIE
-from .newsy import NewsyIE
-from .nextmedia import (
- NextMediaIE,
- NextMediaActionNewsIE,
- AppleDailyIE,
- NextTVIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nfb import NFBIE
-from .nfhsnetwork import NFHSNetworkIE
-from .nfl import (
- NFLIE,
- NFLArticleIE,
-)
-from .nhk import (
- NhkVodIE,
- NhkVodProgramIE,
- NhkForSchoolBangumiIE,
- NhkForSchoolSubjectIE,
- NhkForSchoolProgramListIE,
-)
-from .nhl import NHLIE
-from .nick import (
- NickIE,
- NickBrIE,
- NickDeIE,
- NickNightIE,
- NickRuIE,
-)
-from .niconico import (
- NiconicoIE,
- NiconicoPlaylistIE,
- NiconicoUserIE,
- NiconicoSeriesIE,
- NiconicoHistoryIE,
- NicovideoSearchDateIE,
- NicovideoSearchIE,
- NicovideoSearchURLIE,
- NicovideoTagURLIE,
-)
-from .ninecninemedia import (
- NineCNineMediaIE,
- CPTwentyFourIE,
-)
-from .ninegag import NineGagIE
-from .ninenow import NineNowIE
-from .nintendo import NintendoIE
-from .nitter import NitterIE
-from .njpwworld import NJPWWorldIE
-from .nobelprize import NobelPrizeIE
-from .nonktube import NonkTubeIE
-from .noodlemagazine import NoodleMagazineIE
-from .noovo import NoovoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
-from .nova import (
- NovaEmbedIE,
- NovaIE,
-)
-from .novaplay import NovaPlayIE
-from .nowness import (
- NownessIE,
- NownessPlaylistIE,
- NownessSeriesIE,
-)
-from .noz import NozIE
-from .npo import (
- AndereTijdenIE,
- NPOIE,
- NPOLiveIE,
- NPORadioIE,
- NPORadioFragmentIE,
- SchoolTVIE,
- HetKlokhuisIE,
- VPROIE,
- WNLIE,
-)
-from .npr import NprIE
-from .nrk import (
- NRKIE,
- NRKPlaylistIE,
- NRKSkoleIE,
- NRKTVIE,
- NRKTVDirekteIE,
- NRKRadioPodkastIE,
- NRKTVEpisodeIE,
- NRKTVEpisodesIE,
- NRKTVSeasonIE,
- NRKTVSeriesIE,
-)
-from .nrl import NRLTVIE
-from .ntvcojp import NTVCoJpCUIE
-from .ntvde import NTVDeIE
-from .ntvru import NTVRuIE
-from .nytimes import (
- NYTimesIE,
- NYTimesArticleIE,
- NYTimesCookingIE,
-)
-from .nuvid import NuvidIE
-from .nzherald import NZHeraldIE
-from .nzz import NZZIE
-from .odatv import OdaTVIE
-from .odnoklassniki import OdnoklassnikiIE
-from .oktoberfesttv import OktoberfestTVIE
-from .olympics import OlympicsReplayIE
-from .on24 import On24IE
-from .ondemandkorea import OnDemandKoreaIE
-from .onefootball import OneFootballIE
-from .onet import (
- OnetIE,
- OnetChannelIE,
- OnetMVPIE,
- OnetPlIE,
-)
-from .onionstudios import OnionStudiosIE
-from .ooyala import (
- OoyalaIE,
- OoyalaExternalIE,
-)
-from .opencast import (
- OpencastIE,
- OpencastPlaylistIE,
-)
-from .openrec import (
- OpenRecIE,
- OpenRecCaptureIE,
- OpenRecMovieIE,
-)
-from .ora import OraTVIE
-from .orf import (
- ORFTVthekIE,
- ORFFM4IE,
- ORFFM4StoryIE,
- ORFOE1IE,
- ORFOE3IE,
- ORFNOEIE,
- ORFWIEIE,
- ORFBGLIE,
- ORFOOEIE,
- ORFSTMIE,
- ORFKTNIE,
- ORFSBGIE,
- ORFTIRIE,
- ORFVBGIE,
- ORFIPTVIE,
-)
-from .outsidetv import OutsideTVIE
-from .packtpub import (
- PacktPubIE,
- PacktPubCourseIE,
-)
-from .palcomp3 import (
- PalcoMP3IE,
- PalcoMP3ArtistIE,
- PalcoMP3VideoIE,
-)
-from .pandoratv import PandoraTVIE
-from .panopto import (
- PanoptoIE,
- PanoptoListIE,
- PanoptoPlaylistIE
-)
-from .paramountplus import (
- ParamountPlusIE,
- ParamountPlusSeriesIE,
-)
-from .parliamentliveuk import ParliamentLiveUKIE
-from .parlview import ParlviewIE
-from .patreon import (
- PatreonIE,
- PatreonUserIE
-)
-from .pbs import PBSIE
-from .pearvideo import PearVideoIE
-from .peekvids import PeekVidsIE, PlayVidsIE
-from .peertube import (
- PeerTubeIE,
- PeerTubePlaylistIE,
-)
-from .peertv import PeerTVIE
-from .peloton import (
- PelotonIE,
- PelotonLiveIE
-)
-from .people import PeopleIE
-from .performgroup import PerformGroupIE
-from .periscope import (
- PeriscopeIE,
- PeriscopeUserIE,
-)
-from .philharmoniedeparis import PhilharmonieDeParisIE
-from .phoenix import PhoenixIE
-from .photobucket import PhotobucketIE
-from .piapro import PiaproIE
-from .picarto import (
- PicartoIE,
- PicartoVodIE,
-)
-from .piksel import PikselIE
-from .pinkbike import PinkbikeIE
-from .pinterest import (
- PinterestIE,
- PinterestCollectionIE,
-)
-from .pixivsketch import (
- PixivSketchIE,
- PixivSketchUserIE,
-)
-from .pladform import PladformIE
-from .planetmarathi import PlanetMarathiIE
-from .platzi import (
- PlatziIE,
- PlatziCourseIE,
-)
-from .playfm import PlayFMIE
-from .playplustv import PlayPlusTVIE
-from .plays import PlaysTVIE
-from .playstuff import PlayStuffIE
-from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
-from .playwire import PlaywireIE
-from .plutotv import PlutoTVIE
-from .pluralsight import (
- PluralsightIE,
- PluralsightCourseIE,
-)
-from .podchaser import PodchaserIE
-from .podomatic import PodomaticIE
-from .pokemon import (
- PokemonIE,
- PokemonWatchIE,
- PokemonSoundLibraryIE,
-)
-from .pokergo import (
- PokerGoIE,
- PokerGoCollectionIE,
-)
-from .polsatgo import PolsatGoIE
-from .polskieradio import (
- PolskieRadioIE,
- PolskieRadioCategoryIE,
- PolskieRadioPlayerIE,
- PolskieRadioPodcastIE,
- PolskieRadioPodcastListIE,
- PolskieRadioRadioKierowcowIE,
-)
-from .popcorntimes import PopcorntimesIE
-from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
-from .porncom import PornComIE
-from .pornflip import PornFlipIE
-from .pornhd import PornHdIE
-from .pornhub import (
- PornHubIE,
- PornHubUserIE,
- PornHubPlaylistIE,
- PornHubPagedVideoListIE,
- PornHubUserVideosUploadIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .pornez import PornezIE
-from .puhutv import (
- PuhuTVIE,
- PuhuTVSerieIE,
-)
-from .presstv import PressTVIE
-from .projectveritas import ProjectVeritasIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .prx import (
- PRXStoryIE,
- PRXSeriesIE,
- PRXAccountIE,
- PRXStoriesSearchIE,
- PRXSeriesSearchIE
-)
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
- QQMusicIE,
- QQMusicSingerIE,
- QQMusicAlbumIE,
- QQMusicToplistIE,
- QQMusicPlaylistIE,
-)
-from .r7 import (
- R7IE,
- R7ArticleIE,
-)
-from .radiko import RadikoIE, RadikoRadioIE
-from .radiocanada import (
- RadioCanadaIE,
- RadioCanadaAudioVideoIE,
-)
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .radiozet import RadioZetPodcastIE
-from .radiokapital import (
- RadioKapitalIE,
- RadioKapitalShowIE,
-)
-from .radlive import (
- RadLiveIE,
- RadLiveChannelIE,
- RadLiveSeasonIE,
-)
-from .rai import (
- RaiPlayIE,
- RaiPlayLiveIE,
- RaiPlayPlaylistIE,
- RaiPlaySoundIE,
- RaiPlaySoundLiveIE,
- RaiPlaySoundPlaylistIE,
- RaiIE,
-)
-from .raywenderlich import (
- RayWenderlichIE,
- RayWenderlichCourseIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rcs import (
- RCSIE,
- RCSEmbedsIE,
- RCSVariousIE,
-)
-from .rcti import (
- RCTIPlusIE,
- RCTIPlusSeriesIE,
- RCTIPlusTVIE,
-)
-from .rds import RDSIE
-from .redbulltv import (
- RedBullTVIE,
- RedBullEmbedIE,
- RedBullTVRrnContentIE,
- RedBullIE,
-)
-from .reddit import RedditIE
-from .redgifs import (
- RedGifsIE,
- RedGifsSearchIE,
- RedGifsUserIE,
-)
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .rentv import (
- RENTVIE,
- RENTVArticleIE,
-)
-from .restudy import RestudyIE
-from .reuters import ReutersIE
-from .reverbnation import ReverbNationIE
-from .rice import RICEIE
-from .rmcdecouverte import RMCDecouverteIE
-from .rockstargames import RockstarGamesIE
-from .rokfin import (
- RokfinIE,
- RokfinStackIE,
- RokfinChannelIE,
- RokfinSearchIE,
-)
-from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
-from .rottentomatoes import RottenTomatoesIE
-from .rozhlas import RozhlasIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import (
- RTL2IE,
- RTL2YouIE,
- RTL2YouSeriesIE,
-)
-from .rtnews import (
- RTNewsIE,
- RTDocumentryIE,
- RTDocumentryPlaylistIE,
- RuptlyIE,
-)
-from .rtp import RTPIE
-from .rtrfm import RTRFMIE
-from .rts import RTSIE
-from .rtve import (
- RTVEALaCartaIE,
- RTVEAudioIE,
- RTVELiveIE,
- RTVEInfantilIE,
- RTVETelevisionIE,
-)
-from .rtvnh import RTVNHIE
-from .rtvs import RTVSIE
-from .ruhd import RUHDIE
-from .rule34video import Rule34VideoIE
-from .rumble import (
- RumbleEmbedIE,
- RumbleChannelIE,
-)
-from .rutube import (
- RutubeIE,
- RutubeChannelIE,
- RutubeEmbedIE,
- RutubeMovieIE,
- RutubePersonIE,
- RutubePlaylistIE,
- RutubeTagsIE,
-)
-from .glomex import (
- GlomexIE,
- GlomexEmbedIE,
-)
-from .megatvcom import (
- MegaTVComIE,
- MegaTVComEmbedIE,
-)
-from .ant1newsgr import (
- Ant1NewsGrWatchIE,
- Ant1NewsGrArticleIE,
- Ant1NewsGrEmbedIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .ruv import (
- RuvIE,
- RuvSpilaIE
-)
-from .safari import (
- SafariIE,
- SafariApiIE,
- SafariCourseIE,
-)
-from .saitosan import SaitosanIE
-from .samplefocus import SampleFocusIE
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .scrippsnetworks import (
- ScrippsNetworksWatchIE,
- ScrippsNetworksIE,
-)
-from .scte import (
- SCTEIE,
- SCTECourseIE,
-)
-from .seeker import SeekerIE
-from .senategov import SenateISVPIE, SenateGovIE
-from .sendtonews import SendtoNewsIE
-from .servus import ServusIE
-from .sevenplus import SevenPlusIE
-from .sexu import SexuIE
-from .seznamzpravy import (
- SeznamZpravyIE,
- SeznamZpravyArticleIE,
-)
-from .shahid import (
- ShahidIE,
- ShahidShowIE,
-)
-from .shared import (
- SharedIE,
- VivoIE,
-)
-from .shemaroome import ShemarooMeIE
-from .showroomlive import ShowRoomLiveIE
-from .simplecast import (
- SimplecastIE,
- SimplecastEpisodeIE,
- SimplecastPodcastIE,
-)
-from .sina import SinaIE
-from .sixplay import SixPlayIE
-from .skeb import SkebIE
-from .skyit import (
- SkyItPlayerIE,
- SkyItVideoIE,
- SkyItVideoLiveIE,
- SkyItIE,
- SkyItAcademyIE,
- SkyItArteIE,
- CieloTVItIE,
- TV8ItIE,
-)
-from .skylinewebcams import SkylineWebcamsIE
-from .skynewsarabia import (
- SkyNewsArabiaIE,
- SkyNewsArabiaArticleIE,
-)
-from .skynewsau import SkyNewsAUIE
-from .sky import (
- SkyNewsIE,
- SkyNewsStoryIE,
- SkySportsIE,
- SkySportsNewsIE,
-)
-from .slideshare import SlideshareIE
-from .slideslive import SlidesLiveIE
-from .slutload import SlutloadIE
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .sonyliv import (
- SonyLIVIE,
- SonyLIVSeriesIE,
-)
-from .soundcloud import (
- SoundcloudEmbedIE,
- SoundcloudIE,
- SoundcloudSetIE,
- SoundcloudRelatedIE,
- SoundcloudUserIE,
- SoundcloudTrackStationIE,
- SoundcloudPlaylistIE,
- SoundcloudSearchIE,
-)
-from .soundgasm import (
- SoundgasmIE,
- SoundgasmProfileIE
-)
-from .southpark import (
- SouthParkIE,
- SouthParkDeIE,
- SouthParkDkIE,
- SouthParkEsIE,
- SouthParkNlIE
-)
-from .sovietscloset import (
- SovietsClosetIE,
- SovietsClosetPlaylistIE
-)
-from .spankbang import (
- SpankBangIE,
- SpankBangPlaylistIE,
-)
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE
-from .spike import (
- BellatorIE,
- ParamountNetworkIE,
-)
-from .stitcher import (
- StitcherIE,
- StitcherShowIE,
-)
-from .sport5 import Sport5IE
-from .sportbox import SportBoxIE
-from .sportdeutschland import SportDeutschlandIE
-from .spotify import (
- SpotifyIE,
- SpotifyShowIE,
-)
-from .spreaker import (
- SpreakerIE,
- SpreakerPageIE,
- SpreakerShowIE,
- SpreakerShowPageIE,
-)
-from .springboardplatform import SpringboardPlatformIE
-from .sprout import SproutIE
-from .srgssr import (
- SRGSSRIE,
- SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .startv import StarTVIE
-from .steam import SteamIE
-from .storyfire import (
- StoryFireIE,
- StoryFireUserIE,
- StoryFireSeriesIE,
-)
-from .streamable import StreamableIE
-from .streamanity import StreamanityIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streamff import StreamFFIE
-from .streetvoice import StreetVoiceIE
-from .stretchinternet import StretchInternetIE
-from .stripchat import StripchatIE
-from .stv import STVPlayerIE
-from .sunporno import SunPornoIE
-from .sverigesradio import (
- SverigesRadioEpisodeIE,
- SverigesRadioPublicationIE,
-)
-from .svt import (
- SVTIE,
- SVTPageIE,
- SVTPlayIE,
- SVTSeriesIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import TagesschauIE
-from .tass import TassIE
-from .tbs import TBSIE
-from .tdslifeway import TDSLifewayIE
-from .teachable import (
- TeachableIE,
- TeachableCourseIE,
-)
-from .teachertube import (
- TeacherTubeIE,
- TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .teamtreehouse import TeamTreeHouseIE
-from .techtalks import TechTalksIE
-from .ted import (
- TedEmbedIE,
- TedPlaylistIE,
- TedSeriesIE,
- TedTalkIE,
-)
-from .tele5 import Tele5IE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telegram import TelegramEmbedIE
-from .telemb import TeleMBIE
-from .telemundo import TelemundoIE
-from .telequebec import (
- TeleQuebecIE,
- TeleQuebecSquatIE,
- TeleQuebecEmissionIE,
- TeleQuebecLiveIE,
- TeleQuebecVideoIE,
-)
-from .teletask import TeleTaskIE
-from .telewebion import TelewebionIE
-from .tennistv import TennisTVIE
-from .tenplay import TenPlayIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .tfo import TFOIE
-from .theintercept import TheInterceptIE
-from .theplatform import (
- ThePlatformIE,
- ThePlatformFeedIE,
-)
-from .thestar import TheStarIE
-from .thesun import TheSunIE
-from .theta import (
- ThetaVideoIE,
- ThetaStreamIE,
-)
-from .theweatherchannel import TheWeatherChannelIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .thisoldhouse import ThisOldHouseIE
-from .threespeak import (
- ThreeSpeakIE,
- ThreeSpeakUserIE,
-)
-from .threeqsdn import ThreeQSDNIE
-from .tiktok import (
- TikTokIE,
- TikTokUserIE,
- TikTokSoundIE,
- TikTokEffectIE,
- TikTokTagIE,
- TikTokVMIE,
- DouyinIE,
-)
-from .tinypic import TinyPicIE
-from .tmz import TMZIE
-from .tnaflix import (
- TNAFlixNetworkEmbedIE,
- TNAFlixIE,
- EMPFlixIE,
- MovieFapIE,
-)
-from .toggle import (
- ToggleIE,
- MeWatchIE,
-)
-from .toggo import (
- ToggoIE,
-)
-from .tokentube import (
- TokentubeIE,
- TokentubeChannelIE
-)
-from .tonline import TOnlineIE
-from .toongoggles import ToonGogglesIE
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trovo import (
- TrovoIE,
- TrovoVodIE,
- TrovoChannelVodIE,
- TrovoChannelClipIE,
-)
-from .trueid import TrueIDIE
-from .trunews import TruNewsIE
-from .trutv import TruTVIE
-from .tube8 import Tube8IE
-from .tubitv import (
- TubiTvIE,
- TubiTvShowIE,
-)
-from .tumblr import TumblrIE
-from .tunein import (
- TuneInClipIE,
- TuneInStationIE,
- TuneInProgramIE,
- TuneInTopicIE,
- TuneInShortenerIE,
-)
-from .tunepk import TunePkIE
-from .turbo import TurboIE
-from .tv2 import (
- TV2IE,
- TV2ArticleIE,
- KatsomoIE,
- MTVUutisetArticleIE,
-)
-from .tv2dk import (
- TV2DKIE,
- TV2DKBornholmPlayIE,
-)
-from .tv2hu import (
- TV2HuIE,
- TV2HuSeriesIE,
-)
-from .tv4 import TV4IE
-from .tv5mondeplus import TV5MondePlusIE
-from .tv5unis import (
- TV5UnisVideoIE,
- TV5UnisIE,
-)
-from .tva import (
- TVAIE,
- QubIE,
-)
-from .tvanouvelles import (
- TVANouvellesIE,
- TVANouvellesArticleIE,
-)
-from .tvc import (
- TVCIE,
- TVCArticleIE,
-)
-from .tver import TVerIE
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvn24 import TVN24IE
-from .tvnet import TVNetIE
-from .tvnoe import TVNoeIE
-from .tvnow import (
- TVNowIE,
- TVNowFilmIE,
- TVNowNewIE,
- TVNowSeasonIE,
- TVNowAnnualIE,
- TVNowShowIE,
-)
-from .tvopengr import (
- TVOpenGrWatchIE,
- TVOpenGrEmbedIE,
-)
-from .tvp import (
- TVPEmbedIE,
- TVPIE,
- TVPStreamIE,
- TVPWebsiteIE,
-)
-from .tvplay import (
- TVPlayIE,
- ViafreeIE,
- TVPlayHomeIE,
-)
-from .tvplayer import TVPlayerIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import (
- TwitCastingIE,
- TwitCastingLiveIE,
- TwitCastingUserIE,
-)
-from .twitch import (
- TwitchVodIE,
- TwitchCollectionIE,
- TwitchVideosIE,
- TwitchVideosClipsIE,
- TwitchVideosCollectionsIE,
- TwitchStreamIE,
- TwitchClipsIE,
-)
-from .twitter import (
- TwitterCardIE,
- TwitterIE,
- TwitterAmplifyIE,
- TwitterBroadcastIE,
- TwitterShortenerIE,
-)
-from .udemy import (
- UdemyIE,
- UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .ufctv import (
- UFCTVIE,
- UFCArabiaIE,
-)
-from .ukcolumn import UkColumnIE
-from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
- DLiveVODIE,
- DLiveStreamIE,
-)
-from .drooble import DroobleIE
-from .umg import UMGDeIE
-from .unistra import UnistraIE
-from .unity import UnityIE
-from .uol import UOLIE
-from .uplynk import (
- UplynkIE,
- UplynkPreplayIE,
-)
-from .urort import UrortIE
-from .urplay import URPlayIE
-from .usanetwork import USANetworkIE
-from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .ustudio import (
- UstudioIE,
- UstudioEmbedIE,
-)
-from .utreon import UtreonIE
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veo import VeoIE
-from .veoh import VeohIE
-from .vesti import VestiIE
-from .vevo import (
- VevoIE,
- VevoPlaylistIE,
-)
-from .vgtv import (
- BTArticleIE,
- BTVestlendingenIE,
- VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import (
- ViceIE,
- ViceArticleIE,
- ViceShowIE,
-)
-from .vidbit import VidbitIE
-from .viddler import ViddlerIE
-from .videa import VideaIE
-from .videocampus_sachsen import VideocampusSachsenIE
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomore import (
- VideomoreIE,
- VideomoreVideoIE,
- VideomoreSeasonIE,
-)
-from .videopress import VideoPressIE
-from .vidio import (
- VidioIE,
- VidioPremierIE,
- VidioLiveIE
-)
-from .vidlii import VidLiiIE
-from .vier import VierIE, VierVideosIE
-from .viewlift import (
- ViewLiftIE,
- ViewLiftEmbedIE,
-)
-from .viidea import ViideaIE
-from .vimeo import (
- VimeoIE,
- VimeoAlbumIE,
- VimeoChannelIE,
- VimeoGroupsIE,
- VimeoLikesIE,
- VimeoOndemandIE,
- VimeoReviewIE,
- VimeoUserIE,
- VimeoWatchLaterIE,
- VHXEmbedIE,
-)
-from .vimm import (
- VimmIE,
- VimmRecordingIE,
-)
-from .vimple import VimpleIE
-from .vine import (
- VineIE,
- VineUserIE,
-)
-from .viki import (
- VikiIE,
- VikiChannelIE,
-)
-from .viqeo import ViqeoIE
-from .viu import (
- ViuIE,
- ViuPlaylistIE,
- ViuOTTIE,
-)
-from .vk import (
- VKIE,
- VKUserVideosIE,
- VKWallPostIE,
-)
-from .vlive import (
- VLiveIE,
- VLivePostIE,
- VLiveChannelIE,
-)
-from .vodlocker import VodlockerIE
-from .vodpl import VODPlIE
-from .vodplatform import VODPlatformIE
-from .voicerepublic import VoiceRepublicIE
-from .voicy import (
- VoicyIE,
- VoicyChannelIE,
-)
-from .voot import (
- VootIE,
- VootSeriesIE,
-)
-from .voxmedia import (
- VoxMediaVolumeIE,
- VoxMediaIE,
-)
-from .vrt import VRTIE
-from .vrak import VrakIE
-from .vrv import (
- VRVIE,
- VRVSeriesIE,
-)
-from .vshare import VShareIE
-from .vtm import VTMIE
-from .medialaan import MedialaanIE
-from .vuclip import VuClipIE
-from .vupload import VuploadIE
-from .vvvvid import (
- VVVVIDIE,
- VVVVIDShowIE,
-)
-from .vyborymos import VyboryMosIE
-from .vzaar import VzaarIE
-from .wakanim import WakanimIE
-from .walla import WallaIE
-from .washingtonpost import (
- WashingtonPostIE,
- WashingtonPostArticleIE,
-)
-from .wasdtv import (
- WASDTVStreamIE,
- WASDTVRecordIE,
- WASDTVClipIE,
-)
-from .wat import WatIE
-from .watchbox import WatchBoxIE
-from .watchindianporn import WatchIndianPornIE
-from .wdr import (
- WDRIE,
- WDRPageIE,
- WDRElefantIE,
- WDRMobileIE,
-)
-from .webcaster import (
- WebcasterIE,
- WebcasterFeedIE,
-)
-from .webofstories import (
- WebOfStoriesIE,
- WebOfStoriesPlaylistIE,
-)
-from .weibo import (
- WeiboIE,
- WeiboMobileIE
-)
-from .weiqitv import WeiqiTVIE
-from .willow import WillowIE
-from .wimtv import WimTVIE
-from .whowatch import WhoWatchIE
-from .wistia import (
- WistiaIE,
- WistiaPlaylistIE,
-)
-from .worldstarhiphop import WorldStarHipHopIE
-from .wppilot import (
- WPPilotIE,
- WPPilotChannelsIE,
-)
-from .wsj import (
- WSJIE,
- WSJArticleIE,
-)
-from .wwe import WWEIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
- XHamsterIE,
- XHamsterEmbedIE,
- XHamsterUserIE,
-)
-from .xiami import (
- XiamiSongIE,
- XiamiAlbumIE,
- XiamiArtistIE,
- XiamiCollectionIE
-)
-from .ximalaya import (
- XimalayaIE,
- XimalayaAlbumIE
-)
-from .xinpianchang import XinpianchangIE
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
- YahooIE,
- YahooSearchIE,
- YahooGyaOPlayerIE,
- YahooGyaOIE,
- YahooJapanNewsIE,
-)
-from .yandexdisk import YandexDiskIE
-from .yandexmusic import (
- YandexMusicTrackIE,
- YandexMusicAlbumIE,
- YandexMusicPlaylistIE,
- YandexMusicArtistTracksIE,
- YandexMusicArtistAlbumsIE,
-)
-from .yandexvideo import (
- YandexVideoIE,
- YandexVideoPreviewIE,
- ZenYandexIE,
- ZenYandexChannelIE,
-)
-from .yapfiles import YapFilesIE
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import (
- YoukuIE,
- YoukuShowIE,
-)
-from .younow import (
- YouNowLiveIE,
- YouNowChannelIE,
- YouNowMomentIE,
-)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
- YoutubeIE,
- YoutubeClipIE,
- YoutubeFavouritesIE,
- YoutubeNotificationsIE,
- YoutubeHistoryIE,
- YoutubeTabIE,
- YoutubeLivestreamEmbedIE,
- YoutubePlaylistIE,
- YoutubeRecommendedIE,
- YoutubeSearchDateIE,
- YoutubeSearchIE,
- YoutubeSearchURLIE,
- YoutubeMusicSearchURLIE,
- YoutubeSubscriptionsIE,
- YoutubeStoriesIE,
- YoutubeTruncatedIDIE,
- YoutubeTruncatedURLIE,
- YoutubeYtBeIE,
- YoutubeYtUserIE,
- YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zattoo import (
- BBVTVIE,
- EinsUndEinsTVIE,
- EWETVIE,
- GlattvisionTVIE,
- MNetTVIE,
- NetPlusIE,
- OsnatelTVIE,
- QuantumTVIE,
- SaltTVIE,
- SAKTVIE,
- VTXTVIE,
- WalyTVIE,
- ZattooIE,
- ZattooLiveIE,
- ZattooMoviesIE,
- ZattooRecordingsIE,
-)
-from .zdf import ZDFIE, ZDFChannelIE
-from .zee5 import (
- Zee5IE,
- Zee5SeriesIE,
-)
-from .zhihu import ZhihuIE
-from .zingmp3 import (
- ZingMp3IE,
- ZingMp3AlbumIE,
- ZingMp3ChartHomeIE,
- ZingMp3WeekChartIE,
- ZingMp3ChartMusicVideoIE,
- ZingMp3UserIE,
-)
-from .zoom import ZoomIE
-from .zype import ZypeIE
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
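+# Plugins are prepended to _ALL_CLASSES so they take priority over the builtin extractors when matching URLs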
diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py
index de45f9298..5b34f3bff 100644
--- a/yt_dlp/extractor/facebook.py
+++ b/yt_dlp/extractor/facebook.py
@@ -1,18 +1,18 @@
import json
import re
+import urllib.parse
from .common import InfoExtractor
from ..compat import (
compat_etree_fromstring,
compat_str,
compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
)
from ..utils import (
+ ExtractorError,
clean_html,
determine_ext,
error_to_compat_str,
- ExtractorError,
float_or_none,
get_element_by_id,
get_first,
@@ -467,7 +467,7 @@ class FacebookIE(InfoExtractor):
dash_manifest = video.get('dash_manifest')
if dash_manifest:
formats.extend(self._parse_mpd_formats(
- compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+ compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest))))
def process_formats(formats):
# Downloads with browser's User-Agent are rate limited. Working around
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 225677b00..3501c4cf6 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -1,16 +1,13 @@
import re
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
-)
+from ..compat import compat_parse_qs
from ..dependencies import websockets
from ..utils import (
ExtractorError,
WebSocketsWrapper,
js_to_json,
sanitized_Request,
- std_headers,
traverse_obj,
update_url_query,
urlencode_postdata,
@@ -81,7 +78,7 @@ class FC2IE(InfoExtractor):
webpage = None
if not url.startswith('fc2:'):
webpage = self._download_webpage(url, video_id)
- self._downloader.cookiejar.clear_session_cookies() # must clear
+ self.cookiejar.clear_session_cookies() # must clear
self._login()
title, thumbnail, description = None, None, None
@@ -207,10 +204,10 @@ class FC2LiveIE(InfoExtractor):
'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
'Origin': 'https://live.fc2.com',
'Accept': '*/*',
- 'User-Agent': std_headers['User-Agent'],
+ 'User-Agent': self.get_param('http_headers')['User-Agent'],
})
- self.write_debug('[debug] Sending HLS server request')
+ self.write_debug('Sending HLS server request')
while True:
recv = ws.recv()
@@ -232,13 +229,10 @@ class FC2LiveIE(InfoExtractor):
if not data or not isinstance(data, dict):
continue
if data.get('name') == '_response_' and data.get('id') == 1:
- self.write_debug('[debug] Goodbye.')
+ self.write_debug('Goodbye')
playlist_data = data
break
- elif self._downloader.params.get('verbose', False):
- if len(recv) > 100:
- recv = recv[:100] + '...'
- self.to_screen('[debug] Server said: %s' % recv)
+ self.write_debug('Server said: %s%s' % (recv[:100], '...' if len(recv) > 100 else ''))
if not playlist_data:
raise ExtractorError('Unable to fetch HLS playlist info via WebSocket')
diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py
index 552ecd43a..9f60a6b1f 100644
--- a/yt_dlp/extractor/flickr.py
+++ b/yt_dlp/extractor/flickr.py
@@ -94,7 +94,7 @@ class FlickrIE(InfoExtractor):
owner = video_info.get('owner', {})
uploader_id = owner.get('nsid')
uploader_path = owner.get('path_alias') or uploader_id
- uploader_url = format_field(uploader_path, template='https://www.flickr.com/photos/%s/')
+ uploader_url = format_field(uploader_path, None, 'https://www.flickr.com/photos/%s/')
return {
'id': video_id,
diff --git a/yt_dlp/extractor/fourzerostudio.py b/yt_dlp/extractor/fourzerostudio.py
new file mode 100644
index 000000000..e1804e39e
--- /dev/null
+++ b/yt_dlp/extractor/fourzerostudio.py
@@ -0,0 +1,107 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj, unified_timestamp
+
+
+class FourZeroStudioArchiveIE(InfoExtractor):
+ _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/broadcasts/(?P<id>[^/]+)/archive'
+ IE_NAME = '0000studio:archive'
+ _TESTS = [{
+ 'url': 'https://0000.studio/mumeijiten/broadcasts/1290f433-fce0-4909-a24a-5f7df09665dc/archive',
+ 'info_dict': {
+ 'id': '1290f433-fce0-4909-a24a-5f7df09665dc',
+ 'title': 'noteで『canape』様へのファンレターを執筆します。(数秘術その2)',
+ 'timestamp': 1653802534,
+ 'release_timestamp': 1653796604,
+ 'thumbnails': 'count:1',
+ 'comments': 'count:7',
+ 'uploader': '『中崎雄心』の執務室。',
+ 'uploader_id': 'mumeijiten',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, video_id)
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
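+ # Entries in the NUXT 'ssrRefs' store are discriminated by '__typename'; pick out the broadcast and uploader records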
+ pcb = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorBroadcast'), get_all=False)
+ uploader_internal_id = traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'id'), get_all=False)
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(pcb['archiveUrl'], video_id, ext='mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': pcb.get('title'),
+ 'age_limit': 18 if pcb.get('isAdult') else None,
+ 'timestamp': unified_timestamp(pcb.get('finishTime')),
+ 'release_timestamp': unified_timestamp(pcb.get('createdAt')),
+ 'thumbnails': [{
+ 'url': pcb['thumbnailUrl'],
+ 'ext': 'png',
+ }] if pcb.get('thumbnailUrl') else None,
+ 'formats': formats,
+ 'subtitles': subs,
+ 'comments': [{
+ 'author': c.get('username'),
+ 'author_id': c.get('postedUserId'),
+ 'author_thumbnail': c.get('userThumbnailUrl'),
+ 'id': c.get('id'),
+ 'text': c.get('body'),
+ 'timestamp': unified_timestamp(c.get('createdAt')),
+ 'like_count': c.get('likeCount'),
+ 'is_favorited': c.get('isLikedByOwner'),
+ 'author_is_uploader': c.get('postedUserId') == uploader_internal_id,
+ } for c in traverse_obj(nuxt_data, (
+ 'ssrRefs', ..., lambda _, v: v['__typename'] == 'PublicCreatorBroadcastComment')) or []],
+ 'uploader_id': uploader_id,
+ 'uploader': traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+ }
+
+
+class FourZeroStudioClipIE(InfoExtractor):
+ _VALID_URL = r'https?://0000\.studio/(?P<uploader_id>[^/]+)/archive-clip/(?P<id>[^/]+)'
+ IE_NAME = '0000studio:clip'
+ _TESTS = [{
+ 'url': 'https://0000.studio/soeji/archive-clip/e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+ 'info_dict': {
+ 'id': 'e46b0278-24cd-40a8-92e1-b8fc2b21f34f',
+ 'title': 'わたベーさんからイラスト差し入れいただきました。ありがとうございました!',
+ 'timestamp': 1652109105,
+ 'like_count': 1,
+ 'uploader': 'ソエジマケイタ',
+ 'uploader_id': 'soeji',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ webpage = self._download_webpage(url, video_id)
+ nuxt_data = self._search_nuxt_data(webpage, video_id, traverse=None)
+
+ clip_info = traverse_obj(nuxt_data, ('ssrRefs', lambda _, v: v['__typename'] == 'PublicCreatorArchivedClip'), get_all=False)
+
+ info = next((
+ m for m in self._parse_html5_media_entries(url, webpage, video_id)
+ if 'mp4' in traverse_obj(m, ('formats', ..., 'ext'))
+ ), None)
+ if not info:
+ self.report_warning('Failed to find a suitable media element; falling back to NUXT data')
+ info = {
+ 'formats': [{
+ 'ext': 'mp4',
+ 'url': url,
+ } for url in clip_info.get('mediaFiles') or [] if url],
+ }
+ return {
+ **info,
+ 'id': video_id,
+ 'title': clip_info.get('clipComment'),
+ 'timestamp': unified_timestamp(clip_info.get('createdAt')),
+ 'like_count': clip_info.get('likeCount'),
+ 'uploader_id': uploader_id,
+ 'uploader': traverse_obj(nuxt_data, (
+ 'ssrRefs', lambda _, v: v['__typename'] == 'PublicUser', 'username'), get_all=False),
+ }
diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py
index 4abc2cfd0..b285464ec 100644
--- a/yt_dlp/extractor/foxgay.py
+++ b/yt_dlp/extractor/foxgay.py
@@ -31,7 +31,7 @@ class FoxgayIE(InfoExtractor):
description = get_element_by_id('inf_tit', webpage)
# The default user-agent with foxgay cookies leads to pages without videos
- self._downloader.cookiejar.clear('.foxgay.com')
+ self.cookiejar.clear('.foxgay.com')
# Find the URL for the iFrame which contains the actual video.
iframe_url = self._html_search_regex(
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1', webpage,
diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py
index cee4d6b49..e8513f2c2 100644
--- a/yt_dlp/extractor/foxnews.py
+++ b/yt_dlp/extractor/foxnews.py
@@ -59,10 +59,13 @@ class FoxNewsIE(AMPIE):
@staticmethod
def _extract_urls(webpage):
return [
- mobj.group('url')
+ f'https://video.foxnews.com/v/video-embed.html?video_id={mobj.group("video_id")}'
for mobj in re.finditer(
- r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1',
- webpage)]
+ r'''(?x)
+ <(?:script|(?:amp-)?iframe)[^>]+\bsrc=["\']
+ (?:https?:)?//video\.foxnews\.com/v/(?:video-embed\.html|embed\.js)\?
+ (?:[^>"\']+&)?(?:video_)?id=(?P<video_id>\d+)
+ ''', webpage)]
def _real_extract(self, url):
host, video_id = self._match_valid_url(url).groups()
diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py
deleted file mode 100644
index 6bd9912f3..000000000
--- a/yt_dlp/extractor/franceculture.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import re
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- extract_attributes,
- int_or_none,
- traverse_obj,
- unified_strdate,
-)
-
-
-class FranceCultureIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- # playlist
- 'url': 'https://www.franceculture.fr/emissions/serie/hasta-dente',
- 'playlist_count': 12,
- 'info_dict': {
- 'id': 'hasta-dente',
- 'title': 'Hasta Dente',
- 'description': 'md5:57479af50648d14e9bb649e6b1f8f911',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20201024',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '3c1c2e55-41a0-11e5-9fe0-005056a87c89',
- 'ext': 'mp3',
- 'title': 'Jeudi, vous avez dit bizarre ?',
- 'description': 'md5:47cf1e00cc21c86b0210279996a812c6',
- 'duration': 604,
- 'upload_date': '20201024',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1603576680
- },
- },
- ],
- }, {
- 'url': 'https://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
- 'info_dict': {
- 'id': 'rendez-vous-au-pays-des-geeks',
- 'display_id': 'rendez-vous-au-pays-des-geeks',
- 'ext': 'mp3',
- 'title': 'Rendez-vous au pays des geeks',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20140301',
- 'vcodec': 'none',
- 'duration': 3569,
- },
- }, {
- # no thumbnail
- 'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- info = {
- 'id': display_id,
- 'title': self._html_search_regex(
- r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
- webpage, 'title', default=self._og_search_title(webpage)),
- 'description': self._html_search_regex(
- r'(?s)<div[^>]+class="excerpt"[^>]*>(.*?)</div>', webpage, 'description', default=None),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader': self._html_search_regex(
- r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
- 'upload_date': unified_strdate(self._html_search_regex(
- r'(?s)class="teaser-text-date".*?(\d{2}/\d{2}/\d{4})', webpage, 'date', default=None)),
- }
-
- playlist_data = self._search_regex(
- r'''(?sx)
- <section[^>]+data-xiti-place="[^"]*?liste_episodes[^"?]*?"[^>]*>
- (.*?)
- </section>
- ''',
- webpage, 'playlist data', fatal=False, default=None)
-
- if playlist_data:
- entries = []
- for item, item_description in re.findall(
- r'(?s)(<button[^<]*class="[^"]*replay-button[^>]*>).*?<p[^>]*class="[^"]*teaser-text-chapo[^>]*>(.*?)</p>',
- playlist_data):
-
- item_attributes = extract_attributes(item)
- entries.append({
- 'id': item_attributes.get('data-emission-uuid'),
- 'url': item_attributes.get('data-url'),
- 'title': item_attributes.get('data-diffusion-title'),
- 'duration': int_or_none(traverse_obj(item_attributes, 'data-duration-seconds', 'data-duration-seconds')),
- 'description': item_description,
- 'timestamp': int_or_none(item_attributes.get('data-start-time')),
- 'thumbnail': info['thumbnail'],
- 'uploader': info['uploader'],
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- **info
- }
-
- video_data = extract_attributes(self._search_regex(
- r'''(?sx)
- (?:
- </h1>|
- <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
- ).*?
- (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
- ''',
- webpage, 'video data'))
- video_url = traverse_obj(video_data, 'data-url', 'data-asset-source')
- ext = determine_ext(video_url.lower())
-
- return {
- 'display_id': display_id,
- 'url': video_url,
- 'ext': ext,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'duration': int_or_none(video_data.get('data-duration')),
- **info
- }
diff --git a/yt_dlp/extractor/freetv.py b/yt_dlp/extractor/freetv.py
new file mode 100644
index 000000000..f38bae90b
--- /dev/null
+++ b/yt_dlp/extractor/freetv.py
@@ -0,0 +1,141 @@
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj, urlencode_postdata
+
+
+class FreeTvBaseIE(InfoExtractor):
+ def _get_api_response(self, content_id, resource_type, postdata):
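+ # All content metadata is served as JSON through the site's WordPress admin-ajax endpoint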
+ return self._download_json(
+ 'https://www.freetv.com/wordpress/wp-admin/admin-ajax.php',
+ content_id, data=urlencode_postdata(postdata),
+ note=f'Downloading {content_id} {resource_type} JSON')['data']
+
+
+class FreeTvMoviesIE(FreeTvBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/peliculas/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/peliculas/atrapame-si-puedes/',
+ 'md5': 'dc62d5abf0514726640077cd1591aa92',
+ 'info_dict': {
+ 'id': '428021',
+ 'title': 'Atrápame Si Puedes',
+ 'description': 'md5:ca63bc00898aeb2f64ec87c6d3a5b982',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'url': 'https://www.freetv.com/peliculas/monstruoso/',
+ 'md5': '509c15c68de41cb708d1f92d071f20aa',
+ 'info_dict': {
+ 'id': '377652',
+ 'title': 'Monstruoso',
+ 'description': 'md5:333fc19ee327b457b980e54a911ea4a3',
+ 'ext': 'mp4',
+ }
+ }]
+
+ def _extract_video(self, content_id, action='olyott_video_play'):
+ api_response = self._get_api_response(content_id, 'video', {
+ 'action': action,
+ 'contentID': content_id,
+ })
+
+ video_id, video_url = api_response['displayMeta']['contentID'], api_response['displayMeta']['streamURLVideo']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(api_response, ('displayMeta', 'title')),
+ 'description': traverse_obj(api_response, ('displayMeta', 'desc')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ return self._extract_video(
+ self._search_regex((
+ r'class=["\'][^>]+postid-(?P<video_id>\d+)',
+ r'<link[^>]+freetv.com/\?p=(?P<video_id>\d+)',
+ r'<div[^>]+data-params=["\'][^>]+post_id=(?P<video_id>\d+)',
+ ), webpage, 'video id', group='video_id'))
+
+
+class FreeTvIE(FreeTvBaseIE):
+ IE_NAME = 'freetv:series'
+ _VALID_URL = r'https?://(?:www\.)?freetv\.com/series/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.freetv.com/series/el-detective-l/',
+ 'info_dict': {
+ 'id': 'el-detective-l',
+ 'title': 'El Detective L',
+ 'description': 'md5:f9f1143bc33e9856ecbfcbfb97a759be'
+ },
+ 'playlist_count': 24,
+ }, {
+ 'url': 'https://www.freetv.com/series/esmeraldas/',
+ 'info_dict': {
+ 'id': 'esmeraldas',
+ 'title': 'Esmeraldas',
+ 'description': 'md5:43d7ec45bd931d8268a4f5afaf4c77bf'
+ },
+ 'playlist_count': 62,
+ }, {
+ 'url': 'https://www.freetv.com/series/las-aventuras-de-leonardo/',
+ 'info_dict': {
+ 'id': 'las-aventuras-de-leonardo',
+ 'title': 'Las Aventuras de Leonardo',
+ 'description': 'md5:0c47130846c141120a382aca059288f6'
+ },
+ 'playlist_count': 13,
+ },
+ ]
+
+ def _extract_series_season(self, season_id, series_title):
+ episodes = self._get_api_response(season_id, 'series', {
+ 'contentID': season_id,
+ 'action': 'olyott_get_dynamic_series_content',
+ 'type': 'list',
+ 'perPage': '1000',
+ })['1']
+
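+ # Each episode entry in the season listing carries its own HLS stream URL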
+ for episode in episodes:
+ video_id = str(episode['contentID'])
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(episode['streamURL'], video_id, 'mp4')
+ self._sort_formats(formats)
+
+ yield {
+ 'id': video_id,
+ 'title': episode.get('fullTitle'),
+ 'description': episode.get('description'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnail': episode.get('thumbnail'),
+ 'series': series_title,
+ 'series_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seriesID')),
+ 'season_id': traverse_obj(episode, ('contentMeta', 'displayMeta', 'seasonID')),
+ 'season_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'seasonNum'), expected_type=int_or_none),
+ 'episode_number': traverse_obj(
+ episode, ('contentMeta', 'displayMeta', 'episodeNum'), expected_type=int_or_none),
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<h1[^>]+class=["\']synopis[^>]>(?P<title>[^<]+)', webpage, 'title', group='title', fatal=False)
+ description = self._html_search_regex(
+ r'<div[^>]+class=["\']+synopis content[^>]><p>(?P<description>[^<]+)',
+ webpage, 'description', group='description', fatal=False)
+
+ return self.playlist_result(
+ itertools.chain.from_iterable(
+ self._extract_series_season(season_id, title)
+ for season_id in re.findall(r'<option[^>]+value=["\'](\d+)["\']', webpage)),
+ display_id, title, description)
diff --git a/yt_dlp/extractor/fuyintv.py b/yt_dlp/extractor/fuyintv.py
new file mode 100644
index 000000000..197901d57
--- /dev/null
+++ b/yt_dlp/extractor/fuyintv.py
@@ -0,0 +1,30 @@
+from .common import InfoExtractor
+from ..utils import traverse_obj
+
+
+class FuyinTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fuyin\.tv/html/(?:\d+)/(?P<id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'https://www.fuyin.tv/html/2733/44129.html',
+ 'info_dict': {
+ 'id': '44129',
+ 'ext': 'mp4',
+ 'title': '第1集',
+ 'description': 'md5:21a3d238dc8d49608e1308e85044b9c3',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ json_data = self._download_json(
+ 'https://www.fuyin.tv/api/api/tv.movie/url',
+ video_id, query={'urlid': f'{video_id}'})
+ webpage = self._download_webpage(url, video_id, fatal=False)
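+ # The API returns a direct MP4 URL; the webpage is fetched non-fatally, only for its description meta tag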
+
+ return {
+ 'id': video_id,
+ 'title': traverse_obj(json_data, ('data', 'title')),
+ 'url': json_data['data']['url'],
+ 'ext': 'mp4',
+ 'description': self._html_search_meta('description', webpage),
+ }
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index f594d02c2..c2f754453 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1,5 +1,6 @@
import os
import re
+import urllib.parse
import xml.etree.ElementTree
from .ant1newsgr import Ant1NewsGrEmbedIE
@@ -69,11 +70,13 @@ from .spankwire import SpankwireIE
from .sportbox import SportBoxIE
from .spotify import SpotifyBaseIE
from .springboardplatform import SpringboardPlatformIE
+from .substack import SubstackIE
from .svt import SVTIE
from .teachable import TeachableIE
from .ted import TedEmbedIE
from .theplatform import ThePlatformIE
from .threeqsdn import ThreeQSDNIE
+from .tiktok import TikTokIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .tube8 import Tube8IE
from .tunein import TuneInBaseIE
@@ -104,12 +107,7 @@ from .yapfiles import YapFilesIE
from .youporn import YouPornIE
from .youtube import YoutubeIE
from .zype import ZypeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
-)
+from ..compat import compat_etree_fromstring
from ..utils import (
KNOWN_EXTENSIONS,
ExtractorError,
@@ -129,6 +127,7 @@ from ..utils import (
sanitized_Request,
smuggle_url,
str_or_none,
+ try_call,
unescapeHTML,
unified_timestamp,
unsmuggle_url,
@@ -2526,6 +2525,118 @@ class GenericIE(InfoExtractor):
'upload_date': '20220504',
},
},
+ {
+ # Webpage contains double BOM
+ 'url': 'https://www.filmarkivet.se/movies/paris-d-moll/',
+ 'md5': 'df02cadc719dcc63d43288366f037754',
+ 'info_dict': {
+ 'id': 'paris-d-moll',
+ 'ext': 'mp4',
+ 'upload_date': '20220518',
+ 'title': 'Paris d-moll',
+ 'description': 'md5:319e37ea5542293db37e1e13072fe330',
+ 'thumbnail': 'https://www.filmarkivet.se/wp-content/uploads/parisdmoll2.jpg',
+ 'timestamp': 1652833414,
+ 'age_limit': 0,
+ }
+ },
+ {
+ 'url': 'https://www.mollymovieclub.com/p/interstellar?s=r#details',
+ 'md5': '198bde8bed23d0b23c70725c83c9b6d9',
+ 'info_dict': {
+ 'id': '53602801',
+ 'ext': 'mpga',
+ 'title': 'Interstellar',
+ 'description': 'Listen now | Episode One',
+ 'thumbnail': 'md5:c30d9c83f738e16d8551d7219d321538',
+ 'uploader': 'Molly Movie Club',
+ 'uploader_id': '839621',
+ },
+ },
+ {
+ 'url': 'https://www.blockedandreported.org/p/episode-117-lets-talk-about-depp?s=r',
+ 'md5': 'c0cc44ee7415daeed13c26e5b56d6aa0',
+ 'info_dict': {
+ 'id': '57962052',
+ 'ext': 'mpga',
+ 'title': 'md5:855b2756f0ee10f6723fa00b16266f8d',
+ 'description': 'md5:fe512a5e94136ad260c80bde00ea4eef',
+ 'thumbnail': 'md5:2218f27dfe517bb5ac16c47d0aebac59',
+ 'uploader': 'Blocked and Reported',
+ 'uploader_id': '500230',
+ },
+ },
+ {
+ 'url': 'https://www.skimag.com/video/ski-people-1980/',
+ 'info_dict': {
+ 'id': 'ski-people-1980',
+ 'title': 'Ski People (1980)',
+ },
+ 'playlist_count': 1,
+ 'playlist': [{
+ 'md5': '022a7e31c70620ebec18deeab376ee03',
+ 'info_dict': {
+ 'id': 'YTmgRiNU',
+ 'ext': 'mp4',
+ 'title': '1980 Ski People',
+ 'timestamp': 1610407738,
+ 'description': 'md5:cf9c3d101452c91e141f292b19fe4843',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/YTmgRiNU/poster.jpg?width=720',
+ 'duration': 5688.0,
+ 'upload_date': '20210111',
+ }
+ }]
+ },
+ {
+ 'note': 'Rumble embed',
+ 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html',
+ 'md5': '53af34098a7f92c4e51cf0bd1c33f009',
+ 'info_dict': {
+ 'id': 'vb0ofn',
+ 'ext': 'mp4',
+ 'timestamp': 1612662578,
+ 'uploader': 'LovingMontana',
+ 'channel': 'LovingMontana',
+ 'upload_date': '20210207',
+ 'title': 'Winter-loving dog helps girls dig a snow fort ',
+ 'channel_url': 'https://rumble.com/c/c-546523',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg',
+ 'duration': 103,
+ }
+ },
+ {
+ 'note': 'Rumble JS embed',
+ 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it',
+ 'md5': '4701209ac99095592e73dbba21889690',
+ 'info_dict': {
+ 'id': 'v15eqxl',
+ 'ext': 'mp4',
+ 'channel': 'Mr Producer Media',
+ 'duration': 92,
+ 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh',
+ 'channel_url': 'https://rumble.com/c/RichSementa',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.OvCc-small-911-Audio-From-The-Man-Who-.jpg',
+ 'timestamp': 1654892716,
+ 'uploader': 'Mr Producer Media',
+ 'upload_date': '20220610',
+ }
+ },
+ {
+ 'note': 'JSON LD with multiple @type',
+ 'url': 'https://www.nu.nl/280161/video/hoe-een-bladvlo-dit-verwoestende-japanse-onkruid-moet-vernietigen.html',
+ 'md5': 'c7949f34f57273013fb7ccb1156393db',
+ 'info_dict': {
+ 'id': 'ipy2AcGL',
+ 'ext': 'mp4',
+ 'description': 'md5:6a9d644bab0dc2dc06849c2505d8383d',
+ 'thumbnail': r're:https://media\.nu\.nl/m/.+\.jpg',
+ 'title': 'Hoe een bladvlo dit verwoestende Japanse onkruid moet vernietigen',
+ 'timestamp': 1586577474,
+ 'upload_date': '20200411',
+ 'age_limit': 0,
+ 'duration': 111.0,
+ }
+ },
]
def report_following_redirect(self, new_url):
@@ -2536,66 +2647,44 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug(f'Identified a {name}')
def _extract_rss(self, url, video_id, doc):
- playlist_title = doc.find('./channel/title').text
- playlist_desc_el = doc.find('./channel/description')
- playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-
NS_MAP = {
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
}
entries = []
for it in doc.findall('./channel/item'):
- next_url = None
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
-
- if not next_url:
- next_url = xpath_text(it, 'link', fatal=False)
-
+ next_url = next(
+ (e.attrib.get('url') for e in it.findall('./enclosure')),
+ xpath_text(it, 'link', fatal=False))
if not next_url:
continue
- if it.find('guid').text is not None:
- next_url = smuggle_url(next_url, {'force_videoid': it.find('guid').text})
+ guid = try_call(lambda: it.find('guid').text)
+ if guid:
+ next_url = smuggle_url(next_url, {'force_videoid': guid})
def itunes(key):
- return xpath_text(
- it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
- default=None)
-
- duration = itunes('duration')
- explicit = (itunes('explicit') or '').lower()
- if explicit in ('true', 'yes'):
- age_limit = 18
- elif explicit in ('false', 'no'):
- age_limit = 0
- else:
- age_limit = None
+ return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)
entries.append({
'_type': 'url_transparent',
'url': next_url,
- 'title': it.find('title').text,
+ 'title': try_call(lambda: it.find('title').text),
'description': xpath_text(it, 'description', default=None),
- 'timestamp': unified_timestamp(
- xpath_text(it, 'pubDate', default=None)),
- 'duration': int_or_none(duration) or parse_duration(duration),
+ 'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
+ 'duration': parse_duration(itunes('duration')),
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
'episode': itunes('title'),
'episode_number': int_or_none(itunes('episode')),
'season_number': int_or_none(itunes('season')),
- 'age_limit': age_limit,
+ 'age_limit': {'true': 18, 'yes': 18, 'false': 0, 'no': 0}.get((itunes('explicit') or '').lower()),
})
return {
'_type': 'playlist',
'id': url,
- 'title': playlist_title,
- 'description': playlist_desc,
+ 'title': try_call(lambda: doc.find('./channel/title').text),
+ 'description': try_call(lambda: doc.find('./channel/description').text),
'entries': entries,
}
@@ -2610,7 +2699,7 @@ class GenericIE(InfoExtractor):
title = self._html_search_meta('DC.title', webpage, fatal=True)
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
camtasia_cfg = self._download_xml(
camtasia_url, video_id,
note='Downloading camtasia configuration',
@@ -2626,7 +2715,7 @@ class GenericIE(InfoExtractor):
entries.append({
'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
'title': f'{title} - {n.tag}',
- 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'url': urllib.parse.urljoin(url, url_n.text),
'duration': float_or_none(n.find('./duration').text),
})
@@ -2678,7 +2767,7 @@ class GenericIE(InfoExtractor):
if url.startswith('//'):
return self.url_result(self.http_scheme() + url)
- parsed_url = compat_urlparse.urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
if not parsed_url.scheme:
default_search = self.get_param('default_search')
if default_search is None:
@@ -2754,7 +2843,7 @@ class GenericIE(InfoExtractor):
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m:
self.report_detected('direct video link')
- format_id = compat_str(m.group('format_id'))
+ format_id = str(m.group('format_id'))
subtitles = {}
if format_id.endswith('mpegurl'):
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
@@ -2873,7 +2962,7 @@ class GenericIE(InfoExtractor):
# Unescaping the whole page allows to handle those cases in a generic way
# FIXME: unescaping the whole page may break URLs, commenting out for now.
# There probably should be a second run of generic extractor on unescaped webpage.
- # webpage = compat_urllib_parse_unquote(webpage)
+ # webpage = urllib.parse.unquote(webpage)
# Unescape squarespace embeds to be detected by generic extractor,
# see https://github.com/ytdl-org/youtube-dl/issues/21294
@@ -2975,7 +3064,7 @@ class GenericIE(InfoExtractor):
if vimeo_urls:
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
- vhx_url = VHXEmbedIE._extract_url(webpage)
+ vhx_url = VHXEmbedIE._extract_url(url, webpage)
if vhx_url:
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
@@ -3023,6 +3112,7 @@ class GenericIE(InfoExtractor):
wistia_urls = WistiaIE._extract_urls(webpage)
if wistia_urls:
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ playlist['entries'] = list(playlist['entries'])
for entry in playlist['entries']:
entry.update({
'_type': 'url_transparent',
@@ -3042,6 +3132,11 @@ class GenericIE(InfoExtractor):
# Don't set the extractor because it can be a track url or an album
return self.url_result(burl)
+ # Check for Substack custom domains
+ substack_url = SubstackIE._extract_url(webpage, url)
+ if substack_url:
+ return self.url_result(substack_url, SubstackIE)
+
# Look for embedded Vevo player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
@@ -3140,7 +3235,7 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+ return self.url_result(urllib.parse.unquote(mobj.group('url')))
# Look for funnyordie embed
matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
@@ -3393,7 +3488,7 @@ class GenericIE(InfoExtractor):
r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
if mobj is not None:
return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+ urllib.parse.urljoin(url, mobj.group('url')), 'UDNEmbed')
# Look for Senate ISVP iframe
senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
@@ -3626,7 +3721,7 @@ class GenericIE(InfoExtractor):
if mediasite_urls:
entries = [
self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
+ urllib.parse.urljoin(url, mediasite_url),
{'UrlReferrer': url}), ie=MediasiteIE.ie_key())
for mediasite_url in mediasite_urls]
return self.playlist_result(entries, video_id, video_title)
@@ -3762,6 +3857,11 @@ class GenericIE(InfoExtractor):
if ruutu_urls:
return self.playlist_from_matches(ruutu_urls, video_id, video_title)
+ # Look for Tiktok embeds
+ tiktok_urls = TikTokIE._extract_urls(webpage)
+ if tiktok_urls:
+ return self.playlist_from_matches(tiktok_urls, video_id, video_title)
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
@@ -3816,11 +3916,11 @@ class GenericIE(InfoExtractor):
subtitles = {}
for source in sources:
src = source.get('src')
- if not src or not isinstance(src, compat_str):
+ if not src or not isinstance(src, str):
continue
- src = compat_urlparse.urljoin(url, src)
+ src = urllib.parse.urljoin(url, src)
src_type = source.get('type')
- if isinstance(src_type, compat_str):
+ if isinstance(src_type, str):
src_type = src_type.lower()
ext = determine_ext(src).lower()
if src_type == 'video/youtube':
@@ -3854,7 +3954,7 @@ class GenericIE(InfoExtractor):
if not src:
continue
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
- 'url': compat_urlparse.urljoin(url, src),
+ 'url': urllib.parse.urljoin(url, src),
'name': sub.get('label'),
'http_headers': {
'Referer': full_response.geturl(),
@@ -3871,22 +3971,17 @@ class GenericIE(InfoExtractor):
json_ld = self._search_json_ld(webpage, video_id, default={})
if json_ld.get('url') not in (url, None):
self.report_detected('JSON LD')
- if determine_ext(json_ld['url']) == 'm3u8':
- json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles(
- json_ld['url'], video_id, 'mp4')
- json_ld.pop('url')
- self._sort_formats(json_ld['formats'])
- else:
- json_ld['_type'] = 'url_transparent'
- json_ld['url'] = smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True})
- return merge_dicts(json_ld, info_dict)
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True}),
+ }, json_ld, info_dict)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
if RtmpIE.suitable(vurl):
return True
- vpath = compat_urlparse.urlparse(vurl).path
+ vpath = urllib.parse.urlparse(vurl).path
vext = determine_ext(vpath, None)
return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
@@ -4014,7 +4109,7 @@ class GenericIE(InfoExtractor):
if refresh_header:
found = re.search(REDIRECT_REGEX, refresh_header)
if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ new_url = urllib.parse.urljoin(url, unescapeHTML(found.group(1)))
if new_url != url:
self.report_following_redirect(new_url)
return {
@@ -4040,8 +4135,8 @@ class GenericIE(InfoExtractor):
for video_url in orderedSet(found):
video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+ video_url = urllib.parse.urljoin(url, video_url)
+ video_id = urllib.parse.unquote(os.path.basename(video_url))
# Sometimes, jwplayer extraction will result in a YouTube URL
if YoutubeIE.suitable(video_url):
diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py
index 9e835a6da..e728598f7 100644
--- a/yt_dlp/extractor/giga.py
+++ b/yt_dlp/extractor/giga.py
@@ -1,13 +1,8 @@
import itertools
from .common import InfoExtractor
-from ..utils import (
- qualities,
- compat_str,
- parse_duration,
- parse_iso8601,
- str_to_int,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, parse_iso8601, qualities, str_to_int
class GigaIE(InfoExtractor):
diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py
index c0905f86a..d7475b6da 100644
--- a/yt_dlp/extractor/googledrive.py
+++ b/yt_dlp/extractor/googledrive.py
@@ -264,7 +264,7 @@ class GoogleDriveIE(InfoExtractor):
subtitles_id = ttsurl.encode('utf-8').decode(
'unicode_escape').split('=')[-1]
- self._downloader.cookiejar.clear(domain='.google.com', path='/', name='NID')
+ self.cookiejar.clear(domain='.google.com', path='/', name='NID')
return {
'id': video_id,
@@ -276,3 +276,59 @@ class GoogleDriveIE(InfoExtractor):
'automatic_captions': self.extract_automatic_captions(
video_id, subtitles_id, hl),
}
+
+
+class GoogleDriveFolderIE(InfoExtractor):
+ IE_NAME = 'GoogleDrive:Folder'
+ _VALID_URL = r'https?://(?:docs|drive)\.google\.com/drive/folders/(?P<id>[\w-]{28,})'
+ _TESTS = [{
+ 'url': 'https://drive.google.com/drive/folders/1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'info_dict': {
+ 'id': '1dQ4sx0-__Nvg65rxTSgQrl7VyW_FZ9QI',
+ 'title': 'Forrest'
+ },
+ 'playlist_count': 3,
+ }]
+ _BOUNDARY = '=====vc17a3rwnndj====='
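+ # _REQUEST is the inner HTTP request line sent through Drive's batch endpoint, wrapped in a multipart body delimited by _BOUNDARY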
+ _REQUEST = "/drive/v2beta/files?openDrive=true&reason=102&syncType=0&errorRecovery=false&q=trashed%20%3D%20false%20and%20'{folder_id}'%20in%20parents&fields=kind%2CnextPageToken%2Citems(kind%2CmodifiedDate%2CmodifiedByMeDate%2ClastViewedByMeDate%2CfileSize%2Cowners(kind%2CpermissionId%2Cid)%2ClastModifyingUser(kind%2CpermissionId%2Cid)%2ChasThumbnail%2CthumbnailVersion%2Ctitle%2Cid%2CresourceKey%2Cshared%2CsharedWithMeDate%2CuserPermission(role)%2CexplicitlyTrashed%2CmimeType%2CquotaBytesUsed%2Ccopyable%2CfileExtension%2CsharingUser(kind%2CpermissionId%2Cid)%2Cspaces%2Cversion%2CteamDriveId%2ChasAugmentedPermissions%2CcreatedDate%2CtrashingUser(kind%2CpermissionId%2Cid)%2CtrashedDate%2Cparents(id)%2CshortcutDetails(targetId%2CtargetMimeType%2CtargetLookupStatus)%2Ccapabilities(canCopy%2CcanDownload%2CcanEdit%2CcanAddChildren%2CcanDelete%2CcanRemoveChildren%2CcanShare%2CcanTrash%2CcanRename%2CcanReadTeamDrive%2CcanMoveTeamDriveItem)%2Clabels(starred%2Ctrashed%2Crestricted%2Cviewed))%2CincompleteSearch&appDataFilter=NO_APP_DATA&spaces=drive&pageToken={page_token}&maxResults=50&supportsTeamDrives=true&includeItemsFromAllDrives=true&corpora=default&orderBy=folder%2Ctitle_natural%20asc&retryCount=0&key={key} HTTP/1.1"
+ _DATA = f'''--{_BOUNDARY}
+content-type: application/http
+content-transfer-encoding: binary
+
+GET %s
+
+--{_BOUNDARY}
+'''
+
+ def _call_api(self, folder_id, key, data, **kwargs):
+ response = self._download_webpage(
+ 'https://clients6.google.com/batch/drive/v2beta',
+ folder_id, data=data.encode('utf-8'),
+ headers={
+ 'Content-Type': 'text/plain;charset=UTF-8;',
+ 'Origin': 'https://drive.google.com',
+ }, query={
+ '$ct': f'multipart/mixed; boundary="{self._BOUNDARY}"',
+ 'key': key
+ }, **kwargs)
+ return self._search_json('', response, 'api response', folder_id, **kwargs) or {}
+
+ def _get_folder_items(self, folder_id, key):
+ page_token = ''
+ while page_token is not None:
+ request = self._REQUEST.format(folder_id=folder_id, page_token=page_token, key=key)
+ page = self._call_api(folder_id, key, self._DATA % request)
+ yield from page['items']
+ page_token = page.get('nextPageToken')
+
+ def _real_extract(self, url):
+ folder_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, folder_id)
+ key = self._search_regex(r'"(\w{39})"', webpage, 'key')
+
+ folder_info = self._call_api(folder_id, key, self._DATA % f'/drive/v2beta/files/{folder_id} HTTP/1.1', fatal=False)
+
+ return self.playlist_from_matches(
+ self._get_folder_items(folder_id, key), folder_id, folder_info.get('title'),
+ ie=GoogleDriveIE, getter=lambda item: f'https://drive.google.com/file/d/{item["id"]}')
diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py
index a7e4424b6..6ecdd390c 100644
--- a/yt_dlp/extractor/hitbox.py
+++ b/yt_dlp/extractor/hitbox.py
@@ -1,13 +1,13 @@
import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- parse_iso8601,
+ determine_ext,
float_or_none,
int_or_none,
- compat_str,
- determine_ext,
+ parse_iso8601,
)
diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py
index 56038f1ca..9e2c9cf47 100644
--- a/yt_dlp/extractor/ina.py
+++ b/yt_dlp/extractor/ina.py
@@ -1,23 +1,19 @@
from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- strip_or_none,
- xpath_attr,
- xpath_text,
-)
+from ..utils import unified_strdate
class InaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:[^/]+/)?(?:video|audio)/(?P<id>\w+)'
_TESTS = [{
- 'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
- 'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
+ 'url': 'https://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
+ 'md5': 'c5a09e5cb5604ed10709f06e7a377dda',
'info_dict': {
'id': 'I12055569',
'ext': 'mp4',
'title': 'François Hollande "Je crois que c\'est clair"',
- 'description': 'md5:3f09eb072a06cb286b8f7e4f77109663',
+ 'description': 'md5:08201f1c86fb250611f0ba415d21255a',
+ 'upload_date': '20070712',
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/3c4/I12055569.jpeg',
}
}, {
'url': 'https://www.ina.fr/video/S806544_001/don-d-organes-des-avancees-mais-d-importants-besoins-video.html',
@@ -31,53 +27,37 @@ class InaIE(InfoExtractor):
}, {
'url': 'http://m.ina.fr/video/I12055569',
'only_matching': True,
+ }, {
+ 'url': 'https://www.ina.fr/ina-eclaire-actu/video/cpb8205116303/les-jeux-electroniques',
+ 'md5': '4b8284a9a3a184fdc7e744225b8251e7',
+ 'info_dict': {
+ 'id': 'CPB8205116303',
+ 'ext': 'mp4',
+ 'title': 'Les jeux électroniques',
+ 'description': 'md5:e09f7683dad1cc60b74950490127d233',
+ 'upload_date': '19821204',
+ 'duration': 657,
+ 'thumbnail': 'https://cdn-hub.ina.fr/notice/690x517/203/CPB8205116303.jpeg',
+ }
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
- info_doc = self._download_xml(
- 'http://player.ina.fr/notices/%s.mrss' % video_id, video_id)
- item = info_doc.find('channel/item')
- title = xpath_text(item, 'title', fatal=True)
- media_ns_xpath = lambda x: self._xpath_ns(x, 'http://search.yahoo.com/mrss/')
- content = item.find(media_ns_xpath('content'))
+ video_id = self._match_id(url).upper()
+ webpage = self._download_webpage(url, video_id)
- get_furl = lambda x: xpath_attr(content, media_ns_xpath(x), 'url')
- formats = []
- for q, w, h in (('bq', 400, 300), ('mq', 512, 384), ('hq', 768, 576)):
- q_url = get_furl(q)
- if not q_url:
- continue
- formats.append({
- 'format_id': q,
- 'url': q_url,
- 'width': w,
- 'height': h,
- })
- if not formats:
- furl = get_furl('player') or content.attrib['url']
- ext = determine_ext(furl)
- formats = [{
- 'url': furl,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'ext': ext,
- }]
+ api_url = self._html_search_regex(
+ r'asset-details-url\s*=\s*["\'](?P<api_url>[^"\']+)',
+ webpage, 'api_url').replace(video_id, f'{video_id}.json')
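+ # Rewriting '<id>' to '<id>.json' in the asset-details URL selects the JSON representation of the notice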
- thumbnails = []
- for thumbnail in content.findall(media_ns_xpath('thumbnail')):
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'height': int_or_none(thumbnail.get('height')),
- 'width': int_or_none(thumbnail.get('width')),
- })
+ api_response = self._download_json(api_url, video_id)
return {
'id': video_id,
- 'formats': formats,
- 'title': title,
- 'description': strip_or_none(xpath_text(item, 'description')),
- 'thumbnails': thumbnails,
+ 'url': api_response['resourceUrl'],
+ 'ext': {'video': 'mp4', 'audio': 'mp3'}.get(api_response.get('type')),
+ 'title': api_response.get('title'),
+ 'description': api_response.get('description'),
+ 'upload_date': unified_strdate(api_response.get('dateOfBroadcast')),
+ 'duration': api_response.get('duration'),
+ 'thumbnail': api_response.get('resourceThumbnail'),
}
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 05000e2fb..5a824b500 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -410,7 +410,7 @@ class InstagramIE(InstagramBaseIE):
if nodes:
return self.playlist_result(
self._extract_nodes(nodes, True), video_id,
- format_field(username, template='Post by %s'), description)
+ format_field(username, None, 'Post by %s'), description)
video_url = self._og_search_video_url(webpage, secure=False)
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index a0298f1a1..5c316687c 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -351,7 +351,7 @@ class IqIE(InfoExtractor):
'''
def _extract_vms_player_js(self, webpage, video_id):
- player_js_cache = self._downloader.cache.load('iq', 'player_js')
+ player_js_cache = self.cache.load('iq', 'player_js')
if player_js_cache:
return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex(
@@ -364,7 +364,7 @@ class IqIE(InfoExtractor):
f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js:
- self._downloader.cache.store('iq', 'player_js', module_js)
+ self.cache.store('iq', 'player_js', module_js)
return module_js
raise ExtractorError('Unable to extract player JS')
@@ -440,7 +440,7 @@ class IqIE(InfoExtractor):
preview_time = traverse_obj(
initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
- self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds'))
+ self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds'))
# TODO: Extract audio-only formats
for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 4b88da35f..f77c5d44d 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -1,15 +1,16 @@
+import itertools
import re
-import urllib
+import urllib.parse
from .common import InfoExtractor
from ..utils import (
int_or_none,
mimetype2ext,
remove_end,
+ strip_or_none,
+ unified_strdate,
url_or_none,
urljoin,
- unified_strdate,
- strip_or_none,
)
@@ -171,37 +172,70 @@ class IwaraUserIE(IwaraBaseIE):
IE_NAME = 'iwara:user'
_TESTS = [{
- 'url': 'https://ecchi.iwara.tv/users/CuteMMD',
+ 'note': 'all-videos listing fits on a single page (fewer than 40 videos)',
+ 'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
'info_dict': {
- 'id': 'CuteMMD',
+ 'title': 'Uploaded videos from Infinity_YukariP',
+ 'id': 'infinityyukarip',
+ 'uploader': 'Infinity_YukariP',
+ 'uploader_id': 'infinityyukarip',
},
- 'playlist_mincount': 198,
+ 'playlist_mincount': 39,
}, {
- # urlencoded
- 'url': 'https://ecchi.iwara.tv/users/%E5%92%95%E5%98%BF%E5%98%BF',
+ 'note': 'not even an all-videos page (probably fewer than 10 videos)',
+ 'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
'info_dict': {
- 'id': '咕嘿嘿',
+ 'title': 'Uploaded videos from mmd quintet',
+ 'id': 'mmd-quintet',
+ 'uploader': 'mmd quintet',
+ 'uploader_id': 'mmd-quintet',
},
- 'playlist_mincount': 141,
+ 'playlist_mincount': 6,
+ }, {
+ 'note': 'paginated listing (more than 40 videos)',
+ 'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
+ 'info_dict': {
+ 'title': 'Uploaded videos from TheBlackbirdCalls',
+ 'id': 'theblackbirdcalls',
+ 'uploader': 'TheBlackbirdCalls',
+ 'uploader_id': 'theblackbirdcalls',
+ },
+ 'playlist_mincount': 420,
+ }, {
+ 'note': 'non-ASCII characters in the URL',
+ 'url': 'https://ecchi.iwara.tv/users/ぶた丼',
+ 'info_dict': {
+ 'title': 'Uploaded videos from ぶた丼',
+ 'id': 'ぶた丼',
+ 'uploader': 'ぶた丼',
+ 'uploader_id': 'ぶた丼',
+ },
+ 'playlist_mincount': 170,
}]
- def _entries(self, playlist_id, base_url, webpage):
- yield from self._extract_playlist(base_url, webpage)
-
- page_urls = re.findall(
- r'class="pager-item"[^>]*>\s*<a[^<]+href="([^"]+)', webpage)
-
- for n, path in enumerate(page_urls, 2):
+ def _entries(self, playlist_id, base_url):
+ webpage = self._download_webpage(
+ f'{base_url}/users/{playlist_id}', playlist_id)
+ videos_url = self._search_regex(r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', webpage, 'all videos url', default=None)
+ if not videos_url:
+ yield from self._extract_playlist(base_url, webpage)
+ return
+
+ videos_url = urljoin(base_url, videos_url)
+
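+ # The site's 'page' parameter is zero-based; stop once the page no longer references the next index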
+ for n in itertools.count(1):
+ page = self._download_webpage(
+ videos_url, playlist_id, note=f'Downloading playlist page {n}',
+ query={'page': str(n - 1)} if n > 1 else {})
yield from self._extract_playlist(
- base_url, self._download_webpage(
- urljoin(base_url, path), playlist_id, note=f'Downloading playlist page {n}'))
+ base_url, page)
+
+ if f'page={n}' not in page:
+ break
def _real_extract(self, url):
playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
playlist_id = urllib.parse.unquote(playlist_id)
- webpage = self._download_webpage(
- f'{base_url}/users/{playlist_id}/videos', playlist_id)
-
return self.playlist_result(
- self._entries(playlist_id, base_url, webpage), playlist_id)
+ self._entries(playlist_id, base_url), playlist_id)
diff --git a/yt_dlp/extractor/ixigua.py b/yt_dlp/extractor/ixigua.py
new file mode 100644
index 000000000..163edf480
--- /dev/null
+++ b/yt_dlp/extractor/ixigua.py
@@ -0,0 +1,84 @@
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ str_or_none,
+ traverse_obj,
+)
+
+
+class IxiguaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+'
+ _TESTS = [{
+ 'url': 'https://www.ixigua.com/6996881461559165471',
+ 'info_dict': {
+ 'id': '6996881461559165471',
+ 'ext': 'mp4',
+ 'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
+ 'description': 'md5:8c82f46186299add4a1c455430740229',
+ 'tags': ['video_car'],
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'uploader': '懂车帝原创',
+ 'uploader_id': '6480145787',
+ 'thumbnail': r're:^https?://.+\.(avif|webp)',
+ 'timestamp': 1629088414,
+ 'duration': 1030,
+ }
+ }]
+
+ def _get_json_data(self, webpage, video_id):
+ js_data = get_element_by_id('SSR_HYDRATED_DATA', webpage)
+ if not js_data:
+ if self._cookies_passed:
+ raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
+ raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)
+
+ return self._parse_json(
+ js_data.replace('window._SSR_HYDRATED_DATA=', ''), video_id, transform_source=js_to_json)
+
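+ # 'main_url' values are base64-encoded; the three traversal paths below
+ # cover the regular video list plus the separate video-only and
+ # audio-only dynamic lists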
+ def _media_selector(self, json_data):
+ for path, override in (
+ (('video_list', ), {}),
+ (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
+ (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
+ ):
+ for media in traverse_obj(json_data, (..., *path, lambda _, v: v['main_url'])):
+ yield {
+ 'url': base64.b64decode(media['main_url']).decode(),
+ 'width': int_or_none(media.get('vwidth')),
+ 'height': int_or_none(media.get('vheight')),
+ 'fps': int_or_none(media.get('fps')),
+ 'vcodec': media.get('codec_type'),
+ 'format_id': str_or_none(media.get('quality_type')),
+ 'filesize': int_or_none(media.get('size')),
+ 'ext': 'mp4',
+ **override,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
+
+ formats = list(self._media_selector(json_data.get('videoResource')))
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': json_data.get('title'),
+ 'description': json_data.get('video_abstract'),
+ 'formats': formats,
+ 'like_count': json_data.get('video_like_count'),
+ 'duration': int_or_none(json_data.get('duration')),
+ 'tags': [json_data.get('tag')],
+ 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
+ 'uploader': traverse_obj(json_data, ('user_info', 'name')),
+ 'view_count': json_data.get('video_watch_count'),
+ 'dislike_count': json_data.get('video_unlike_count'),
+ 'timestamp': int_or_none(json_data.get('video_publish_time')),
+ }
diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py
index a01411be1..1c4676e95 100644
--- a/yt_dlp/extractor/joj.py
+++ b/yt_dlp/extractor/joj.py
@@ -70,7 +70,7 @@ class JojIE(InfoExtractor):
r'(\d+)[pP]\.', format_url, 'height', default=None)
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%sp'),
+ 'format_id': format_field(height, None, '%sp'),
'height': int(height),
})
if not formats:
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index 8dbbb2926..2cb7ca3d7 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -5,7 +5,7 @@ from ..utils import unsmuggle_url
class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
_TESTS = [{
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
@@ -37,6 +37,9 @@ class JWPlatformIE(InfoExtractor):
webpage)
if ret:
return ret
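+ # Fallback: some pages only reference the media ID via a
+ # data-video-jw-id attribute on a <div>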
+ mobj = re.search(r'<div\b[^>]* data-video-jw-id="([a-zA-Z0-9]{8})"', webpage)
+ if mobj:
+ return [f'jwplatform:{mobj.group(1)}']
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py
index afad279bd..f4092aa71 100644
--- a/yt_dlp/extractor/kaltura.py
+++ b/yt_dlp/extractor/kaltura.py
@@ -382,5 +382,5 @@ class KalturaIE(InfoExtractor):
'duration': info.get('duration'),
'timestamp': info.get('createdAt'),
'uploader_id': format_field(info, 'userId', ignore=('None', None)),
- 'view_count': info.get('plays'),
+ 'view_count': int_or_none(info.get('plays')),
}
diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py
index 79f9c7fa7..1c2d5c01c 100644
--- a/yt_dlp/extractor/keezmovies.py
+++ b/yt_dlp/extractor/keezmovies.py
@@ -68,7 +68,7 @@ class KeezMoviesIE(InfoExtractor):
video_url, title, 32).decode('utf-8')
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%dp'),
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
'tbr': tbr,
})
diff --git a/yt_dlp/extractor/kicker.py b/yt_dlp/extractor/kicker.py
new file mode 100644
index 000000000..a2c7dd4e8
--- /dev/null
+++ b/yt_dlp/extractor/kicker.py
@@ -0,0 +1,55 @@
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+
+
+class KickerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)kicker\.(?:de)/(?P<id>[\w-]+)/video'
+ _TESTS = [{
+ 'url': 'https://www.kicker.de/pogba-dembel-co-die-top-11-der-abloesefreien-spieler-905049/video',
+ 'info_dict': {
+ 'id': 'km04mrK0DrRAVxy2GcA',
+ 'title': 'md5:b91d145bac5745ac58d5479d8347a875',
+ 'ext': 'mp4',
+ 'duration': 350,
+ 'description': 'md5:a5a3dd77dbb6550dbfb997be100b9998',
+ 'uploader_id': 'x2dfupo',
+ 'timestamp': 1654677626,
+ 'like_count': int,
+ 'uploader': 'kicker.de',
+ 'view_count': int,
+ 'age_limit': 0,
+ 'thumbnail': r're:https://s\d+\.dmcdn\.net/v/T-x741YeYAx8aSZ0Z/x1080',
+ 'tags': ['published', 'category.InternationalSoccer'],
+ 'upload_date': '20220608'
+ }
+ }, {
+ 'url': 'https://www.kicker.de/ex-unioner-in-der-bezirksliga-felix-kroos-vereinschallenge-in-pankow-902825/video',
+ 'info_dict': {
+ 'id': 'k2omNsJKdZ3TxwxYSFJ',
+ 'title': 'md5:72ec24d7f84b8436fe1e89d198152adf',
+ 'ext': 'mp4',
+ 'uploader_id': 'x2dfupo',
+ 'duration': 331,
+ 'timestamp': 1652966015,
+ 'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/TxU4Z1YYCmtisTbMq/x1080',
+ 'tags': ['FELIX KROOS', 'EINFACH MAL LUPPEN', 'KROOS', 'FSV FORTUNA PANKOW', 'published', 'category.Amateurs', 'marketingpreset.Spreekick'],
+ 'age_limit': 0,
+ 'view_count': int,
+ 'upload_date': '20220519',
+ 'uploader': 'kicker.de',
+ 'description': 'md5:0c2060c899a91c8bf40f578f78c5846f',
+ 'like_count': int,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_slug = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_slug)
+ dailymotion_video_id = self._search_regex(
+ r'data-dmprivateid\s*=\s*[\'"](?P<video_id>\w+)', webpage,
+ 'video id', group='video_id')
+
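+ # The page embeds a Dailymotion player, so delegate to DailymotionIE,
+ # keeping the page <title> as the video title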
+ return self.url_result(
+ f'https://www.dailymotion.com/video/{dailymotion_video_id}',
+ ie=DailymotionIE, video_title=self._html_extract_title(webpage))
diff --git a/yt_dlp/extractor/kth.py b/yt_dlp/extractor/kth.py
new file mode 100644
index 000000000..e17c6db91
--- /dev/null
+++ b/yt_dlp/extractor/kth.py
@@ -0,0 +1,28 @@
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class KTHIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.kth\.se/(?:[^/]+/)+(?P<id>[a-z0-9_]+)'
+ _TEST = {
+ 'url': 'https://play.kth.se/media/Lunch+breakA+De+nya+aff%C3%A4rerna+inom+Fordonsdalen/0_uoop6oz9',
+ 'md5': 'd83ada6d00ca98b73243a88efe19e8a6',
+ 'info_dict': {
+ 'id': '0_uoop6oz9',
+ 'ext': 'mp4',
+ 'title': 'md5:bd1d6931facb6828762a33e6ce865f37',
+ 'thumbnail': 're:https?://.+/thumbnail/.+',
+ 'duration': 3516,
+ 'timestamp': 1647345358,
+ 'upload_date': '20220315',
+ 'uploader_id': 'md5:0ec23e33a89e795a4512930c8102509f',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ result = self.url_result(
+ smuggle_url('kaltura:308:%s' % video_id, {
+ 'service_url': 'https://api.kaltura.nordu.net'}),
+ 'Kaltura')
+ return result
diff --git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py
index f1221ef1b..4fec2c2b2 100644
--- a/yt_dlp/extractor/kusi.py
+++ b/yt_dlp/extractor/kusi.py
@@ -1,10 +1,10 @@
import random
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
from ..utils import (
- int_or_none,
float_or_none,
+ int_or_none,
timeconvert,
update_url_query,
xpath_text,
@@ -66,7 +66,7 @@ class KUSIIE(InfoExtractor):
formats = []
for quality in quality_options:
formats.append({
- 'url': compat_urllib_parse_unquote_plus(quality.attrib['url']),
+ 'url': urllib.parse.unquote_plus(quality.attrib['url']),
'height': int_or_none(quality.attrib.get('height')),
'width': int_or_none(quality.attrib.get('width')),
'vbr': float_or_none(quality.attrib.get('bitratebits'), scale=1000),
diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py
index 7ba666d06..f14198cfd 100644
--- a/yt_dlp/extractor/lastfm.py
+++ b/yt_dlp/extractor/lastfm.py
@@ -15,7 +15,7 @@ class LastFMPlaylistBaseIE(InfoExtractor):
for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
webpage = self._download_webpage(
url, playlist_id,
- note='Downloading page %d%s' % (page_number, format_field(last_page_number, template=' of %d')),
+ note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')),
query={'page': page_number})
page_entries = [
self.url_result(player_url, 'Youtube')
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index 953ce2e18..909720e8b 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -192,10 +192,11 @@ class LBRYIE(LBRYBaseIE):
claim_id, is_live = result['signing_channel']['claim_id'], True
headers = {'referer': 'https://player.odysee.live/'}
live_data = self._download_json(
- f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id,
+ 'https://api.odysee.live/livestream/is_live', claim_id,
+ query={'channel_claim_id': claim_id},
note='Downloading livestream JSON metadata')['data']
- streaming_url = final_url = live_data.get('url')
- if not final_url and not live_data.get('live'):
+ streaming_url = final_url = live_data.get('VideoURL')
+ if not final_url and not live_data.get('Live'):
self.raise_no_formats('This stream is not live', True, claim_id)
else:
raise UnsupportedError(url)
diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py
index 63b6c002a..09c512e50 100644
--- a/yt_dlp/extractor/line.py
+++ b/yt_dlp/extractor/line.py
@@ -34,7 +34,7 @@ class LineLiveBaseIE(InfoExtractor):
'timestamp': int_or_none(item.get('createdAt')),
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://live.line.me/channels/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://live.line.me/channels/%s'),
'duration': int_or_none(item.get('archiveDuration')),
'view_count': int_or_none(item.get('viewerCount')),
'comment_count': int_or_none(item.get('chatCount')),
diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py
index 3bb52777f..9ea08ec5a 100644
--- a/yt_dlp/extractor/lnkgo.py
+++ b/yt_dlp/extractor/lnkgo.py
@@ -1,7 +1,7 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
clean_html,
- compat_str,
format_field,
int_or_none,
parse_iso8601,
diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py
index 527b50cb0..5f0a9b42f 100644
--- a/yt_dlp/extractor/medaltv.py
+++ b/yt_dlp/extractor/medaltv.py
@@ -116,7 +116,7 @@ class MedalTVIE(InfoExtractor):
author = try_get(
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
author_id = str_or_none(author.get('id'))
- author_url = format_field(author_id, template='https://medal.tv/users/%s')
+ author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
return {
'id': video_id,
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index 60c454dda..f396c1bd3 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -20,10 +20,10 @@ class MediasetIE(ThePlatformBaseIE):
(?:
mediaset:|
https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (?:\w+\.)+mediaset\.it/
(?:
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
- player/index\.html\?.*?\bprogramGuid=
+ player/(?:v\d+/)?index\.html\?.*?\bprogramGuid=
)
)(?P<id>[0-9A-Z]{16,})
'''
@@ -159,6 +159,12 @@ class MediasetIE(ThePlatformBaseIE):
}, {
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
'only_matching': True,
+ }, {
+ 'url': 'https://mediasetinfinity.mediaset.it/video/braveandbeautiful/episodio-113_F310948005000402',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/v2/index.html?partnerId=wittytv&configId=&programGuid=FD00000000153323',
+ 'only_matching': True,
}]
@staticmethod
@@ -286,7 +292,7 @@ class MediasetShowIE(MediasetIE):
_VALID_URL = r'''(?x)
(?:
https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (\w+\.)+mediaset\.it/
(?:
(?:fiction|programmi-tv|serie-tv|kids)/(?:.+?/)?
(?:[a-z-]+)_SE(?P<id>\d{12})
diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py
index 31fec86d2..048c74e68 100644
--- a/yt_dlp/extractor/metacafe.py
+++ b/yt_dlp/extractor/metacafe.py
@@ -1,17 +1,14 @@
import json
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse,
- compat_urllib_parse_unquote,
-)
+from ..compat import compat_parse_qs, compat_urllib_parse_unquote
from ..utils import (
- determine_ext,
ExtractorError,
- int_or_none,
+ determine_ext,
get_element_by_attribute,
+ int_or_none,
mimetype2ext,
)
@@ -143,7 +140,7 @@ class MetacafeIE(InfoExtractor):
headers = {
# Disable family filter
- 'Cookie': 'user=%s; ' % compat_urllib_parse.quote(json.dumps({'ffilter': False}))
+ 'Cookie': 'user=%s; ' % urllib.parse.quote(json.dumps({'ffilter': False}))
}
# AnyClip videos require the flashversion cookie so that we get the link
diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py
index 393d20604..8079bbb39 100644
--- a/yt_dlp/extractor/minds.py
+++ b/yt_dlp/extractor/minds.py
@@ -118,7 +118,7 @@ class MindsIE(MindsBaseIE):
'timestamp': int_or_none(entity.get('time_created')),
'uploader': strip_or_none(owner.get('name')),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://www.minds.com/%s'),
+ 'uploader_url': format_field(uploader_id, None, 'https://www.minds.com/%s'),
'view_count': int_or_none(entity.get('play:count')),
'like_count': int_or_none(entity.get('thumbs:up:count')),
'dislike_count': int_or_none(entity.get('thumbs:down:count')),
diff --git a/yt_dlp/extractor/mirrorcouk.py b/yt_dlp/extractor/mirrorcouk.py
new file mode 100644
index 000000000..7b4f95b4b
--- /dev/null
+++ b/yt_dlp/extractor/mirrorcouk.py
@@ -0,0 +1,98 @@
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class MirrorCoUKIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mirror\.co\.uk/[/+[\w-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-island-fans-baffled-after-27163139',
+ 'info_dict': {
+ 'id': 'voyyS7SV',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Gemma Owen enters the villa',
+ 'description': 'Love Island: Michael Owen\'s daughter Gemma Owen enters the villa.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/voyyS7SV/poster.jpg?width=720',
+ 'display_id': '27163139',
+ 'timestamp': 1654547895,
+ 'duration': 57.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/michael-jacksons-son-blankets-new-25344890',
+ 'info_dict': {
+ 'id': 'jyXpdvxp',
+ 'ext': 'mp4',
+ 'title': 'Michael Jackson’s son Bigi calls for action on climate change',
+ 'description': 'md5:d39ceaba2b7a615b4ca6557e7bc40222',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/jyXpdvxp/poster.jpg?width=720',
+ 'display_id': '25344890',
+ 'timestamp': 1635749907,
+ 'duration': 56.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/sport/football/news/antonio-conte-next-tottenham-manager-25346042',
+ 'info_dict': {
+ 'id': 'q6FkKa4p',
+ 'ext': 'mp4',
+ 'title': 'Nuno sacked by Tottenham after fifth Premier League defeat of the season',
+ 'description': 'Nuno Espirito Santo has been sacked as Tottenham boss after only four months in charge.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/q6FkKa4p/poster.jpg?width=720',
+ 'display_id': '25346042',
+ 'timestamp': 1635763157,
+ 'duration': 40.0,
+ 'upload_date': '20211101',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/3am/celebrity-news/johnny-depp-splashes-50k-curry-27160737',
+ 'info_dict': {
+ 'id': 'IT0oa1nH',
+ 'ext': 'mp4',
+ 'title': 'Johnny Depp Leaves The Grand Hotel in Birmingham',
+ 'description': 'Johnny Depp Leaves The Grand Hotel in Birmingham.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/IT0oa1nH/poster.jpg?width=720',
+ 'display_id': '27160737',
+ 'timestamp': 1654524120,
+ 'duration': 65.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/tv/tv-news/love-islands-liam-could-first-27162602',
+ 'info_dict': {
+ 'id': 'EaPr5Z2j',
+ 'ext': 'mp4',
+ 'title': 'Love Island: Davide reveals plot twist after receiving text',
+ 'description': 'Love Island: Davide reveals plot twist after receiving text',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/EaPr5Z2j/poster.jpg?width=720',
+ 'display_id': '27162602',
+ 'timestamp': 1654552597,
+ 'duration': 23.0,
+ 'upload_date': '20220606',
+ },
+ }, {
+ 'url': 'https://www.mirror.co.uk/news/uk-news/william-kate-sent-message-george-27160572',
+ 'info_dict': {
+ 'id': 'ygtceXIu',
+ 'ext': 'mp4',
+ 'title': 'Prince William and Kate arrive in Wales with George and Charlotte',
+ 'description': 'Prince William and Kate Middleton arrive in Wales with children Prince George and Princess Charlotte.',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/ygtceXIu/poster.jpg?width=720',
+ 'display_id': '27160572',
+ 'timestamp': 1654349678,
+ 'duration': 106.0,
+ 'upload_date': '20220604',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ data = self._search_json(r'div\s+class="json-placeholder"\s+data-json="',
+ webpage, 'data', display_id, transform_source=unescapeHTML)['videoData']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'jwplatform:{data["videoId"]}',
+ 'ie_key': 'JWPlatform',
+ 'display_id': display_id,
+ }
diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py
index 796f268f4..a77d7e682 100644
--- a/yt_dlp/extractor/mixcloud.py
+++ b/yt_dlp/extractor/mixcloud.py
@@ -3,7 +3,6 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_b64decode,
- compat_chr,
compat_ord,
compat_str,
compat_urllib_parse_unquote,
@@ -72,7 +71,7 @@ class MixcloudIE(MixcloudBaseIE):
def _decrypt_xor_cipher(key, ciphertext):
"""Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
return ''.join([
- compat_chr(compat_ord(ch) ^ compat_ord(k))
+ chr(compat_ord(ch) ^ compat_ord(k))
for ch, k in zip(ciphertext, itertools.cycle(key))])
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
index a230d9cdd..c3b063ffe 100644
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@@ -1,13 +1,19 @@
+import itertools
import re
+from urllib.parse import urlparse, parse_qs
from .common import InfoExtractor
from ..utils import (
+ ExtractorError,
clean_html,
dict_get,
- ExtractorError,
int_or_none,
+ merge_dicts,
parse_duration,
+ traverse_obj,
+ try_call,
try_get,
+ unified_timestamp,
update_url_query,
)
@@ -247,3 +253,134 @@ class NaverLiveIE(InfoExtractor):
'categories': [meta.get('categoryId')],
'is_live': True
}
+
+
+class NaverNowIE(NaverBaseIE):
+ IE_NAME = 'navernow'
+ _VALID_URL = r'https?://now\.naver\.com/show/(?P<id>[0-9]+)'
+ _PAGE_SIZE = 30
+ _API_URL = 'https://apis.naver.com/now_web/nowcms-api-xhmac/cms/v1'
+ _TESTS = [{
+ 'url': 'https://now.naver.com/show/4759?shareReplayId=5901#replay=',
+ 'md5': 'e05854162c21c221481de16b2944a0bc',
+ 'info_dict': {
+ 'id': '4759-5901',
+ 'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1650369600,
+ 'upload_date': '20220419',
+ 'uploader_id': 'now',
+ 'view_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ }
+ }, {
+ 'url': 'https://now.naver.com/show/4759?shareHightlight=1078#highlight=',
+ 'md5': '9f6118e398aa0f22b2152f554ea7851b',
+ 'info_dict': {
+ 'id': '4759-1078',
+ 'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
+ 'ext': 'mp4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20220504',
+ 'timestamp': 1651648042,
+ 'uploader_id': 'now',
+ 'view_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ },
+ }, {
+ 'url': 'https://now.naver.com/show/4759',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 48
+ }, {
+ 'url': 'https://now.naver.com/show/4759?shareReplayId=5901#replay',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 48,
+ }, {
+ 'url': 'https://now.naver.com/show/4759?shareHightlight=1078#highlight=',
+ 'info_dict': {
+ 'id': '4759',
+ 'title': '아이키의 떰즈업',
+ },
+ 'playlist_mincount': 48,
+ }]
+
+ def _extract_replay(self, show_id, replay_id):
+ vod_info = self._download_json(f'{self._API_URL}/shows/{show_id}/vod/{replay_id}', replay_id)
+ in_key = self._download_json(f'{self._API_URL}/shows/{show_id}/vod/{replay_id}/inkey', replay_id)['inKey']
+ return merge_dicts({
+ 'id': f'{show_id}-{replay_id}',
+ 'title': traverse_obj(vod_info, ('episode', 'title')),
+ 'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
+ 'thumbnail': vod_info.get('thumbnail_image_url'),
+ }, self._extract_video_info(replay_id, vod_info['video_id'], in_key))
+
+ def _extract_show_replays(self, show_id):
+ page = 0
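+ # Page through the VOD list; the API reports a total 'count', so stop
+ # once offset + limit has covered it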
+ while True:
+ show_vod_info = self._download_json(
+ f'{self._API_URL}/vod-shows/{show_id}', show_id,
+ query={'offset': page * self._PAGE_SIZE, 'limit': self._PAGE_SIZE},
+ note=f'Downloading JSON vod list for show {show_id} - page {page}'
+ )['response']['result']
+ for v in show_vod_info.get('vod_list') or []:
+ yield self._extract_replay(show_id, v['id'])
+
+ if try_call(lambda: show_vod_info['count'] <= self._PAGE_SIZE * (page + 1)):
+ break
+ page += 1
+
+ def _extract_show_highlights(self, show_id, highlight_id=None):
+ page = 0
+ while True:
+ highlights_videos = self._download_json(
+ f'{self._API_URL}/shows/{show_id}/highlights/videos/', show_id,
+ query={'offset': page * self._PAGE_SIZE, 'limit': self._PAGE_SIZE},
+ note=f'Downloading JSON highlights for show {show_id} - page {page}')
+
+ for highlight in highlights_videos.get('results') or []:
+ if highlight_id and highlight.get('id') != int(highlight_id):
+ continue
+ yield merge_dicts({
+ 'id': f'{show_id}-{highlight["id"]}',
+ 'title': highlight.get('title'),
+ 'timestamp': unified_timestamp(highlight.get('regdate')),
+ 'thumbnail': highlight.get('thumbnail_url'),
+ }, self._extract_video_info(highlight['id'], highlight['video_id'], highlight['video_inkey']))
+
+ if try_call(lambda: highlights_videos['count'] <= self._PAGE_SIZE * (page + 1)):
+ break
+ page += 1
+
+ def _extract_highlight(self, show_id, highlight_id):
+ try:
+ return next(self._extract_show_highlights(show_id, highlight_id))
+ except StopIteration:
+ raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ qs = parse_qs(urlparse(url).query)
+
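+ # NB: 'shareHightlight' (sic) is the site's own spelling of the query
+ # parameter, as seen in the test URLs above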
+ if not self._yes_playlist(show_id, qs.get('shareHightlight')):
+ return self._extract_highlight(show_id, qs['shareHightlight'][0])
+ elif not self._yes_playlist(show_id, qs.get('shareReplayId')):
+ return self._extract_replay(show_id, qs['shareReplayId'][0])
+
+ show_info = self._download_json(
+ f'{self._API_URL}/shows/{show_id}', show_id,
+ note=f'Downloading JSON vod list for show {show_id}')
+
+ return self.playlist_result(
+ itertools.chain(self._extract_show_replays(show_id), self._extract_show_highlights(show_id)),
+ show_id, show_info.get('title'))
diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index de0142ccf..ad8dbd7a7 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -1,11 +1,15 @@
+import re
+
from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
from ..utils import (
determine_ext,
+ ExtractorError,
int_or_none,
- parse_duration,
+ merge_dicts,
+ parse_iso8601,
qualities,
try_get,
- unified_strdate,
urljoin,
)
@@ -14,120 +18,139 @@ class NDRBaseIE(InfoExtractor):
def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = next(group for group in mobj.groups() if group)
- id = mobj.group('id')
webpage = self._download_webpage(url, display_id)
- return self._extract_embed(webpage, display_id, id)
+ return self._extract_embed(webpage, display_id, url)
class NDRIE(NDRBaseIE):
IE_NAME = 'ndr'
IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
- _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
_TESTS = [{
+ # httpVideo, same content id
'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
'info_dict': {
'id': 'hafengeburtstag988',
+ 'display_id': 'Party-Poette-und-Parade',
'ext': 'mp4',
'title': 'Party, Pötte und Parade',
- 'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
- 'series': None,
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20150508',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1431255671,
+ 'upload_date': '20150510',
'duration': 3498,
},
- }, {
- 'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html',
- 'only_matching': True
- }, {
- 'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
- 'info_dict': {
- 'id': 'kommunalwahl1296',
- 'ext': 'mp4',
- 'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
- 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
- 'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
- 'series': 'Hallo Niedersachsen',
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20210913',
- 'duration': 438,
+ 'params': {
+ 'skip_download': True,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
- 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ # httpVideo, different content id
+ 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+ 'md5': '1043ff203eab307f0c51702ec49e9a71',
'info_dict': {
- 'id': 'sendung1091858',
+ 'id': 'osna272',
+ 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
'ext': 'mp4',
- 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
- 'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg',
- 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
- 'series': 'extra 3',
- 'channel': 'NDR Fernsehen',
- 'upload_date': '20201111',
- 'duration': 1749,
- }
+ 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+ 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1442059200,
+ 'upload_date': '20150912',
+ 'duration': 510,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'No longer available',
}, {
+ # httpAudio, same content id
'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
'info_dict': {
'id': 'audio51535',
+ 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
'ext': 'mp3',
'title': 'La Valette entgeht der Hinrichtung',
- 'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
- 'upload_date': '20140729',
- 'duration': 884.0,
+ 'uploader': 'ndrinfo',
+ 'timestamp': 1631711863,
+ 'upload_date': '20210915',
+ 'duration': 884,
},
- 'expected_warnings': ['unable to extract json url'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with subtitles
+ 'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
+ 'info_dict': {
+ 'id': 'extra18674',
+ 'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
+ 'ext': 'mp4',
+ 'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
+ 'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20201207',
+ 'timestamp': 1614349457,
+ 'duration': 1749,
+ 'subtitles': {
+ 'de': [{
+ 'ext': 'ttml',
+ 'url': r're:^https://www\.ndr\.de.+',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+ 'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id, id):
- formats = []
- base_url = 'https://www.ndr.de'
- json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
- 'json url', fatal=False)
- if json_url:
- data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
- id, fatal=False)
- info_json = data_json.get('_info', {})
- media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
- for media in media_json:
- if media.get('_quality') == 'auto':
- formats.extend(self._extract_m3u8_formats(media['_stream'], id))
- subtitles = {}
- sub_url = data_json.get('_subtitleUrl')
- if sub_url:
- subtitles.setdefault('de', []).append({
- 'url': base_url + sub_url,
- })
- self._sort_formats(formats)
- return {
- 'id': id,
- 'title': info_json.get('clipTitle'),
- 'thumbnail': base_url + data_json.get('_previewImage'),
- 'description': info_json.get('clipDescription'),
- 'series': info_json.get('seriesTitle') or None,
- 'channel': info_json.get('channelTitle'),
- 'upload_date': unified_strdate(info_json.get('clipDate')),
- 'duration': data_json.get('_duration'),
- 'formats': formats,
- 'subtitles': subtitles,
- }
- else:
- json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
- '_belongsToPodcast-', '')
- data_json = self._download_json(json_url, id, fatal=False)
- return {
- 'id': id,
- 'title': data_json.get('title'),
- 'thumbnail': base_url + data_json.get('poster'),
- 'description': data_json.get('summary'),
- 'upload_date': unified_strdate(data_json.get('publicationDate')),
- 'duration': parse_duration(data_json.get('duration')),
- 'formats': [{
- 'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
- 'vcodec': 'none',
- 'ext': 'mp3',
- }],
- }
+ def _extract_embed(self, webpage, display_id, url):
+ embed_url = (
+ self._html_search_meta(
+ 'embedURL', webpage, 'embed URL',
+ default=None)
+ or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=None)
+ or self._search_regex(
+ r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url', default=''))
+ # some more work needed if we only found sophoraID
+ if re.match(r'^[a-z]+\d+$', embed_url):
+ # get the initial part of the URL path, e.g. /panorama/archiv/2022/
+ parsed_url = compat_urllib_parse_urlparse(url)
+ path = self._search_regex(r'(.+/)%s' % display_id, parsed_url.path or '', 'embed URL', default='')
+ # find tell-tale image with the actual ID
+ ndr_id = self._search_regex(r'%s([a-z]+\d+)(?!\.)\b' % (path, ), webpage, 'embed URL', default=None)
+ # or try to use special knowledge!
+ NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
+ embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
+ if not embed_url:
+ raise ExtractorError('Unable to extract embedUrl')
+
+ description = self._search_regex(
+ r'<p[^>]+itemprop="description">([^<]+)</p>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ timestamp = parse_iso8601(
+ self._search_regex(
+ (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
+ r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
+ webpage, 'upload date', group='cont', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'description': description,
+ 'timestamp': timestamp,
+ }, info)
class NJoyIE(NDRBaseIE):
@@ -151,19 +174,19 @@ class NJoyIE(NDRBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpVideo, different content id
'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
'md5': '417660fffa90e6df2fda19f1b40a64d8',
'info_dict': {
- 'id': 'dockville882',
+ 'id': 'livestream283',
'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
- 'ext': 'mp4',
- 'title': '"Ich hab noch nie" mit Felix Jaehn',
- 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+ 'ext': 'mp3',
+ 'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
+ 'description': 'md5:681698f527b8601e511e7b79edde7d2c',
'uploader': 'njoy',
- 'upload_date': '20150822',
- 'duration': 211,
+ 'upload_date': '20210830',
},
'params': {
'skip_download': True,
@@ -173,18 +196,25 @@ class NJoyIE(NDRBaseIE):
'only_matching': True,
}]
- def _extract_embed(self, webpage, display_id, id):
+ def _extract_embed(self, webpage, display_id, url=None):
+ # find tell-tale URL with the actual ID, or ...
video_id = self._search_regex(
- r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
- description = self._search_regex(
- r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
- webpage, 'description', fatal=False)
+ (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
+ r'<iframe[^>]+id="pp_([\da-z]+)"', ),
+ webpage, 'NDR id', default=None)
+
+ description = (
+ self._html_search_meta('description', webpage)
+ or self._search_regex(
+ r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+ webpage, 'description', fatal=False))
return {
'_type': 'url_transparent',
'ie_key': 'NDREmbedBase',
'url': 'ndr:%s' % video_id,
'display_id': display_id,
'description': description,
+ 'title': display_id.replace('-', ' ').strip(),
}
@@ -287,7 +317,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE(NDREmbedBaseIE):
IE_NAME = 'ndr:embed'
- _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+ _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
_TESTS = [{
'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
@@ -300,6 +330,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'upload_date': '20150907',
'duration': 132,
},
+ 'skip': 'No longer available',
}, {
'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
'md5': '002085c44bae38802d94ae5802a36e78',
@@ -315,6 +346,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
'url': 'http://www.ndr.de/info/audio51535-player.html',
'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
@@ -324,7 +356,7 @@ class NDREmbedIE(NDREmbedBaseIE):
'title': 'La Valette entgeht der Hinrichtung',
'is_live': False,
'uploader': 'ndrinfo',
- 'upload_date': '20140729',
+ 'upload_date': '20210915',
'duration': 884,
},
'params': {
@@ -345,15 +377,17 @@ class NDREmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpVideoLive
'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
'info_dict': {
'id': 'livestream217',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
- 'upload_date': '20150910',
+ 'upload_date': '20210409',
+ 'uploader': 'ndrtv',
},
'params': {
'skip_download': True,
@@ -391,9 +425,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'ext': 'mp4',
'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
'is_live': False,
- 'upload_date': '20150807',
+ 'upload_date': '20200826',
'duration': 1011,
},
+ 'expected_warnings': ['Unable to download f4m manifest'],
}, {
# httpAudio
'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
@@ -410,6 +445,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'params': {
'skip_download': True,
},
+ 'skip': 'No longer available',
}, {
# httpAudioLive, no explicit ext
'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
@@ -419,7 +455,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'uploader': 'njoy',
- 'upload_date': '20150810',
+ 'upload_date': '20210830',
},
'params': {
'skip_download': True,
diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py
index fbb033169..bfe52f77d 100644
--- a/yt_dlp/extractor/ndtv.py
+++ b/yt_dlp/extractor/ndtv.py
@@ -1,13 +1,7 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote_plus
-)
-from ..utils import (
- parse_duration,
- remove_end,
- unified_strdate,
- urljoin
-)
+from ..utils import parse_duration, remove_end, unified_strdate, urljoin
class NDTVIE(InfoExtractor):
@@ -80,7 +74,7 @@ class NDTVIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
# '__title' does not contain extra words such as sub-site name, "Video" etc.
- title = compat_urllib_parse_unquote_plus(
+ title = urllib.parse.unquote_plus(
self._search_regex(r"__title\s*=\s*'([^']+)'", webpage, 'title', default=None)
or self._og_search_title(webpage))
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
index ff9a2adf0..7057b8b26 100644
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@@ -1,14 +1,11 @@
import itertools
import json
import time
-import urllib
+import urllib.error
+import urllib.parse
-from ..utils import (
- ExtractorError,
- parse_iso8601,
- try_get,
-)
from .common import InfoExtractor
+from ..utils import ExtractorError, parse_iso8601, try_get
class NebulaBaseIE(InfoExtractor):
diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py
index 4def7e76b..f9a67876a 100644
--- a/yt_dlp/extractor/neteasemusic.py
+++ b/yt_dlp/extractor/neteasemusic.py
@@ -1,18 +1,12 @@
-from hashlib import md5
+import itertools
+import re
from base64 import b64encode
from datetime import datetime
-import re
+from hashlib import md5
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_str,
- compat_itertools_count,
-)
-from ..utils import (
- sanitized_Request,
- float_or_none,
-)
+from ..compat import compat_str, compat_urllib_parse_urlencode
+from ..utils import float_or_none, sanitized_Request
class NetEaseMusicBaseIE(InfoExtractor):
@@ -449,7 +443,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
name = None
desc = None
entries = []
- for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
+ for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api(
'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
% (self._PAGE_SIZE, dj_id, offset),
diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py
new file mode 100644
index 000000000..f529682a3
--- /dev/null
+++ b/yt_dlp/extractor/netverse.py
@@ -0,0 +1,176 @@
+import functools
+
+from .common import InfoExtractor
+from .dailymotion import DailymotionIE
+from ..utils import (
+ InAdvancePagedList,
+ smuggle_url,
+ traverse_obj,
+)
+
+
+class NetverseBaseIE(InfoExtractor):
+ _ENDPOINTS = {
+ 'watch': 'watchvideo',
+ 'video': 'watchvideo',
+ 'webseries': 'webseries',
+ }
+
+ def _call_api(self, url, query={}):
+ display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
+
+ json_data = self._download_json(
+ f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[sites_type]}/{display_id}',
+ display_id, query=query)
+
+ return display_id, json_data
+
+
+class NetverseIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
+ _TESTS = [{
+ # Watch video
+ 'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'info_dict': {
+ 'id': 'k4yhqUwINAGtmHx3NkL',
+ 'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
+ 'ext': 'mp4',
+ 'season': 'Season 2016',
+ 'description': 'md5:fc27747c0aa85067b6967c816f01617c',
+ 'thumbnail': 'https://vplayed-uat.s3-ap-southeast-1.amazonaws.com/images/webseries/thumbnails/2021/11/619cfce45c827.jpeg',
+ 'episode_number': 22,
+ 'series': 'Waktu Indonesia Bercanda',
+ 'episode': 'Episode 22',
+ 'uploader_id': 'x2ir3vq',
+ 'age_limit': 0,
+ 'tags': [],
+ 'view_count': int,
+ 'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
+ 'duration': 2990,
+ 'upload_date': '20210722',
+ 'timestamp': 1626919804,
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ }
+ }, {
+ # series
+ 'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
+ 'info_dict': {
+ 'id': 'x88izwc',
+ 'title': 'Jadoo Seorang Model',
+ 'ext': 'mp4',
+ 'season': 'Season 2',
+ 'description': 'md5:c616e8e59d3edf2d3d506e3736120d99',
+ 'thumbnail': 'https://storage.googleapis.com/netprime-live/images/webseries/thumbnails/2021/11/619cf63f105d3.jpeg',
+ 'episode_number': 2,
+ 'series': 'Hello Jadoo',
+ 'episode': 'Episode 2',
+ 'view_count': int,
+ 'like_count': int,
+ 'display_id': 'jadoo-seorang-model',
+ 'uploader_id': 'x2ir3vq',
+ 'duration': 635,
+ 'timestamp': 1646372927,
+ 'tags': ['PG069497-hellojadooseason2eps2'],
+ 'upload_date': '20220304',
+ 'uploader': 'Net Prime',
+ 'age_limit': 0,
+ },
+ 'skip': 'This video is geo-blocked in some countries'
+ }, {
+ # non www host
+ 'url': 'https://netverse.id/watch/tetangga-baru',
+ 'info_dict': {
+ 'id': 'k4CNGz7V0HJ7vfwZbXy',
+ 'ext': 'mp4',
+ 'title': 'Tetangga Baru',
+ 'season': 'Season 1',
+ 'description': 'md5:ed6dd355bed84d139b1154c3d8d65957',
+ 'thumbnail': 'https://vplayed-uat.s3-ap-southeast-1.amazonaws.com/images/webseries/thumbnails/2021/11/619cfd9d32c5f.jpeg',
+ 'episode_number': 1,
+ 'series': 'Tetangga Masa Gitu',
+ 'episode': 'Episode 1',
+ 'timestamp': 1624538169,
+ 'view_count': int,
+ 'upload_date': '20210624',
+ 'age_limit': 0,
+ 'uploader_id': 'x2ir3vq',
+ 'like_count': int,
+ 'uploader': 'Net Prime',
+ 'tags': ['PG008534', 'tetangga', 'Baru'],
+ 'display_id': 'tetangga-baru',
+ 'duration': 1406,
+ },
+ }, {
+ # /video url
+ 'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
+ 'title': 'Namaku Choi Jadoo',
+ 'info_dict': {
+ 'id': 'x887jzz',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://storage.googleapis.com/netprime-live/images/webseries/thumbnails/2021/11/619cf63f105d3.jpeg',
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'description': 'md5:c616e8e59d3edf2d3d506e3736120d99',
+ 'title': 'Namaku Choi Jadoo',
+ 'series': 'Hello Jadoo',
+ 'episode': 'Episode 1',
+ 'age_limit': 0,
+ 'like_count': int,
+ 'view_count': int,
+ 'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
+ 'duration': 780,
+ 'display_id': 'pg067482-hellojadoo-season1',
+ 'uploader_id': 'x2ir3vq',
+ 'uploader': 'Net Prime',
+ 'timestamp': 1645764984,
+ 'upload_date': '20220225',
+ },
+ 'skip': 'This video is geo-blocked in some countries'
+ }]
+
+ def _real_extract(self, url):
+ display_id, program_json = self._call_api(url)
+ videos = program_json['response']['videos']
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': DailymotionIE.ie_key(),
+ 'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
+ 'display_id': display_id,
+ 'title': videos.get('title'),
+ 'season': videos.get('season_name'),
+ 'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
+ 'description': traverse_obj(videos, ('program_detail', 'description')),
+ 'episode_number': videos.get('episode_order'),
+ 'series': traverse_obj(videos, ('program_detail', 'title')),
+ }
+
+
+class NetversePlaylistIE(NetverseBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
+ 'info_dict': {
+ 'id': 'tetangga-masa-gitu',
+ 'title': 'Tetangga Masa Gitu',
+ },
+ 'playlist_count': 46,
+ }
+
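+ # The API's 'related' listing is paginated with a 1-based 'page' query,
+ # while InAdvancePagedList passes 0-based page numbers, hence the +1 below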
+ def parse_playlist(self, url, page_num):
+ _, playlist_json = self._call_api(url, query={'page': page_num + 1})
+ for slug in traverse_obj(playlist_json, ('response', 'related', 'data', ..., 'slug')):
+ yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)
+
+ def _real_extract(self, url):
+ _, playlist_data = self._call_api(url)
+ webseries_related_info = playlist_data['response']['related']
+ # TODO: get videos from other seasons
+ # Each season has an ID; the next season's videos live at api_url/<season_id>?page=<page>
+ return self.playlist_result(
+ InAdvancePagedList(functools.partial(self.parse_playlist, url),
+ webseries_related_info['last_page'],
+ webseries_related_info['to'] - webseries_related_info['from'] + 1),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
+ traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index cf2ec7b79..60d76d1b1 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -11,7 +11,7 @@ from ..utils import (
class NhkBaseIE(InfoExtractor):
- _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
+ _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
_TYPE_REGEX = r'/(?P<type>video|audio)/'
@@ -27,7 +27,7 @@ class NhkBaseIE(InfoExtractor):
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
- if episode_id.isdigit():
+ if len(episode_id) == 7:
episode_id = episode_id[:4] + '-' + episode_id[4:]
is_video = m_type == 'video'
@@ -89,7 +89,8 @@ class NhkBaseIE(InfoExtractor):
class NhkVodIE(NhkBaseIE):
- _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
+ # the 7-character IDs can contain alphabetic chars too: assume [a-z] rather than just [a-f], e.g. 9999a34
+ _VALID_URL = r'%s%s(?P<id>[0-9a-z]{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
# Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{
@@ -129,6 +130,19 @@ class NhkVodIE(NhkBaseIE):
}, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
'only_matching': True,
+ }, {
+ # video, alphabetic character in ID #29670
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a34/',
+ 'only_matching': True,
+ 'info_dict': {
+ 'id': 'qfjay6cg',
+ 'ext': 'mp4',
+ 'title': 'DESIGN TALKS plus - Fishermen’s Finery',
+ 'description': 'md5:8a8f958aaafb0d7cb59d38de53f1e448',
+ 'thumbnail': r're:^https?:/(/[a-z0-9.-]+)+\.jpg\?w=1920&h=1080$',
+ 'upload_date': '20210615',
+ 'timestamp': 1623722008,
+ }
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index a80b544f8..82fb27631 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -647,14 +647,14 @@ class NiconicoSeriesIE(InfoExtractor):
'id': '110226',
'title': 'ご立派ァ!のシリーズ',
},
- 'playlist_mincount': 10, # as of 2021/03/17
+ 'playlist_mincount': 10,
}, {
'url': 'https://www.nicovideo.jp/series/12312/',
'info_dict': {
'id': '12312',
'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
},
- 'playlist_mincount': 97, # as of 2021/03/17
+ 'playlist_mincount': 103,
}, {
'url': 'https://nico.ms/series/203559',
'only_matching': True,
@@ -672,7 +672,7 @@ class NiconicoSeriesIE(InfoExtractor):
title = unescapeHTML(title)
playlist = [
self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
- for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+ for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
return self.playlist_result(playlist, list_id, title)
diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py
index 6d93f154c..e677e862d 100644
--- a/yt_dlp/extractor/npr.py
+++ b/yt_dlp/extractor/npr.py
@@ -1,9 +1,5 @@
from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- qualities,
- url_or_none,
-)
+from ..utils import int_or_none, qualities, traverse_obj, url_or_none
class NprIE(InfoExtractor):
@@ -51,6 +47,15 @@ class NprIE(InfoExtractor):
# multimedia, no formats, stream
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
'only_matching': True,
+ }, {
+ 'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert',
+ 'info_dict': {
+ 'id': '1086468851',
+ 'ext': 'mp4',
+ 'title': 'Bonobo: Tiny Desk (Home) Concert',
+ 'duration': 1061,
+ 'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$',
+ },
}]
def _real_extract(self, url):
@@ -110,6 +115,12 @@ class NprIE(InfoExtractor):
formats.extend(self._extract_m3u8_formats(
stream_url, stream_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
+
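+ # Fallback for pages that expose no direct stream formats: the HLS
+ # manifest may only be present in the JSON-LD 'subjectOf' embedUrl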
+ if not formats:
+ raw_json_ld = self._yield_json_ld(self._download_webpage(url, playlist_id), playlist_id, fatal=False)
+ m3u8_url = traverse_obj(list(raw_json_ld), (..., 'subjectOf', ..., 'embedUrl'), get_all=False)
+ formats = self._extract_m3u8_formats(m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
+
self._sort_formats(formats)
entries.append({
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 553c55132..fcbafe418 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -3,18 +3,17 @@ import random
import re
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
- compat_HTTPError,
- determine_ext,
ExtractorError,
+ determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
str_or_none,
try_get,
- urljoin,
url_or_none,
+ urljoin,
)
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py
index 61e3a8b86..79dad09e3 100644
--- a/yt_dlp/extractor/openload.py
+++ b/yt_dlp/extractor/openload.py
@@ -9,7 +9,6 @@ from ..utils import (
ExtractorError,
Popen,
check_executable,
- encodeArgument,
get_exe_version,
is_outdated_version,
)
@@ -132,7 +131,7 @@ class PhantomJSwrapper:
os.remove(self._TMP_FILES[name].name)
def _save_cookies(self, url):
- cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ cookies = cookie_jar_to_list(self.extractor.cookiejar)
for cookie in cookies:
if 'path' not in cookie:
cookie['path'] = '/'
@@ -213,16 +212,14 @@ class PhantomJSwrapper:
else:
self.extractor.to_screen(f'{video_id}: {note2}')
- p = Popen(
+ stdout, stderr, returncode = Popen.run(
[self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate_or_kill()
- if p.returncode != 0:
- raise ExtractorError(
- 'Executing JS failed\n:' + encodeArgument(err))
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
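+ # Popen.run returns (stdout, stderr, returncode) directly, and text=True
+ # decodes both pipes to str, so no manual communicate()/decode is needed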
+ if returncode:
+ raise ExtractorError(f'Executing JS failed:\n{stderr}')
with open(self._TMP_FILES['html'].name, 'rb') as f:
html = f.read().decode('utf-8')
self._load_cookies()
- return (html, encodeArgument(out))
+ return html, stdout
diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py
index 8e50ffc7f..3fc05d1f2 100644
--- a/yt_dlp/extractor/peloton.py
+++ b/yt_dlp/extractor/peloton.py
@@ -1,11 +1,9 @@
import json
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse,
-)
+from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
float_or_none,
@@ -125,7 +123,7 @@ class PelotonIE(InfoExtractor):
is_live = False
if ride_data.get('content_format') == 'audio':
- url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token))
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token))
formats = [{
'url': url,
'ext': 'm4a',
@@ -138,9 +136,9 @@ class PelotonIE(InfoExtractor):
url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % (
','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]),
ride_data['vod_stream_url'],
- compat_urllib_parse.quote(compat_urllib_parse.quote(token)))
+ urllib.parse.quote(urllib.parse.quote(token)))
elif ride_data.get('live_stream_url'):
- url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token))
+ url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token))
is_live = True
else:
raise ExtractorError('Missing video URL')
diff --git a/yt_dlp/extractor/playsuisse.py b/yt_dlp/extractor/playsuisse.py
new file mode 100644
index 000000000..a635ac92f
--- /dev/null
+++ b/yt_dlp/extractor/playsuisse.py
@@ -0,0 +1,147 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PlaySuisseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/watch/(?P<id>[0-9]+)'
+ _TESTS = [
+ {
+ 'url': 'https://www.playsuisse.ch/watch/763211/0',
+ 'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
+ 'info_dict': {
+ 'id': '763211',
+ 'ext': 'mp4',
+ 'title': 'Knochen',
+ 'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
+ 'duration': 3344,
+ 'series': 'Wilder',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Knochen',
+ 'episode_number': 1,
+ 'thumbnail': 'md5:9260abe0c0ec9b69914d0a10d54c5878'
+ }
+ },
+ {
+ 'url': 'https://www.playsuisse.ch/watch/808675/0',
+ 'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
+ 'info_dict': {
+ 'id': '808675',
+ 'ext': 'mp4',
+ 'title': 'Der Läufer',
+ 'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
+ 'duration': 5280,
+ 'episode': 'Der Läufer',
+ 'thumbnail': 'md5:44af7d65ee02bbba4576b131868bb783'
+ }
+ },
+ {
+ 'url': 'https://www.playsuisse.ch/watch/817193/0',
+ 'md5': '1d6c066f92cd7fffd8b28a53526d6b59',
+ 'info_dict': {
+ 'id': '817193',
+ 'ext': 'mp4',
+ 'title': 'Die Einweihungsparty',
+ 'description': 'md5:91ebf04d3a42cb3ab70666acf750a930',
+ 'duration': 1380,
+ 'series': 'Nr. 47',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode': 'Die Einweihungsparty',
+ 'episode_number': 1,
+ 'thumbnail': 'md5:637585fb106e3a4bcd991958924c7e44'
+ }
+ }
+ ]
+
+ _GRAPHQL_QUERY = '''
+ query AssetWatch($assetId: ID!) {
+ assetV2(id: $assetId) {
+ ...Asset
+ episodes {
+ ...Asset
+ }
+ }
+ }
+ fragment Asset on AssetV2 {
+ id
+ name
+ description
+ duration
+ episodeNumber
+ seasonNumber
+ seriesName
+ medias {
+ type
+ url
+ }
+ thumbnail16x9 {
+ ...ImageDetails
+ }
+ thumbnail2x3 {
+ ...ImageDetails
+ }
+ thumbnail16x9WithTitle {
+ ...ImageDetails
+ }
+ thumbnail2x3WithTitle {
+ ...ImageDetails
+ }
+ }
+ fragment ImageDetails on AssetImage {
+ id
+ url
+ }'''
+
+ def _get_media_data(self, media_id):
+ # NOTE: In the web app, the "locale" header is used to switch between languages;
+ # however, it doesn't seem to take effect when passed here.
+ response = self._download_json(
+ 'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
+ media_id, data=json.dumps({
+ 'operationName': 'AssetWatch',
+ 'query': self._GRAPHQL_QUERY,
+ 'variables': {'assetId': media_id}
+ }).encode('utf-8'),
+ headers={'Content-Type': 'application/json', 'locale': 'de'})
+
+ return response['data']['assetV2']
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+ media_data = self._get_media_data(media_id)
+ info = self._extract_single(media_data)
+ if media_data.get('episodes'):
+ info.update({
+ '_type': 'playlist',
+ 'entries': map(self._extract_single, media_data['episodes']),
+ })
+ return info
+
+ def _extract_single(self, media_data):
+ thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))
+
+ formats, subtitles = [], {}
+ for media in traverse_obj(media_data, 'medias', default=[]):
+ if not media.get('url') or media.get('type') != 'HLS':
+ continue
+ f, subs = self._extract_m3u8_formats_and_subtitles(
+ media['url'], media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
+ formats.extend(f)
+ self._merge_subtitles(subs, target=subtitles)
+
+ return {
+ 'id': media_data['id'],
+ 'title': media_data.get('name'),
+ 'description': media_data.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(media_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series': media_data.get('seriesName'),
+ 'season_number': int_or_none(media_data.get('seasonNumber')),
+ 'episode': media_data.get('name'),
+ 'episode_number': int_or_none(media_data.get('episodeNumber')),
+ }
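
For reference, the _get_media_data call above is a plain JSON POST against a GraphQL endpoint; a standalone sketch using only the standard library (same endpoint and payload shape as in the extractor, query document elided):

    import json
    import urllib.request

    req = urllib.request.Request(
        'https://4bbepzm4ef.execute-api.eu-central-1.amazonaws.com/prod/graphql',
        data=json.dumps({
            'operationName': 'AssetWatch',
            'query': '...',  # the _GRAPHQL_QUERY document shown above
            'variables': {'assetId': '763211'},
        }).encode(),
        headers={'Content-Type': 'application/json', 'locale': 'de'})
    with urllib.request.urlopen(req) as resp:
        asset = json.load(resp)['data']['assetV2']
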
diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py
index 5ffefc934..18aeda7de 100644
--- a/yt_dlp/extractor/playvid.py
+++ b/yt_dlp/extractor/playvid.py
@@ -1,14 +1,9 @@
import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
-)
+from ..compat import compat_urllib_parse_unquote
+from ..utils import ExtractorError, clean_html
class PlayvidIE(InfoExtractor):
@@ -62,7 +57,7 @@ class PlayvidIE(InfoExtractor):
val = videovars_match.group(2)
if key == 'title':
- video_title = compat_urllib_parse_unquote_plus(val)
+ video_title = urllib.parse.unquote_plus(val)
if key == 'duration':
try:
duration = int(val)
diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py
index eef0d02ca..0911893d4 100644
--- a/yt_dlp/extractor/pokemon.py
+++ b/yt_dlp/extractor/pokemon.py
@@ -1,5 +1,3 @@
-import re
-
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@@ -136,42 +134,3 @@ class PokemonWatchIE(InfoExtractor):
'episode': video_data.get('title'),
'episode_number': int_or_none(video_data.get('episode')),
})
-
-
-class PokemonSoundLibraryIE(InfoExtractor):
- _VALID_URL = r'https?://soundlibrary\.pokemon\.co\.jp'
-
- _TESTS = [{
- 'url': 'https://soundlibrary.pokemon.co.jp/',
- 'info_dict': {
- 'title': 'Pokémon Diamond and Pearl Sound Tracks',
- },
- 'playlist_mincount': 149,
- }]
-
- def _real_extract(self, url):
- musicbox_webpage = self._download_webpage(
- 'https://soundlibrary.pokemon.co.jp/musicbox', None,
- 'Downloading list of songs')
- song_titles = [x.group(1) for x in re.finditer(r'<span>([^>]+?)</span><br/>をてもち曲に加えます。', musicbox_webpage)]
- song_titles = song_titles[4::2]
-
- # each songs don't have permalink; instead we return all songs at once
- song_entries = [{
- 'id': f'pokemon-soundlibrary-{song_id}',
- 'url': f'https://soundlibrary.pokemon.co.jp/api/assets/signing/sounds/wav/{song_id}.wav',
- # note: the server always serves MP3 files, despite its extension of the URL above
- 'ext': 'mp3',
- 'acodec': 'mp3',
- 'vcodec': 'none',
- 'title': song_title,
- 'track': song_title,
- 'artist': 'Nintendo / Creatures Inc. / GAME FREAK inc.',
- 'uploader': 'Pokémon',
- 'release_year': 2006,
- 'release_date': '20060928',
- 'track_number': song_id,
- 'album': 'Pokémon Diamond and Pearl',
- } for song_id, song_title in enumerate(song_titles, 1)]
-
- return self.playlist_result(song_entries, playlist_title='Pokémon Diamond and Pearl Sound Tracks')
diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py
index ed741a07b..ddc5ec8c8 100644
--- a/yt_dlp/extractor/popcorntimes.py
+++ b/yt_dlp/extractor/popcorntimes.py
@@ -1,8 +1,5 @@
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_chr,
-)
+from ..compat import compat_b64decode
from ..utils import int_or_none
@@ -50,7 +47,7 @@ class PopcorntimesIE(InfoExtractor):
c_ord += 13
if upper < c_ord:
c_ord -= 26
- loc_b64 += compat_chr(c_ord)
+ loc_b64 += chr(c_ord)
video_url = compat_b64decode(loc_b64).decode('utf-8')
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index d296ccacb..35468b4fc 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -3,28 +3,26 @@ import itertools
import math
import operator
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
-)
from .openload import PhantomJSwrapper
+from ..compat import compat_HTTPError, compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
clean_html,
determine_ext,
- ExtractorError,
format_field,
int_or_none,
merge_dicts,
- NO_DEFAULT,
orderedSet,
remove_quotes,
+ remove_start,
str_to_int,
update_url_query,
- urlencode_postdata,
url_or_none,
+ urlencode_postdata,
)
@@ -49,7 +47,7 @@ class PornHubBaseIE(InfoExtractor):
r'document\.location\.reload\(true\)')):
url_or_request = args[0]
url = (url_or_request.get_full_url()
- if isinstance(url_or_request, compat_urllib_request.Request)
+ if isinstance(url_or_request, urllib.request.Request)
else url_or_request)
phantom = PhantomJSwrapper(self, required_version='2.0')
phantom.get(url, html=webpage)
@@ -199,6 +197,16 @@ class PornHubIE(PornHubBaseIE):
},
'skip': 'This video has been disabled',
}, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
+ 'info_dict': {
+ 'id': 'ph601dc30bae19a',
+ 'uploader': 'Projekt Melody',
+ 'uploader_id': 'projekt-melody',
+ 'upload_date': '20210205',
+ 'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
+ 'thumbnail': r're:https?://.+',
+ },
+ }, {
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
'only_matching': True,
}, {
@@ -429,7 +437,7 @@ class PornHubIE(PornHubBaseIE):
default=None))
formats.append({
'url': format_url,
- 'format_id': format_field(height, template='%dp'),
+ 'format_id': format_field(height, None, '%dp'),
'height': height,
})
@@ -457,9 +465,11 @@ class PornHubIE(PornHubBaseIE):
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
+ model_profile = self._search_json(
+ r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False)
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
- webpage, 'uploader', default=None)
+ webpage, 'uploader', default=None) or model_profile.get('username')
def extract_vote_count(kind, name):
return self._extract_count(
@@ -488,6 +498,7 @@ class PornHubIE(PornHubBaseIE):
return merge_dicts({
'id': video_id,
'uploader': video_uploader,
+ 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'),
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
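
The new uploader_id is derived by stripping the '/model/' prefix from the profile link found in the MODEL_PROFILE JSON. A stand-in matching the assumed behaviour of remove_start in yt_dlp.utils:

    def remove_start(s, start):
        # drop the prefix when present; pass other values (incl. None) through
        return s[len(start):] if s is not None and s.startswith(start) else s

    print(remove_start('/model/projekt-melody', '/model/'))  # projekt-melody
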
diff --git a/yt_dlp/extractor/premiershiprugby.py b/yt_dlp/extractor/premiershiprugby.py
new file mode 100644
index 000000000..67d41fdfd
--- /dev/null
+++ b/yt_dlp/extractor/premiershiprugby.py
@@ -0,0 +1,39 @@
+from .common import InfoExtractor
+from ..utils import int_or_none, traverse_obj
+
+
+class PremiershipRugbyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.(?:com)/watch/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons',
+ 'info_dict': {
+ 'id': '0_mbkb7ldt',
+ 'title': 'Full Match: Harlequins v Newcastle Falcons',
+ 'ext': 'mp4',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75',
+ 'duration': 6093.0,
+ 'tags': ['video'],
+ 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'],
+ }
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ json_data = self._download_json(
+ f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}',
+ display_id, query={'clientId': 'PRL'})['data']['article']
+
+ formats, subs = self._extract_m3u8_formats_and_subtitles(
+ json_data['heroMedia']['content']['videoLink'], display_id)
+
+ return {
+ 'id': json_data['heroMedia']['content']['sourceSystemId'],
+ 'display_id': display_id,
+ 'title': traverse_obj(json_data, ('heroMedia', 'title')),
+ 'formats': formats,
+ 'subtitles': subs,
+ 'thumbnail': traverse_obj(json_data, ('heroMedia', 'content', 'videoThumbnail')),
+ 'duration': int_or_none(traverse_obj(json_data, ('heroMedia', 'content', 'metadata', 'msDuration')), scale=1000),
+ 'tags': json_data.get('tags'),
+ 'categories': traverse_obj(json_data, ('categories', ..., 'text')),
+ }
diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py
index 3c13d1f56..38c5d1109 100644
--- a/yt_dlp/extractor/puls4.py
+++ b/yt_dlp/extractor/puls4.py
@@ -1,9 +1,6 @@
from .prosiebensat1 import ProSiebenSat1BaseIE
-from ..utils import (
- unified_strdate,
- parse_duration,
- compat_str,
-)
+from ..compat import compat_str
+from ..utils import parse_duration, unified_strdate
class Puls4IE(ProSiebenSat1BaseIE):
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
index dbb748715..498cc6be9 100644
--- a/yt_dlp/extractor/radiko.py
+++ b/yt_dlp/extractor/radiko.py
@@ -43,7 +43,7 @@ class RadikoBaseIE(InfoExtractor):
}).split(',')[0]
auth_data = (auth_token, area_id)
- self._downloader.cache.store('radiko', 'auth_data', auth_data)
+ self.cache.store('radiko', 'auth_data', auth_data)
return auth_data
def _extract_full_key(self):
@@ -150,7 +150,7 @@ class RadikoIE(RadikoBaseIE):
vid_int = unified_timestamp(video_id, False)
prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
- auth_cache = self._downloader.cache.load('radiko', 'auth_data')
+ auth_cache = self.cache.load('radiko', 'auth_data')
for attempt in range(2):
auth_token, area_id = (not attempt and auth_cache) or self._auth_client()
formats = self._extract_formats(
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index 8fef54dab..7b60b2617 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -1,6 +1,7 @@
import re
from .common import InfoExtractor
+from ..utils import parse_duration, unified_strdate
class RadioFranceIE(InfoExtractor):
@@ -54,3 +55,51 @@ class RadioFranceIE(InfoExtractor):
'description': description,
'uploader': uploader,
}
+
+
+class FranceCultureIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?radiofrance\.fr/(?:franceculture|fip|francemusique|mouv|franceinter)/podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d+)($|[?#])'
+ _TESTS = [
+ {
+ 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487',
+ 'info_dict': {
+ 'id': '8440487',
+ 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau',
+ 'ext': 'mp3',
+ 'title': 'La physique d’Einstein aiderait-elle à comprendre le cerveau ?',
+ 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?',
+ 'thumbnail': 'https://cdn.radiofrance.fr/s3/cruiser-production/2022/05/d184e7a3-4827-4494-bf94-04ed7b120db4/1200x630_gettyimages-200171095-001.jpg',
+ 'upload_date': '20220514',
+ 'duration': 2750,
+ },
+ },
+ {
+ 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
+ webpage = self._download_webpage(url, display_id)
+
+ # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
+ video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_data['contentUrl'],
+ 'ext': video_data.get('encodingFormat'),
+ 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=self._og_search_title(webpage)),
+ 'description': self._html_search_regex(
+ r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'uploader': self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None),
+ 'upload_date': unified_strdate(self._search_regex(
+ r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False))
+ }
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
index dc9897305..d89c9563b 100644
--- a/yt_dlp/extractor/radlive.py
+++ b/yt_dlp/extractor/radlive.py
@@ -80,7 +80,7 @@ class RadLiveIE(InfoExtractor):
'release_timestamp': release_date,
'channel': channel.get('name'),
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://rad.live/content/channel/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'),
}
if content_type == 'episode':
diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py
index ad53d697e..119c5ea3c 100644
--- a/yt_dlp/extractor/rokfin.py
+++ b/yt_dlp/extractor/rokfin.py
@@ -146,7 +146,7 @@ class RokfinIE(InfoExtractor):
for page_n in itertools.count():
raw_comments = self._download_json(
f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50',
- video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, template=" of %s")}',
+ video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}',
fatal=False) or {}
for comment in raw_comments.get('content') or []:
@@ -318,7 +318,7 @@ class RokfinChannelIE(RokfinPlaylistBaseIE):
data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}'
metadata = self._download_json(
data_url, channel_name,
- note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, template=" of %s")}')
+ note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}')
yield from self._get_video_data(metadata)
pages_total = int_or_none(metadata.get('totalPages')) or None
@@ -360,7 +360,7 @@ class RokfinSearchIE(SearchInfoExtractor):
_db_access_key = None
def _real_initialize(self):
- self._db_url, self._db_access_key = self._downloader.cache.load(self.ie_key(), 'auth', default=(None, None))
+ self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None))
if not self._db_url:
self._get_db_access_credentials()
@@ -369,7 +369,7 @@ class RokfinSearchIE(SearchInfoExtractor):
for page_number in itertools.count(1):
search_results = self._run_search_query(
query, data={'query': query, 'page': {'size': 100, 'current': page_number}},
- note=f'Downloading page {page_number}{format_field(total_pages, template=" of ~%s")}')
+ note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}')
total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none)
for result in search_results.get('results') or []:
@@ -405,6 +405,6 @@ class RokfinSearchIE(SearchInfoExtractor):
self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json')
self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}'
- self._downloader.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
+ self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key))
return
raise ExtractorError('Unable to extract access credentials')
diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py
index 42a602968..798dde7fa 100644
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -1,14 +1,12 @@
import base64
import io
+import struct
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_struct_unpack,
-)
+from ..compat import compat_b64decode
from ..utils import (
- determine_ext,
ExtractorError,
+ determine_ext,
float_or_none,
qualities,
remove_end,
@@ -73,7 +71,7 @@ class RTVEALaCartaIE(InfoExtractor):
def _decrypt_url(png):
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
while True:
- length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+ length = struct.unpack('!I', encrypted_data.read(4))[0]
chunk_type = encrypted_data.read(4)
if chunk_type == b'IEND':
break
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index 50c383d79..924f9829f 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -24,6 +24,11 @@ class RumbleEmbedIE(InfoExtractor):
'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
'timestamp': 1571611968,
'upload_date': '20191020',
+ 'channel_url': 'https://rumble.com/c/WMAR',
+ 'channel': 'WMAR',
+ 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.OvCc-small-WMAR-2-News-Latest-Headline.jpg',
+ 'duration': 234,
+ 'uploader': 'WMAR',
}
}, {
'url': 'https://rumble.com/embed/vslb7v',
@@ -38,19 +43,21 @@ class RumbleEmbedIE(InfoExtractor):
'channel': 'CTNews',
'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg',
'duration': 901,
+ 'uploader': 'CTNews',
}
}, {
'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
'only_matching': True,
}]
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL,
- webpage)]
+ @classmethod
+ def _extract_urls(cls, webpage):
+ embeds = tuple(re.finditer(
+ fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{cls._VALID_URL})', webpage))
+ if embeds:
+ return [mobj.group('url') for mobj in embeds]
+ return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer(
+ r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -77,17 +84,26 @@ class RumbleEmbedIE(InfoExtractor):
formats.append(f)
self._sort_formats(formats)
+ subtitles = {
+ lang: [{
+ 'url': sub_info['path'],
+ 'name': sub_info.get('language') or '',
+ }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path')
+ }
+
author = video.get('author') or {}
return {
'id': video_id,
'title': title,
'formats': formats,
+ 'subtitles': subtitles,
'thumbnail': video.get('i'),
'timestamp': parse_iso8601(video.get('pubDate')),
'channel': author.get('name'),
'channel_url': author.get('url'),
'duration': int_or_none(video.get('duration')),
+ 'uploader': author.get('name'),
}
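
_extract_urls now falls back to Rumble's JS loader, which embeds only a video ID rather than a full URL; each ID is rewritten into an embed URL. A toy demonstration of that second regex (HTML invented for illustration):

    import re

    webpage = '<script>Rumble("play", {"video": "vslb7v", "div": "rumble_vslb7v"});</script>'
    ids = re.findall(
        r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"]([0-9a-z]+)[\'"]',
        webpage)
    print([f'https://rumble.com/embed/{i}' for i in ids])  # ['https://rumble.com/embed/vslb7v']
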
diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py
index e3dbaab69..df5e79bef 100644
--- a/yt_dlp/extractor/screencast.py
+++ b/yt_dlp/extractor/screencast.py
@@ -1,11 +1,8 @@
+import urllib.request
+
from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_request,
-)
-from ..utils import (
- ExtractorError,
-)
+from ..compat import compat_parse_qs
+from ..utils import ExtractorError
class ScreencastIE(InfoExtractor):
@@ -75,7 +72,7 @@ class ScreencastIE(InfoExtractor):
flash_vars_s = flash_vars_s.replace(',', '&')
if flash_vars_s:
flash_vars = compat_parse_qs(flash_vars_s)
- video_url_raw = compat_urllib_request.quote(
+ video_url_raw = urllib.request.quote(
flash_vars['content'][0])
video_url = video_url_raw.replace('http%3A', 'http:')
diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py
index 5bc097b0d..9a237b320 100644
--- a/yt_dlp/extractor/shared.py
+++ b/yt_dlp/extractor/shared.py
@@ -1,14 +1,13 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_urllib_parse_unquote_plus,
-)
+from ..compat import compat_b64decode
from ..utils import (
- determine_ext,
+ KNOWN_EXTENSIONS,
ExtractorError,
+ determine_ext,
int_or_none,
js_to_json,
- KNOWN_EXTENSIONS,
parse_filesize,
rot47,
url_or_none,
@@ -130,7 +129,7 @@ class VivoIE(SharedBaseIE):
return stream_url
def decode_url(encoded_url):
- return rot47(compat_urllib_parse_unquote_plus(encoded_url))
+ return rot47(urllib.parse.unquote_plus(encoded_url))
return decode_url(self._parse_json(
self._search_regex(
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 6dfa50c60..9e4c8cf25 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -67,7 +67,7 @@ class SoundcloudBaseIE(InfoExtractor):
_HEADERS = {}
def _store_client_id(self, client_id):
- self._downloader.cache.store('soundcloud', 'client_id', client_id)
+ self.cache.store('soundcloud', 'client_id', client_id)
def _update_client_id(self):
webpage = self._download_webpage('https://soundcloud.com/', None)
@@ -104,7 +104,7 @@ class SoundcloudBaseIE(InfoExtractor):
raise
def _initialize_pre_login(self):
- self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+ self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
def _perform_login(self, username, password):
if username != 'oauth':
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index 855f1d6d3..7381ac362 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -109,6 +109,49 @@ class SouthParkDeIE(SouthParkIE):
return
+class SouthParkLatIE(SouthParkIE):
+ IE_NAME = 'southpark.lat'
+ _VALID_URL = r'https?://(?:www\.)?southpark\.lat/(?:en/)?(?:video-?clips?|collections|episod(?:e|io)s)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.southpark.lat/en/collections/29ve08/south-park-heating-up/lydbrc',
+ 'only_matching': True,
+ }, {
+ # clip
+ 'url': 'https://www.southpark.lat/en/video-clips/ct46op/south-park-tooth-fairy-cartman',
+ 'info_dict': {
+ 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30',
+ 'ext': 'mp4',
+ 'title': 'Tooth Fairy Cartman',
+ 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68',
+ },
+ }, {
+ # episode
+ 'url': 'https://www.southpark.lat/episodios/9h0qbg/south-park-orgia-gatuna-temporada-3-ep-7',
+ 'info_dict': {
+ 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'South Park',
+ 'description': 'md5:ae0d875eff169dcbed16b21531857ac1',
+ },
+ }]
+
+ def _get_feed_url(self, uri, url=None):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ f'http://media.mtvnservices.com/pmt/e1/access/index.html?uri={uri}&configtype=edge&ref={url}',
+ video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _get_feed_query(self, uri):
+ return
+
+
class SouthParkNlIE(SouthParkIE):
IE_NAME = 'southpark.nl'
_VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
diff --git a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py
index a2068a1b6..fef8d8dd2 100644
--- a/yt_dlp/extractor/spotify.py
+++ b/yt_dlp/extractor/spotify.py
@@ -1,12 +1,15 @@
+import functools
import json
import re
from .common import InfoExtractor
from ..utils import (
+ OnDemandPagedList,
clean_podcast_url,
float_or_none,
int_or_none,
strip_or_none,
+ traverse_obj,
try_get,
unified_strdate,
)
@@ -25,7 +28,7 @@ class SpotifyBaseIE(InfoExtractor):
self._ACCESS_TOKEN = self._download_json(
'https://open.spotify.com/get_access_token', None)['accessToken']
- def _call_api(self, operation, video_id, variables):
+ def _call_api(self, operation, video_id, variables, **kwargs):
return self._download_json(
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
'operationName': 'query' + operation,
@@ -35,7 +38,8 @@ class SpotifyBaseIE(InfoExtractor):
'sha256Hash': self._OPERATION_HASHES[operation],
},
})
- }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
+ }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN},
+ **kwargs)['data']
def _extract_episode(self, episode, series):
episode_id = episode['id']
@@ -143,22 +147,25 @@ class SpotifyShowIE(SpotifyBaseIE):
},
'playlist_mincount': 36,
}
+ _PER_PAGE = 100
+
+ def _fetch_page(self, show_id, page=0):
+ return self._call_api('ShowEpisodes', show_id, {
+ 'limit': 100,
+ 'offset': page * self._PER_PAGE,
+ 'uri': f'spotify:show:{show_id}',
+ }, note=f'Downloading page {page + 1} JSON metadata')['podcast']
def _real_extract(self, url):
show_id = self._match_id(url)
- podcast = self._call_api('ShowEpisodes', show_id, {
- 'limit': 1000000000,
- 'offset': 0,
- 'uri': 'spotify:show:' + show_id,
- })['podcast']
- podcast_name = podcast.get('name')
-
- entries = []
- for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
- episode = item.get('episode')
- if not episode:
- continue
- entries.append(self._extract_episode(episode, podcast_name))
+ first_page = self._fetch_page(show_id)
+
+ def _entries(page):
+ podcast = self._fetch_page(show_id, page) if page else first_page
+ yield from map(
+ functools.partial(self._extract_episode, series=podcast.get('name')),
+ traverse_obj(podcast, ('episodes', 'items', ..., 'episode')))
return self.playlist_result(
- entries, show_id, podcast_name, podcast.get('description'))
+ OnDemandPagedList(_entries, self._PER_PAGE),
+ show_id, first_page.get('name'), first_page.get('description'))
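
The rewrite drops the old limit=1000000000 request in favour of real pagination: OnDemandPagedList calls a zero-indexed page function lazily, so only the pages a user actually consumes (e.g. via --playlist-items) are fetched. A minimal stand-in for that contract (toy data, not the Spotify API):

    def fetch_page(page):  # 0-indexed, like _entries above
        start = page * 100
        yield from (f'episode-{i}' for i in range(start, min(start + 100, 250)))

    # OnDemandPagedList(fetch_page, 100) would invoke fetch_page(0), fetch_page(1), ...
    # only as entries are consumed; eagerly materialised here for illustration:
    print(list(fetch_page(2)))  # ['episode-200', ..., 'episode-249'] (50 items)
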
diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py
index 716190220..035747c31 100644
--- a/yt_dlp/extractor/storyfire.py
+++ b/yt_dlp/extractor/storyfire.py
@@ -44,7 +44,7 @@ class StoryFireBaseIE(InfoExtractor):
'timestamp': int_or_none(video.get('publishDate')),
'uploader': video.get('username'),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://storyfire.com/user/%s/video'),
+ 'uploader_url': format_field(uploader_id, None, 'https://storyfire.com/user/%s/video'),
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
}
diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py
index 85fc3a3c3..849a9882d 100644
--- a/yt_dlp/extractor/streamcz.py
+++ b/yt_dlp/extractor/streamcz.py
@@ -52,8 +52,8 @@ class StreamCZIE(InfoExtractor):
def _extract_formats(self, spl_url, video):
for ext, pref, streams in (
- ('ts', -1, traverse_obj(video, ('http_stream', 'qualities'))),
- ('mp4', 1, video.get('mp4'))):
+ ('ts', -1, traverse_obj(video, ('http_stream', 'qualities')) or {}),
+ ('mp4', 1, video.get('mp4') or {})):
for format_id, stream in streams.items():
if not stream.get('url'):
continue
diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py
index 618dc4329..c879fb52e 100644
--- a/yt_dlp/extractor/stv.py
+++ b/yt_dlp/extractor/stv.py
@@ -1,6 +1,6 @@
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
- compat_str,
float_or_none,
int_or_none,
smuggle_url,
diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py
new file mode 100644
index 000000000..70cf10515
--- /dev/null
+++ b/yt_dlp/extractor/substack.py
@@ -0,0 +1,100 @@
+import re
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import str_or_none, traverse_obj
+
+
+class SubstackIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<username>[\w-]+)\.substack\.com/p/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://haleynahman.substack.com/p/i-made-a-vlog?s=r',
+ 'md5': 'f27e4fc6252001d48d479f45e65cdfd5',
+ 'info_dict': {
+ 'id': '47660949',
+ 'ext': 'mp4',
+ 'title': 'I MADE A VLOG',
+ 'description': 'md5:10c01ff93439a62e70ce963b2aa0b7f6',
+ 'thumbnail': 'md5:bec758a34d8ee9142d43bcebdf33af18',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://haleynahman.substack.com/p/-dear-danny-i-found-my-boyfriends?s=r',
+ 'md5': '0a63eacec877a1171a62cfa69710fcea',
+ 'info_dict': {
+ 'id': '51045592',
+ 'ext': 'mpga',
+ 'title': "🎧 Dear Danny: I found my boyfriend's secret Twitter account",
+ 'description': 'md5:a57f2439319e56e0af92dd0c95d75797',
+ 'thumbnail': 'md5:daa40b6b79249417c14ff8103db29639',
+ 'uploader': 'Maybe Baby',
+ 'uploader_id': '33628',
+ }
+ }, {
+ 'url': 'https://andrewzimmern.substack.com/p/mussels-with-black-bean-sauce-recipe',
+ 'md5': 'fd3c07077b02444ff0130715b5f632bb',
+ 'info_dict': {
+ 'id': '47368578',
+ 'ext': 'mp4',
+ 'title': 'Mussels with Black Bean Sauce: Recipe of the Week #7',
+ 'description': 'md5:b96234a2906c7d854d5229818d889515',
+ 'thumbnail': 'md5:e30bfaa9da40e82aa62354263a9dd232',
+ 'uploader': "Andrew Zimmern's Spilled Milk ",
+ 'uploader_id': '577659',
+ }
+ }]
+
+ @classmethod
+ def _extract_url(cls, webpage, url):
+ if not re.search(r'<script[^>]+src=["\']https://substackcdn.com/[^"\']+\.js', webpage):
+ return
+
+ mobj = re.search(r'{[^}]*["\']subdomain["\']\s*:\s*["\'](?P<subdomain>[^"]+)', webpage)
+ if mobj:
+ parsed = urllib.parse.urlparse(url)
+ return parsed._replace(netloc=f'{mobj.group("subdomain")}.substack.com').geturl()
+
+ def _extract_video_formats(self, video_id, username):
+ formats, subtitles = [], {}
+ for video_format in ('hls', 'mp4'):
+ video_url = f'https://{username}.substack.com/api/v1/video/upload/{video_id}/src?type={video_format}'
+
+ if video_format == 'hls':
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(video_url, video_id, 'mp4', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
+ else:
+ formats.append({
+ 'url': video_url,
+ 'ext': video_format,
+ })
+
+ return formats, subtitles
+
+ def _real_extract(self, url):
+ display_id, username = self._match_valid_url(url).group('id', 'username')
+ webpage = self._download_webpage(url, display_id)
+
+ webpage_info = self._search_json(r'<script[^>]*>\s*window\._preloads\s*=', webpage, 'preloads', display_id)
+
+ post_type = webpage_info['post']['type']
+ formats, subtitles = [], {}
+ if post_type == 'podcast':
+ formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {}
+ elif post_type == 'video':
+ formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], username)
+ else:
+ self.raise_no_formats(f'Page type "{post_type}" is not supported')
+
+ self._sort_formats(formats)
+ return {
+ 'id': str(webpage_info['post']['id']),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'title': traverse_obj(webpage_info, ('post', 'title')),
+ 'description': traverse_obj(webpage_info, ('post', 'description')),
+ 'thumbnail': traverse_obj(webpage_info, ('post', 'cover_image')),
+ 'uploader': traverse_obj(webpage_info, ('pub', 'name')),
+ 'uploader_id': str_or_none(traverse_obj(webpage_info, ('post', 'publication_id'))),
+ }
diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py
index 80acaf190..3bd7ce3c4 100644
--- a/yt_dlp/extractor/tennistv.py
+++ b/yt_dlp/extractor/tennistv.py
@@ -1,16 +1,17 @@
-import json
+import urllib.parse
from .common import InfoExtractor
-
from ..utils import (
ExtractorError,
+ random_uuidv4,
unified_timestamp,
+ urlencode_postdata,
)
class TennisTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
- _TEST = {
+ _TESTS = [{
'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
'info_dict': {
'id': 'indian-wells-2018-verdasco-fritz',
@@ -25,86 +26,132 @@ class TennisTVIE(InfoExtractor):
'skip_download': True,
},
'skip': 'Requires email and password of a subscribed account',
- }
+ }, {
+ 'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5',
+ 'info_dict': {
+ 'id': '2650480',
+ 'ext': 'mp4',
+ 'title': 'Best Matches of 2022 - Part 5',
+ 'description': 'md5:36dec3bfae7ed74bd79e48045b17264c',
+ 'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'skip': 'Requires email and password of a subscribed account',
+ }]
_NETRC_MACHINE = 'tennistv'
- _session_token = None
-
- def _perform_login(self, username, password):
- login_form = {
- 'Email': username,
- 'Password': password,
- }
- login_json = json.dumps(login_form).encode('utf-8')
- headers = {
- 'content-type': 'application/json',
- 'Referer': 'https://www.tennistv.com/login',
- 'Origin': 'https://www.tennistv.com',
- }
-
- login_result = self._download_json(
- 'https://www.tennistv.com/api/users/v1/login', None,
- note='Logging in',
- errnote='Login failed (wrong password?)',
- headers=headers,
- data=login_json)
+ access_token, refresh_token = None, None
+ _PARTNER_ID = 3001482
+ _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}'
+ _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect'
+ _HEADERS = {
+ 'origin': 'https://www.tennistv.com',
+ 'referer': 'https://www.tennistv.com/',
+ 'content-Type': 'application/x-www-form-urlencoded'
+ }
- if login_result['error']['errorCode']:
- raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage']))
+ def _perform_login(self, username, password):
+ login_page = self._download_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page',
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://tennistv.com',
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid'
+ })
+
+ post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url')
+ temp_page = self._download_webpage(
+ post_url, None, 'Sending login data', 'Unable to send login data',
+ headers=self._HEADERS, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ 'submitAction': 'Log In'
+ }))
+ if 'Your username or password was incorrect' in temp_page:
+ raise ExtractorError('Your username or password was incorrect', expected=True)
+
+ handle = self._request_webpage(
+ f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS,
+ query={
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html',
+ 'state': random_uuidv4(),
+ 'response_mode': 'fragment',
+ 'response_type': 'code',
+ 'scope': 'openid',
+ 'nonce': random_uuidv4(),
+ 'prompt': 'none'
+ })
+
+ self.get_token(None, {
+ 'code': urllib.parse.parse_qs(handle.geturl())['code'][-1],
+ 'grant_type': 'authorization_code',
+ 'client_id': 'tennis-tv-web',
+ 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html'
+ })
+
+ def get_token(self, video_id, payload):
+ res = self._download_json(
+ f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens',
+ 'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload))
+
+ self.access_token = res.get('access_token') or self.access_token
+ self.refresh_token = res.get('refresh_token') or self.refresh_token
- if login_result['entitlement'] != 'SUBSCRIBED':
- self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME))
+ def _real_initialize(self):
+ if self.access_token and self.refresh_token:
+ return
- self._session_token = login_result['sessionToken']
+ cookies = self._get_cookies('https://www.tennistv.com/')
+ if not cookies.get('access_token') or not cookies.get('refresh_token'):
+ self.raise_login_required()
+ self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value
- def _real_initialize(self):
- if not self._session_token:
- raise self.raise_login_required('Login info is needed for this website', method='password')
+ def _download_session_json(self, video_id, entryid):
+ return self._download_json(
+ f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}',
+ video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id')
+ entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID')
+ session_json = self._download_session_json(video_id, entryid)
- headers = {
- 'Origin': 'https://www.tennistv.com',
- 'authorization': 'ATP %s' % self._session_token,
- 'content-type': 'application/json',
- 'Referer': url,
- }
- check_data = {
- 'videoID': internal_id,
- 'VideoUrlType': 'HLS',
- }
- check_json = json.dumps(check_data).encode('utf-8')
- check_result = self._download_json(
- 'https://www.tennistv.com/api/users/v1/entitlementchecknondiva',
- video_id, note='Checking video authorization', headers=headers, data=check_json)
- formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4')
- self._sort_formats(formats)
+ k_session = session_json.get('KSession')
+ if k_session is None:
+ self.get_token(video_id, {
+ 'grant_type': 'refresh_token',
+ 'refresh_token': self.refresh_token,
+ 'client_id': 'tennis-tv-web'
+ })
+ k_session = self._download_session_json(video_id, entryid).get('KSession')
+ if k_session is None:
+ raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True)
- vdata = self._download_json(
- 'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id,
- video_id, headers=headers)
+ if session_json.get('ErrorMessage'):
+ self.report_warning(session_json['ErrorMessage'])
- timestamp = unified_timestamp(vdata['timestamp'])
- thumbnail = vdata['video']['thumbnailUrl']
- description = vdata['displayText']['description']
- title = vdata['video']['title']
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)
- series = vdata['tour']
- venue = vdata['displayText']['venue']
- round_str = vdata['seo']['round']
+ self._sort_formats(formats)
return {
'id': video_id,
- 'title': title,
- 'description': description,
+ 'title': self._html_extract_title(webpage) or self._og_search_title(webpage),
+ 'description': self._html_search_regex(
+ (r'<span itemprop="description" content=["\']([^"\']+)["\']>', *self._og_regexes('description')),
+ webpage, 'description', fatal=False),
+ 'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920',
+ 'timestamp': unified_timestamp(self._html_search_regex(
+ r'<span itemprop="description" content=["\']([^"\']+)["\']>', webpage, 'upload time')),
+ 'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None,
+ 'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None,
+ 'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None,
'formats': formats,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'series': series,
- 'season': venue,
- 'episode': round_str,
+ 'subtitles': subtitles,
}
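
The new login flow is standard Keycloak OpenID Connect: an authorization-code exchange at login, then a refresh_token grant whenever the KSession request comes back empty. A standalone sketch of the refresh step (token value made up; endpoint as in _AUTH_BASE_URL above):

    import json
    import urllib.parse
    import urllib.request

    req = urllib.request.Request(
        'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect/token',
        data=urllib.parse.urlencode({
            'grant_type': 'refresh_token',
            'refresh_token': 'eyJhbGci...',  # hypothetical stored token
            'client_id': 'tennis-tv-web',
        }).encode(),
        headers={'Content-Type': 'application/x-www-form-urlencoded'})
    with urllib.request.urlopen(req) as resp:
        tokens = json.load(resp)  # fresh access_token / refresh_token
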
diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py
index 32cae429e..d205fe053 100644
--- a/yt_dlp/extractor/testurl.py
+++ b/yt_dlp/extractor/testurl.py
@@ -11,7 +11,7 @@ class TestURLIE(InfoExtractor):
_VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
def _real_extract(self, url):
- from ..extractor import gen_extractor_classes
+ from . import gen_extractor_classes
extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 4ba993582..680358d5e 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1,28 +1,27 @@
import itertools
+import json
import random
+import re
import string
import time
-import json
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse
-)
+from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
from ..utils import (
ExtractorError,
HEADRequest,
+ LazyList,
UnsupportedError,
+ get_element_by_id,
get_first,
int_or_none,
join_nonempty,
- LazyList,
+ qualities,
srt_subtitles_timecode,
str_or_none,
traverse_obj,
try_get,
url_or_none,
- qualities,
)
@@ -35,6 +34,21 @@ class TikTokBaseIE(InfoExtractor):
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
_WEBPAGE_HOST = 'https://www.tiktok.com/'
QUALITIES = ('360p', '540p', '720p', '1080p')
+ _session_initialized = False
+
+ @staticmethod
+ def _create_url(user_id, video_id):
+ return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}'
+
+ def _get_sigi_state(self, webpage, display_id):
+ return self._parse_json(get_element_by_id(
+ 'SIGI_STATE|sigi-persisted-data', webpage, escape_value=False), display_id)
+
+ def _real_initialize(self):
+ if self._session_initialized:
+ return
+ self._request_webpage(HEADRequest('https://www.tiktok.com'), None, note='Setting up session', fatal=False)
+ TikTokBaseIE._session_initialized = True
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'):
@@ -261,6 +275,9 @@ class TikTokBaseIE(InfoExtractor):
return {
'id': aweme_id,
+ 'extractor_key': TikTokIE.ie_key(),
+ 'extractor': TikTokIE.IE_NAME,
+ 'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
'title': aweme_detail.get('desc'),
'description': aweme_detail.get('desc'),
'view_count': int_or_none(stats_info.get('play_count')),
@@ -361,7 +378,7 @@ class TikTokBaseIE(InfoExtractor):
class TikTokIE(TikTokBaseIE):
- _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)'
+ _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P<user_id>[\w\.-]+)/video)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610',
@@ -459,14 +476,14 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'expected_warnings': ['Video not available']
+ 'expected_warnings': ['trying with webpage', 'Unable to find video in feed']
}, {
# Video without title and description
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
'info_dict': {
'id': '7059698374567611694',
'ext': 'mp4',
- 'title': 'tiktok video #7059698374567611694',
+ 'title': 'TikTok video #7059698374567611694',
'description': '',
'uploader': 'pokemonlife22',
'creator': 'Pokemon',
@@ -483,13 +500,40 @@ class TikTokIE(TikTokBaseIE):
'repost_count': int,
'comment_count': int,
},
- 'expected_warnings': ['Video not available', 'Creating a generic title']
+ }, {
+ # hydration JSON is sent in a <script> element
+ 'url': 'https://www.tiktok.com/@denidil6/video/7065799023130643713',
+ 'info_dict': {
+ 'id': '7065799023130643713',
+ 'ext': 'mp4',
+ 'title': '#denidil#денидил',
+ 'description': '#denidil#денидил',
+ 'uploader': 'denidil6',
+ 'uploader_id': '7046664115636405250',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAsvMSzFdQ4ikl3uR2TEJwMBbB2yZh2Zxwhx-WCo3rbDpAharE3GQCrFuJArI3C8QJ',
+ 'artist': 'Holocron Music',
+ 'album': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'track': 'Wolf Sounds (1 Hour) Enjoy the Company of the Animal That Is the Majestic King of the Night',
+ 'timestamp': 1645134536,
+ 'duration': 26,
+ 'upload_date': '20220217',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'expected_warnings': ['trying feed workaround', 'Unable to find video in feed']
}, {
# Auto-captions available
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
'only_matching': True
}]
+ @classmethod
+ def _extract_urls(cls, webpage):
+ return [mobj.group('url') for mobj in re.finditer(
+ rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{cls._VALID_URL})', webpage)]
+
def _extract_aweme_app(self, aweme_id):
try:
aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
@@ -497,7 +541,7 @@ class TikTokIE(TikTokBaseIE):
if not aweme_detail:
raise ExtractorError('Video not available', video_id=aweme_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with feed workaround')
+ self.report_warning(f'{e.orig_msg}; trying feed workaround')
feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id,
note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or []
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
@@ -506,26 +550,20 @@ class TikTokIE(TikTokBaseIE):
return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
- video_id = self._match_id(url)
-
+ video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with webpage')
+ self.report_warning(f'{e}; trying with webpage')
- # If we only call once, we get a 403 when downlaoding the video.
- self._download_webpage(url, video_id)
- webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
+ url = self._create_url(user_id, video_id)
+ webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
-
if next_data:
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct'), expected_type=dict)
else:
- sigi_json = self._search_regex(
- r'>\s*window\[[\'"]SIGI_STATE[\'"]\]\s*=\s*(?P<sigi_state>{.+});',
- webpage, 'sigi data', group='sigi_state')
- sigi_data = self._parse_json(sigi_json, video_id)
+ sigi_data = self._get_sigi_state(webpage, video_id)
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode'), expected_type=int) or 0
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
@@ -841,7 +879,7 @@ class DouyinIE(TikTokIE):
try:
return self._extract_aweme_app(video_id)
except ExtractorError as e:
- self.report_warning(f'{e}; Retrying with webpage')
+ self.report_warning(f'{e}; trying with webpage')
webpage = self._download_webpage(url, video_id)
render_data_json = self._search_regex(
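
_get_sigi_state centralises the hydration-JSON lookup: the data sits in a <script> element whose id is SIGI_STATE or sigi-persisted-data. A rough standalone equivalent (toy HTML; yt-dlp's get_element_by_id is more tolerant of attribute order and quoting):

    import json
    import re

    webpage = '<script id="SIGI_STATE">{"ItemModule": {"123": {"desc": "hi"}}}</script>'
    m = re.search(
        r'<script[^>]*\bid="(?:SIGI_STATE|sigi-persisted-data)"[^>]*>([^<]+)</script>',
        webpage)
    print(json.loads(m.group(1))['ItemModule']['123']['desc'])  # hi
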
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
index c049025a3..d43411928 100644
--- a/yt_dlp/extractor/trovo.py
+++ b/yt_dlp/extractor/trovo.py
@@ -38,7 +38,7 @@ class TrovoBaseIE(InfoExtractor):
return {
'uploader': streamer_info.get('nickName'),
'uploader_id': str_or_none(streamer_info.get('uid')),
- 'uploader_url': format_field(username, template='https://trovo.live/%s'),
+ 'uploader_url': format_field(username, None, 'https://trovo.live/%s'),
}
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index b04575bd5..cebd027c8 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -54,9 +54,24 @@ class TVerIE(InfoExtractor):
video_id = self._match_id(self._search_regex(
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
webpage, 'url regex'))
+
+ episode_info = self._download_json(
+ f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
+ video_id, fatal=False,
+ query={
+ 'platform_uid': self._PLATFORM_UID,
+ 'platform_token': self._PLATFORM_TOKEN,
+ }, headers={
+ 'x-tver-platform-type': 'web'
+ })
+ episode_content = traverse_obj(
+ episode_info, ('result', 'episode', 'content')) or {}
+
video_info = self._download_json(
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
- query={'v': '5'}, headers={
+ query={
+ 'v': str_or_none(episode_content.get('version')) or '5',
+ }, headers={
'Origin': 'https://tver.jp',
'Referer': 'https://tver.jp/',
})
@@ -67,25 +82,13 @@ class TVerIE(InfoExtractor):
if not r_id.isdigit():
r_id = f'ref:{r_id}'
- additional_info = self._download_json(
- f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
- video_id, fatal=False,
- query={
- 'platform_uid': self._PLATFORM_UID,
- 'platform_token': self._PLATFORM_TOKEN,
- }, headers={
- 'x-tver-platform-type': 'web'
- })
-
- additional_content_info = traverse_obj(
- additional_info, ('result', 'episode', 'content'), get_all=False) or {}
- episode = strip_or_none(additional_content_info.get('title'))
- series = str_or_none(additional_content_info.get('seriesTitle'))
+ episode = strip_or_none(episode_content.get('title'))
+ series = str_or_none(episode_content.get('seriesTitle'))
title = (
join_nonempty(series, episode, delim=' ')
or str_or_none(video_info.get('title')))
- provider = str_or_none(additional_content_info.get('productionProviderName'))
- onair_label = str_or_none(additional_content_info.get('broadcastDateLabel'))
+ provider = str_or_none(episode_content.get('productionProviderName'))
+ onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
return {
'_type': 'url_transparent',
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index af6750333..d516aafa2 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -470,7 +470,7 @@ class TwitterIE(TwitterBaseIE):
'uploader': uploader,
'timestamp': unified_timestamp(status.get('created_at')),
'uploader_id': uploader_id,
- 'uploader_url': format_field(uploader_id, template='https://twitter.com/%s'),
+ 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
'like_count': int_or_none(status.get('favorite_count')),
'repost_count': int_or_none(status.get('retweet_count')),
'comment_count': int_or_none(status.get('reply_count')),
diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py
index d35cd0d43..1dc2dbdc4 100644
--- a/yt_dlp/extractor/udemy.py
+++ b/yt_dlp/extractor/udemy.py
@@ -1,16 +1,12 @@
import re
+import urllib.request
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
- compat_urlparse,
-)
+from ..compat import compat_HTTPError, compat_str, compat_urlparse
from ..utils import (
+ ExtractorError,
determine_ext,
extract_attributes,
- ExtractorError,
float_or_none,
int_or_none,
js_to_json,
@@ -148,14 +144,14 @@ class UdemyIE(InfoExtractor):
'X-Udemy-Snail-Case': 'true',
'X-Requested-With': 'XMLHttpRequest',
}
- for cookie in self._downloader.cookiejar:
+ for cookie in self.cookiejar:
if cookie.name == 'client_id':
headers['X-Udemy-Client-Id'] = cookie.value
elif cookie.name == 'access_token':
headers['X-Udemy-Bearer-Token'] = cookie.value
headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
- if isinstance(url_or_request, compat_urllib_request.Request):
+ if isinstance(url_or_request, urllib.request.Request):
for header, value in headers.items():
url_or_request.add_header(header, value)
else:
diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py
index 296799d38..3f687f737 100644
--- a/yt_dlp/extractor/urort.py
+++ b/yt_dlp/extractor/urort.py
@@ -1,10 +1,7 @@
+import urllib.parse
+
from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse,
-)
-from ..utils import (
- unified_strdate,
-)
+from ..utils import unified_strdate
class UrortIE(InfoExtractor):
@@ -31,7 +28,7 @@ class UrortIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- fstr = compat_urllib_parse.quote("InternalBandUrl eq '%s'" % playlist_id)
+ fstr = urllib.parse.quote("InternalBandUrl eq '%s'" % playlist_id)
json_url = 'http://urort.p3.no/breeze/urort/TrackDTOViews?$filter=%s&$orderby=Released%%20desc&$expand=Tags%%2CFiles' % fstr
songs = self._download_json(json_url, playlist_id)
entries = []
diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py
index bc0187511..825089f47 100644
--- a/yt_dlp/extractor/vevo.py
+++ b/yt_dlp/extractor/vevo.py
@@ -33,10 +33,124 @@ class VevoIE(VevoBaseIE):
https?://cache\.vevo\.com/m/html/embed\.html\?video=|
https?://videoplayer\.vevo\.com/embed/embedded\?videoId=|
https?://embed\.vevo\.com/.*?[?&]isrc=|
+ https?://tv\.vevo\.com/watch/artist/(?:[^/]+)/|
vevo:)
(?P<id>[^&?#]+)'''
- _TESTS = []
+ _TESTS = [{
+ 'url': 'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280',
+ 'md5': '95ee28ee45e70130e3ab02b0f579ae23',
+ 'info_dict': {
+ 'id': 'GB1101300280',
+ 'ext': 'mp4',
+ 'title': 'Hurts - Somebody to Die For',
+ 'timestamp': 1372057200,
+ 'upload_date': '20130624',
+ 'uploader': 'Hurts',
+ 'track': 'Somebody to Die For',
+ 'artist': 'Hurts',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'v3 SMIL format',
+ 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
+ 'md5': 'f6ab09b034f8c22969020b042e5ac7fc',
+ 'info_dict': {
+ 'id': 'USUV71302923',
+ 'ext': 'mp4',
+ 'title': 'Cassadee Pope - I Wish I Could Break Your Heart',
+ 'timestamp': 1392796919,
+ 'upload_date': '20140219',
+ 'uploader': 'Cassadee Pope',
+ 'track': 'I Wish I Could Break Your Heart',
+ 'artist': 'Cassadee Pope',
+ 'genre': 'Country',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Age-limited video',
+ 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
+ 'info_dict': {
+ 'id': 'USRV81300282',
+ 'ext': 'mp4',
+ 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
+ 'age_limit': 18,
+ 'timestamp': 1372888800,
+ 'upload_date': '20130703',
+ 'uploader': 'Justin Timberlake',
+ 'track': 'Tunnel Vision (Explicit)',
+ 'artist': 'Justin Timberlake',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'No video_info',
+ 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
+ 'md5': '8b83cc492d72fc9cf74a02acee7dc1b0',
+ 'info_dict': {
+ 'id': 'USUV71503000',
+ 'ext': 'mp4',
+ 'title': 'K Camp ft. T.I. - Till I Die',
+ 'age_limit': 18,
+ 'timestamp': 1449468000,
+ 'upload_date': '20151207',
+ 'uploader': 'K Camp',
+ 'track': 'Till I Die',
+ 'artist': 'K Camp',
+ 'genre': 'Hip-Hop',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Featured test',
+ 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
+ 'md5': 'd28675e5e8805035d949dc5cf161071d',
+ 'info_dict': {
+ 'id': 'USUV71402190',
+ 'ext': 'mp4',
+ 'title': 'Lemaitre ft. LoLo - Wait',
+ 'age_limit': 0,
+ 'timestamp': 1413432000,
+ 'upload_date': '20141016',
+ 'uploader': 'Lemaitre',
+ 'track': 'Wait',
+ 'artist': 'Lemaitre',
+ 'genre': 'Electronic',
+ },
+ 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
+ }, {
+ 'note': 'Only available via webpage',
+ 'url': 'http://www.vevo.com/watch/GBUV71600656',
+ 'md5': '67e79210613865b66a47c33baa5e37fe',
+ 'info_dict': {
+ 'id': 'GBUV71600656',
+ 'ext': 'mp4',
+ 'title': 'ABC - Viva Love',
+ 'age_limit': 0,
+ 'timestamp': 1461830400,
+ 'upload_date': '20160428',
+ 'uploader': 'ABC',
+ 'track': 'Viva Love',
+ 'artist': 'ABC',
+ 'genre': 'Pop',
+ },
+ 'expected_warnings': ['Failed to download video versions info'],
+ }, {
+ # no genres available
+ 'url': 'http://www.vevo.com/watch/INS171400764',
+ 'only_matching': True,
+ }, {
+ # Another case available only via the webpage; using streams/streamsV3 formats
+ # Geo-restricted to Netherlands/Germany
+ 'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.vevo.com/?isrc=USH5V1923499&partnerId=4d61b777-8023-4191-9ede-497ed6c24647&partnerAdCode=',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.vevo.com/watch/artist/janet-jackson/US0450100550',
+ 'only_matching': True,
+ }]
_VERSIONS = {
0: 'youtube', # only in AuthenticateVideo videoVersions
1: 'level3',
@@ -138,6 +252,7 @@ class VevoIE(VevoBaseIE):
fatal=False))
else:
m = re.search(r'''(?xi)
+ _(?P<quality>[a-z0-9]+)
_(?P<width>[0-9]+)x(?P<height>[0-9]+)
_(?P<vcodec>[a-z0-9]+)
_(?P<vbr>[0-9]+)
@@ -149,7 +264,7 @@ class VevoIE(VevoBaseIE):
formats.append({
'url': version_url,
- 'format_id': 'http-%s-%s' % (version, video_version['quality']),
+ 'format_id': f'http-{version}-{video_version.get("quality") or m.group("quality")}',
'vcodec': m.group('vcodec'),
'acodec': m.group('acodec'),
'vbr': int(m.group('vbr')),
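
A minimal sketch of what the extended pattern above pulls out of a Vevo-style version URL. The URL below is made up, and the trailing acodec/abr/ext groups are assumptions inferred from the visible fragment, since the hunk truncates after the vbr group:

    import re

    version_url = 'http://h264-aws.vevo.com/v3/video_high_1920x1080_h264_2500_aac_192.mp4'  # hypothetical

    m = re.search(r'''(?xi)
        _(?P<quality>[a-z0-9]+)
        _(?P<width>[0-9]+)x(?P<height>[0-9]+)
        _(?P<vcodec>[a-z0-9]+)
        _(?P<vbr>[0-9]+)
        _(?P<acodec>[a-z0-9]+)
        _(?P<abr>[0-9]+)
        \.(?P<ext>[a-z0-9]+)''', version_url)

    print(m.group('quality'), m.group('vcodec'), m.group('vbr'))  # high h264 2500

With the new leading group, a quality token parsed from the URL can backfill `format_id` whenever the API's `video_version` dict lacks a `quality` key, which is exactly what the `format_id` change below it does.
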
diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py
index 251eb78fe..9b05c86a5 100644
--- a/yt_dlp/extractor/videa.py
+++ b/yt_dlp/extractor/videa.py
@@ -1,8 +1,10 @@
import random
import re
import string
+import struct
from .common import InfoExtractor
+from ..compat import compat_b64decode, compat_ord
from ..utils import (
ExtractorError,
int_or_none,
@@ -14,11 +16,6 @@ from ..utils import (
xpath_element,
xpath_text,
)
-from ..compat import (
- compat_b64decode,
- compat_ord,
- compat_struct_pack,
-)
class VideaIE(InfoExtractor):
@@ -102,7 +99,7 @@ class VideaIE(InfoExtractor):
j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i]
k = S[(S[i] + S[j]) % 256]
- res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
+ res += struct.pack('B', k ^ compat_ord(cipher_text[m]))
return res.decode()
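
The loop patched above is the output stage of RC4; only the byte-packing call changed (`compat_struct_pack` to the stdlib's `struct.pack`). For context, a self-contained sketch of the whole cipher as the extractor appears to use it:

    import struct

    def rc4(cipher_text: bytes, key: str) -> str:
        # Key-scheduling algorithm (KSA)
        S = list(range(256))
        j = 0
        for i in range(256):
            j = (j + S[i] + ord(key[i % len(key)])) % 256
            S[i], S[j] = S[j], S[i]

        # Output stage (PRGA), xoring the keystream with the ciphertext;
        # mirrors the struct.pack('B', ...) byte assembly in the diff
        res, i, j = b'', 0, 0
        for m in range(len(cipher_text)):
            i = (i + 1) % 256
            j = (j + S[i]) % 256
            S[i], S[j] = S[j], S[i]
            k = S[(S[i] + S[j]) % 256]
            res += struct.pack('B', k ^ cipher_text[m])
        return res.decode()

RC4 is symmetric, so the same routine maps plaintext to ciphertext and back; the extractor only ever runs it in the decrypting direction.
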
diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py
index 906412f08..679574bd7 100644
--- a/yt_dlp/extractor/videocampus_sachsen.py
+++ b/yt_dlp/extractor/videocampus_sachsen.py
@@ -6,14 +6,18 @@ from ..utils import ExtractorError
class VideocampusSachsenIE(InfoExtractor):
- IE_NAME = 'Vimp'
+ IE_NAME = 'ViMP'
_INSTANCES = (
+ 'bergauf.tv',
'campus.demo.vimp.com',
'corporate.demo.vimp.com',
'dancehalldatabase.com',
+ 'drehzahl.tv',
'educhannel.hs-gesundheit.de',
'emedia.ls.haw-hamburg.de',
'globale-evolution.net',
+ 'hohu.tv',
+ 'htvideos.hightechhigh.org',
'k210039.vimp.mivitec.net',
'media.cmslegal.com',
'media.hs-furtwangen.de',
@@ -25,6 +29,7 @@ class VideocampusSachsenIE(InfoExtractor):
'mportal.europa-uni.de',
'pacific.demo.vimp.com',
'slctv.com',
+ 'streaming.prairiesouth.ca',
'tube.isbonline.cn',
'univideo.uni-kassel.de',
'ursula2.genetics.emory.edu',
@@ -52,11 +57,15 @@ class VideocampusSachsenIE(InfoExtractor):
'vimp.weka-fachmedien.de',
'webtv.univ-montp3.fr',
'www.b-tu.de/media',
+ 'www.bergauf.tv',
'www.bigcitytv.de',
'www.cad-videos.de',
+ 'www.drehzahl.tv',
'www.fh-bielefeld.de/medienportal',
+ 'www.hohu.tv',
'www.orvovideo.com',
'www.rwe.tv',
+ 'www.salzi.tv',
'www.wenglor-media.com',
'www2.univ-sba.dz',
)
@@ -73,6 +82,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'e6b9349905c1628631f175712250f2a1',
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
'ext': 'mp4',
},
},
@@ -82,6 +92,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?',
'description': 'md5:196aa3b0509a526db62f84679522a2f5',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
'display_id': 'Was-ist-selbstgesteuertes-Lernen',
'ext': 'mp4',
},
@@ -92,6 +103,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': '09d4ed029002eb1bdda610f1103dd54c',
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
'ext': 'mp4',
},
@@ -103,6 +115,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': '0183356e41af7bfb83d7667b20d9b6a3',
'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
'description': 'md5:508958bd93e0ca002ac731d94182a54f',
+ 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
'ext': 'mp4',
}
@@ -113,6 +126,7 @@ class VideocampusSachsenIE(InfoExtractor):
'id': 'c8816f1cc942c12b6cce57c835cffd7c',
'title': 'Preisverleihung »Produkte des Jahres 2022«',
'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
+ 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
'ext': 'mp4',
},
@@ -124,7 +138,7 @@ class VideocampusSachsenIE(InfoExtractor):
'title': 'Was ist selbstgesteuertes Lernen?',
'ext': 'mp4',
},
- }
+ },
]
def _real_extract(self, url):
@@ -139,12 +153,14 @@ class VideocampusSachsenIE(InfoExtractor):
if not (display_id or tmp_id):
# Title, description from embedded page's meta wouldn't be correct
- title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False)
+ title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
description = None
+ thumbnail = None
else:
title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
description = self._html_search_meta(
- ('og:description', 'twitter:description', 'description'), webpage, default=None)
+ ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
+ thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
formats, subtitles = [], {}
try:
@@ -162,7 +178,8 @@ class VideocampusSachsenIE(InfoExtractor):
'id': video_id,
'title': title,
'description': description,
+ 'thumbnail': thumbnail,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles
+ 'subtitles': subtitles,
}
diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py
index 599996bf9..8092d340e 100644
--- a/yt_dlp/extractor/vidio.py
+++ b/yt_dlp/extractor/vidio.py
@@ -152,7 +152,7 @@ class VidioIE(VidioBaseIE):
'uploader': user.get('name'),
'timestamp': parse_iso8601(video.get('created_at')),
'uploader_id': username,
- 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'),
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
'channel': channel.get('name'),
'channel_id': str_or_none(channel.get('id')),
'view_count': get_count('view_count'),
@@ -283,5 +283,5 @@ class VidioLiveIE(VidioBaseIE):
'uploader': user.get('name'),
'timestamp': parse_iso8601(stream_meta.get('start_time')),
'uploader_id': username,
- 'uploader_url': format_field(username, template='https://www.vidio.com/@%s'),
+ 'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
}
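
The recurring `format_field(value, None, template)` edits in this patch track an upstream signature change: the template moved from a keyword argument to the third positional parameter, after the object and an optional field name. A simplified stand-in showing the observable behaviour; the real helper in yt_dlp/utils.py takes further parameters, so treat this as a sketch:

    def format_field(obj, field=None, template='%s', default=''):
        # Look up `field` on obj when given, else use obj itself;
        # missing values collapse to `default` instead of a broken string
        value = obj.get(field) if field is not None else obj
        return template % value if value not in (None, '') else default

    print(format_field('jesus', None, 'https://www.vidio.com/@%s'))
    # https://www.vidio.com/@jesus
    print(repr(format_field(None, None, 'https://www.vidio.com/@%s')))
    # '' (an absent username yields an empty field, not 'https://www.vidio.com/@None')
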
diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py
index b9845affd..69a75304e 100644
--- a/yt_dlp/extractor/vidlii.py
+++ b/yt_dlp/extractor/vidlii.py
@@ -100,7 +100,7 @@ class VidLiiIE(InfoExtractor):
uploader = self._search_regex(
r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
webpage, 'uploader', fatal=False)
- uploader_url = format_field(uploader, template='https://www.vidlii.com/user/%s')
+ uploader_url = format_field(uploader, None, 'https://www.vidlii.com/user/%s')
upload_date = unified_strdate(self._html_search_meta(
'datePublished', webpage, default=None) or self._search_regex(
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 59c5353ab..961734345 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -40,6 +40,18 @@ class VimeoBaseInfoExtractor(InfoExtractor):
_LOGIN_REQUIRED = False
_LOGIN_URL = 'https://vimeo.com/log_in'
+ @staticmethod
+ def _smuggle_referrer(url, referrer_url):
+ return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+
+ def _unsmuggle_headers(self, url):
+ """@returns (url, smuggled_data, headers)"""
+ url, data = unsmuggle_url(url, {})
+ headers = self.get_param('http_headers').copy()
+ if 'http_headers' in data:
+ headers.update(data['http_headers'])
+ return url, data, headers
+
def _perform_login(self, username, password):
webpage = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page')
@@ -718,10 +730,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
]
@staticmethod
- def _smuggle_referrer(url, referrer_url):
- return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
-
- @staticmethod
def _extract_urls(url, webpage):
urls = []
# Look for embedded (iframe) Vimeo player
@@ -754,8 +762,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'Content-Type': 'application/x-www-form-urlencoded',
})
checked = self._download_json(
- url + '/check-password', video_id,
- 'Verifying the password', data=data, headers=headers)
+ f'{compat_urlparse.urlsplit(url)._replace(query=None).geturl()}/check-password',
+ video_id, 'Verifying the password', data=data, headers=headers)
if checked is False:
raise ExtractorError('Wrong video password', expected=True)
return checked
@@ -830,10 +838,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
raise
def _real_extract(self, url):
- url, data = unsmuggle_url(url, {})
- headers = self.get_param('http_headers').copy()
- if 'http_headers' in data:
- headers.update(data['http_headers'])
+ url, data, headers = self._unsmuggle_headers(url)
if 'Referer' not in headers:
headers['Referer'] = url
@@ -1383,14 +1388,15 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
@staticmethod
- def _extract_url(webpage):
+ def _extract_url(url, webpage):
mobj = re.search(
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
- return unescapeHTML(mobj.group(1)) if mobj else None
+ return VimeoIE._smuggle_referrer(unescapeHTML(mobj.group(1)), url) if mobj else None
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ url, _, headers = self._unsmuggle_headers(url)
+ webpage = self._download_webpage(url, video_id, headers=headers)
config_url = self._parse_json(self._search_regex(
r'window\.OTTData\s*=\s*({.+})', webpage,
'ott data'), video_id, js_to_json)['config_url']
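
Hoisting `_smuggle_referrer` into the base class lets VHXEmbedIE attach the embedding page as a Referer and have `_unsmuggle_headers` recover it on the other side of the URL hand-off. A short sketch of the round trip, assuming a yt-dlp checkout on the import path (the URLs are made up):

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    # Stash the referring page in the URL fragment so it survives
    # the hop between extractors
    smuggled = smuggle_url(
        'https://embed.vhx.tv/videos/12345',
        {'http_headers': {'Referer': 'https://example.com/watch-page'}})

    url, data = unsmuggle_url(smuggled, {})
    headers = dict(data.get('http_headers') or {})
    print(url)      # https://embed.vhx.tv/videos/12345
    print(headers)  # {'Referer': 'https://example.com/watch-page'}
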
diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py
index bbf43a83f..947f5cdb6 100644
--- a/yt_dlp/extractor/vine.py
+++ b/yt_dlp/extractor/vine.py
@@ -89,7 +89,7 @@ class VineIE(InfoExtractor):
username = data.get('username')
- alt_title = format_field(username, template='Vine by %s')
+ alt_title = format_field(username, None, 'Vine by %s')
return {
'id': video_id,
diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py
index e4570a03a..feab79138 100644
--- a/yt_dlp/extractor/voicy.py
+++ b/yt_dlp/extractor/voicy.py
@@ -1,3 +1,5 @@
+import itertools
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -9,8 +11,6 @@ from ..utils import (
unsmuggle_url,
)
-import itertools
-
class VoicyBaseIE(InfoExtractor):
def _extract_from_playlist_data(self, value):
@@ -105,7 +105,7 @@ class VoicyChannelIE(VoicyBaseIE):
@classmethod
def suitable(cls, url):
- return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
+ return not VoicyIE.suitable(url) and super().suitable(url)
def _entries(self, channel_id):
pager = ''
diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py
index 35662753e..0b9bf2903 100644
--- a/yt_dlp/extractor/vrv.py
+++ b/yt_dlp/extractor/vrv.py
@@ -1,17 +1,14 @@
import base64
-import json
import hashlib
import hmac
+import json
import random
import string
import time
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse_urlencode,
- compat_urllib_parse,
-)
+from ..compat import compat_HTTPError, compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
float_or_none,
@@ -46,12 +43,12 @@ class VRVBaseIE(InfoExtractor):
headers['Content-Type'] = 'application/json'
base_string = '&'.join([
'POST' if data else 'GET',
- compat_urllib_parse.quote(base_url, ''),
- compat_urllib_parse.quote(encoded_query, '')])
+ urllib.parse.quote(base_url, ''),
+ urllib.parse.quote(encoded_query, '')])
oauth_signature = base64.b64encode(hmac.new(
(self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
base_string.encode(), hashlib.sha1).digest()).decode()
- encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
+ encoded_query += '&oauth_signature=' + urllib.parse.quote(oauth_signature, '')
try:
return self._download_json(
'?'.join([base_url, encoded_query]), video_id,
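
The signing code above is an OAuth 1.0-style HMAC-SHA1 signature: the request method, the percent-encoded base URL, and the percent-encoded query are joined with '&', signed with `consumer_secret&token_secret`, and the result is appended to the query. A self-contained sketch with made-up credentials and query values:

    import base64
    import hashlib
    import hmac
    import urllib.parse

    oauth_secret, token_secret = 'consumer-secret', 'token-secret'  # hypothetical
    base_url = 'https://api.vrv.co/core/index'
    encoded_query = 'oauth_consumer_key=key&oauth_nonce=abc123&oauth_timestamp=1656000000'

    base_string = '&'.join([
        'GET',  # 'POST' when a request body is sent
        urllib.parse.quote(base_url, ''),
        urllib.parse.quote(encoded_query, '')])
    oauth_signature = base64.b64encode(hmac.new(
        (oauth_secret + '&' + token_secret).encode('ascii'),
        base_string.encode(), hashlib.sha1).digest()).decode()

    print(encoded_query + '&oauth_signature=' + urllib.parse.quote(oauth_signature, ''))
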
diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py
index 8ef75d30e..fd5226bbc 100644
--- a/yt_dlp/extractor/vshare.py
+++ b/yt_dlp/extractor/vshare.py
@@ -1,11 +1,7 @@
import re
from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
- decode_packed_codes,
- ExtractorError,
-)
+from ..utils import ExtractorError, decode_packed_codes
class VShareIE(InfoExtractor):
@@ -37,7 +33,7 @@ class VShareIE(InfoExtractor):
digits = [int(digit) for digit in digits.split(',')]
key_digit = self._search_regex(
r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
- chars = [compat_chr(d - int(key_digit)) for d in digits]
+ chars = [chr(d - int(key_digit)) for d in digits]
return ''.join(chars)
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py
index 6349e5326..e1062b9b5 100644
--- a/yt_dlp/extractor/wppilot.py
+++ b/yt_dlp/extractor/wppilot.py
@@ -20,7 +20,7 @@ class WPPilotBaseIE(InfoExtractor):
def _get_channel_list(self, cache=True):
if cache is True:
- cache_res = self._downloader.cache.load('wppilot', 'channel-list')
+ cache_res = self.cache.load('wppilot', 'channel-list')
if cache_res:
return cache_res, True
webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage')
@@ -35,7 +35,7 @@ class WPPilotBaseIE(InfoExtractor):
channel_list = try_get(qhash_content, lambda x: x['data']['allChannels']['nodes'])
if channel_list is None:
continue
- self._downloader.cache.store('wppilot', 'channel-list', channel_list)
+ self.cache.store('wppilot', 'channel-list', channel_list)
return channel_list, False
raise ExtractorError('Unable to find the channel list')
@@ -101,7 +101,7 @@ class WPPilotIE(WPPilotBaseIE):
channel = self._get_channel(video_id)
video_id = str(channel['id'])
- is_authorized = next((c for c in self._downloader.cookiejar if c.name == 'netviapisessid'), None)
+ is_authorized = next((c for c in self.cookiejar if c.name == 'netviapisessid'), None)
# cookies starting with "g:" are assigned to guests
is_authorized = True if is_authorized is not None and not is_authorized.value.startswith('g:') else False
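
`self.cache` here is the downloader-level cache newly exposed on the extractor itself, replacing the `self._downloader.cache` indirection. The same store/load pair can be exercised directly; a sketch, assuming yt-dlp is importable, with an illustrative cache directory and payload:

    from yt_dlp import YoutubeDL

    ydl = YoutubeDL({'cachedir': '/tmp/ydl-cache'})  # hypothetical location
    ydl.cache.store('wppilot', 'channel-list', [{'id': 53, 'slug': 'demo'}])
    print(ydl.cache.load('wppilot', 'channel-list'))
    # [{'id': 53, 'slug': 'demo'}], persisted as JSON under <cachedir>/wppilot/
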
diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py
index 28b6ecb6e..63abe4a1f 100644
--- a/yt_dlp/extractor/xfileshare.py
+++ b/yt_dlp/extractor/xfileshare.py
@@ -1,11 +1,10 @@
import re
from .common import InfoExtractor
-from ..compat import compat_chr
from ..utils import (
+ ExtractorError,
decode_packed_codes,
determine_ext,
- ExtractorError,
int_or_none,
js_to_json,
urlencode_postdata,
@@ -32,11 +31,11 @@ def aa_decode(aa_code):
aa_char = aa_char.replace('+ ', '')
m = re.match(r'^\d+', aa_char)
if m:
- ret += compat_chr(int(m.group(0), 8))
+ ret += chr(int(m.group(0), 8))
else:
m = re.match(r'^u([\da-f]+)', aa_char)
if m:
- ret += compat_chr(int(m.group(1), 16))
+ ret += chr(int(m.group(1), 16))
return ret
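
The two branches above turn AAencode escape tokens into characters: a bare digit run is an octal code point, a `u`-prefixed hex run is a Unicode code point. The decoding in isolation:

    import re

    def decode_char(aa_char: str) -> str:
        m = re.match(r'^\d+', aa_char)
        if m:
            return chr(int(m.group(0), 8))   # octal: '142' -> 'b'
        m = re.match(r'^u([\da-f]+)', aa_char)
        if m:
            return chr(int(m.group(1), 16))  # hex: 'u3042' -> 'あ'
        return ''

    print(decode_char('142'), decode_char('u3042'))  # b あ
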
diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py
index ff15d3707..e42eed7d8 100644
--- a/yt_dlp/extractor/xhamster.py
+++ b/yt_dlp/extractor/xhamster.py
@@ -21,7 +21,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
- _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)'
_VALID_URL = r'''(?x)
https?://
(?:.+?\.)?%s/
@@ -32,7 +32,7 @@ class XHamsterIE(InfoExtractor):
''' % _DOMAINS
_TESTS = [{
'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'md5': '98b4687efb1ffd331c4197854dc09e8f',
+ 'md5': '34e1ab926db5dc2750fed9e1f34304bb',
'info_dict': {
'id': '1509445',
'display_id': 'femaleagent-shy-beauty-takes-the-bait',
@@ -41,6 +41,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1350194821,
'upload_date': '20121014',
'uploader': 'Ruseful2011',
+ 'uploader_id': 'ruseful2011',
'duration': 893,
'age_limit': 18,
},
@@ -70,6 +71,7 @@ class XHamsterIE(InfoExtractor):
'timestamp': 1454948101,
'upload_date': '20160208',
'uploader': 'parejafree',
+ 'uploader_id': 'parejafree',
'duration': 72,
'age_limit': 18,
},
@@ -115,6 +117,9 @@ class XHamsterIE(InfoExtractor):
}, {
'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
'only_matching': True,
+ }, {
+ 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -244,7 +249,6 @@ class XHamsterIE(InfoExtractor):
categories = None
uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL']))
-
return {
'id': video_id,
'display_id': display_id,
@@ -263,7 +267,7 @@ class XHamsterIE(InfoExtractor):
'dislike_count': int_or_none(try_get(
video, lambda x: x['rating']['dislikes'], int)),
'comment_count': int_or_none(video.get('views')),
- 'age_limit': age_limit,
+ 'age_limit': age_limit if age_limit is not None else 18,
'categories': categories,
'formats': formats,
}
@@ -423,6 +427,9 @@ class XHamsterUserIE(InfoExtractor):
'id': 'firatkaan',
},
'playlist_mincount': 1,
+ }, {
+ 'url': 'https://xhday.com/users/mobhunter',
+ 'only_matching': True,
}]
def _entries(self, user_id):
diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py
index 3fe6192bf..8811df6d8 100644
--- a/yt_dlp/extractor/yahoo.py
+++ b/yt_dlp/extractor/yahoo.py
@@ -1,15 +1,15 @@
import hashlib
import itertools
import re
+import urllib.parse
+from .brightcove import BrightcoveNewIE
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse,
-)
+from .youtube import YoutubeIE
+from ..compat import compat_str
from ..utils import (
- clean_html,
ExtractorError,
+ clean_html,
int_or_none,
mimetype2ext,
parse_iso8601,
@@ -18,9 +18,6 @@ from ..utils import (
url_or_none,
)
-from .brightcove import BrightcoveNewIE
-from .youtube import YoutubeIE
-
class YahooIE(InfoExtractor):
IE_DESC = 'Yahoo screen and movies'
@@ -333,7 +330,7 @@ class YahooSearchIE(SearchInfoExtractor):
def _search_results(self, query):
for pagenum in itertools.count(0):
- result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
+ result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (urllib.parse.quote_plus(query), pagenum * 30)
info = self._download_json(result_url, query,
note='Downloading results page ' + str(pagenum + 1))
yield from (self.url_result(result['rurl']) for result in info['results'])
@@ -434,7 +431,7 @@ class YahooGyaOIE(InfoExtractor):
page = 1
while True:
playlist = self._download_json(
- f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}', program_id,
+ f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id,
note=f'Downloading JSON metadata page {page}')
if not playlist:
break
diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py
index 444785947..27eda9721 100644
--- a/yt_dlp/extractor/ynet.py
+++ b/yt_dlp/extractor/ynet.py
@@ -1,8 +1,8 @@
-import re
import json
+import re
+import urllib.parse
from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote_plus
class YnetIE(InfoExtractor):
@@ -31,7 +31,7 @@ class YnetIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
+ content = urllib.parse.unquote_plus(self._og_search_video_url(webpage))
config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
f4m_url = config['clip']['url']
title = self._og_search_title(webpage)
diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py
index 76d89f3ce..18112ba35 100644
--- a/yt_dlp/extractor/younow.py
+++ b/yt_dlp/extractor/younow.py
@@ -91,7 +91,7 @@ def _extract_moment(item, fatal=True):
uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
uploader_id = try_get(item, lambda x: x['owner']['userId'])
- uploader_url = format_field(uploader, template='https://www.younow.com/%s')
+ uploader_url = format_field(uploader, None, 'https://www.younow.com/%s')
entry = {
'extractor_key': 'YouNowMoment',
diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py
index 5aea82295..b484e08ec 100644
--- a/yt_dlp/extractor/youporn.py
+++ b/yt_dlp/extractor/youporn.py
@@ -135,9 +135,10 @@ class YouPornIE(InfoExtractor):
r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
webpage, 'uploader', fatal=False)
upload_date = unified_strdate(self._html_search_regex(
- [r'UPLOADED:\s*<span>([^<]+)',
+ (r'UPLOADED:\s*<span>([^<]+)',
r'Date\s+[Aa]dded:\s*<span>([^<]+)',
- r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+ r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
+ r'(?s)<label\b[^>]*>Uploaded[^<]*</label>\s*<span\b[^>]*>(.+?)</span>'),
webpage, 'upload date', fatal=False))
age_limit = self._rta_search(webpage)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 5546aa9a3..ebc3381a2 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2,7 +2,6 @@ import base64
import calendar
import copy
import datetime
-import functools
import hashlib
import itertools
import json
@@ -14,18 +13,11 @@ import sys
import threading
import time
import traceback
+import urllib.error
+import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_chr,
- compat_HTTPError,
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
-)
+from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
NO_DEFAULT,
@@ -382,11 +374,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
pref = {}
if pref_cookie:
try:
- pref = dict(compat_urlparse.parse_qsl(pref_cookie.value))
+ pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
except ValueError:
self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
pref.update({'hl': 'en', 'tz': 'UTC'})
- self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref))
+ self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
self._initialize_pref()
@@ -397,9 +389,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required('Login details are needed to download this content', method='cookies')
- _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
- _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
return copy.deepcopy(INNERTUBE_CLIENTS[client])
@@ -415,15 +406,19 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_client_name(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
return self._ytcfg_get_safe(
ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
- lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
+ lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
+
+ def _select_api_hostname(self, req_api_hostname, default_client=None):
+ return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
+ or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_api_key(self, ytcfg=None, default_client='web'):
- return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
+ return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], str, default_client)
def _extract_context(self, ytcfg=None, default_client='web'):
context = get_first(
@@ -470,18 +465,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
real_headers.update({'content-type': 'application/json'})
if headers:
real_headers.update(headers)
+ api_key = (self._configuration_arg('innertube_key', [''], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
+ or api_key or self._extract_api_key(default_client=default_client))
return self._download_json(
- f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}',
+ f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
video_id=video_id, fatal=fatal, note=note, errnote=errnote,
data=json.dumps(data).encode('utf8'), headers=real_headers,
- query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'})
+ query={'key': api_key, 'prettyPrint': 'false'})
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
- data = self._search_regex(
- (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
- if data:
- return self._parse_json(data, item_id, fatal=fatal)
+ return self._search_json(self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@staticmethod
def _extract_session_index(*data):
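
Dropping `_YT_INITIAL_BOUNDARY_RE` works because `_search_json` no longer needs a textual end marker: it locates the prefix, then lets a JSON decoder decide where the object ends. A rough sketch of the idea; the real `_search_json` in yt-dlp is considerably more robust, so this is illustrative only:

    import json
    import re

    def search_json_after(prefix_pattern, haystack):
        # Find the assignment prefix, then decode the balanced JSON
        # object that follows, instead of guessing a boundary like ';</script'
        m = re.search(prefix_pattern + r'\s*', haystack)
        if not m:
            return None
        obj, _ = json.JSONDecoder().raw_decode(haystack, m.end())
        return obj

    page = 'ytInitialData = {"a": [1, {"b": "};"}]};</script>'
    print(search_json_after(r'ytInitialData\s*=', page))
    # {'a': [1, {'b': '};'}]}; the '};' inside the string no longer truncates extraction
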
@@ -497,7 +490,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
if ytcfg:
- token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str)
if token:
return token
if webpage:
@@ -513,12 +506,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""
for data in args:
# ytcfg includes channel_syncid if on secondary channel
- delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
+ delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], str)
if delegated_sid:
return delegated_sid
sync_ids = (try_get(
data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
- lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
+ lambda x: x['DATASYNC_ID']), str) or '').split('||')
if len(sync_ids) >= 2 and sync_ids[1]:
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
@@ -534,7 +527,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
expected_type=str)
- @property
+ @functools.cached_property
def is_authenticated(self):
return bool(self._generate_sapisidhash_header())
@@ -550,9 +543,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
self, *, ytcfg=None, account_syncid=None, session_index=None,
visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
- origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
+ origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
headers = {
- 'X-YouTube-Client-Name': compat_str(
+ 'X-YouTube-Client-Name': str(
self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
'Origin': origin,
@@ -612,7 +605,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_continuation_ep_data(cls, continuation_ep: dict):
if isinstance(continuation_ep, dict):
continuation = try_get(
- continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
+ continuation_ep, lambda x: x['continuationCommand']['token'], str)
if not continuation:
return
ctp = continuation_ep.get('clickTrackingParams')
@@ -672,7 +665,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_badges(self, renderer: dict):
badges = set()
for badge in try_get(renderer, lambda x: x['badges'], list) or []:
- label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
+ label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
if label:
badges.add(label.lower())
return badges
@@ -687,7 +680,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
obj = [obj]
for item in obj:
- text = try_get(item, lambda x: x['simpleText'], compat_str)
+ text = try_get(item, lambda x: x['simpleText'], str)
if text:
return text
runs = try_get(item, lambda x: x['runs'], list) or []
@@ -789,20 +782,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if isinstance(e.cause, compat_HTTPError):
+ if isinstance(e.cause, urllib.error.HTTPError):
first_bytes = e.cause.read(512)
if not is_html(first_bytes):
yt_error = try_get(
self._parse_json(
self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
- lambda x: x['error']['message'], compat_str)
+ lambda x: x['error']['message'], str)
if yt_error:
self._report_alerts([('ERROR', yt_error)], fatal=False)
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
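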
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
@@ -2212,28 +2205,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}, {
# Story. Requires specific player params to work.
# Note: stories get removed after some period of time
- 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
+ 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
'info_dict': {
- 'id': 'yN3x1t3sieA',
+ 'id': 'vv8qTUWmulI',
'ext': 'mp4',
- 'uploader': 'Linus Tech Tips',
- 'duration': 13,
- 'channel': 'Linus Tech Tips',
+ 'availability': 'unlisted',
+ 'view_count': int,
+ 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
+ 'upload_date': '20220526',
+ 'categories': ['Education'],
+ 'title': 'Story',
+ 'channel': 'IT\'S HISTORY',
+ 'description': '',
+ 'uploader_id': 'BlastfromthePast',
+ 'duration': 12,
+ 'uploader': 'IT\'S HISTORY',
'playable_in_embed': True,
- 'tags': [],
'age_limit': 0,
- 'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
- 'upload_date': '20220402',
- 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
- 'title': 'Story',
'live_status': 'not_live',
- 'uploader_id': 'LinusTechTips',
+ 'tags': [],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
+ 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
+ 'info_dict': {
+ 'id': 'tjjjtzRLHvA',
+ 'ext': 'mp4',
+ 'title': 'ハッシュタグ無し };if window.ytcsi',
+ 'upload_date': '20220323',
+ 'like_count': int,
+ 'availability': 'unlisted',
+ 'channel': 'nao20010128nao',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader': 'nao20010128nao',
+ 'uploader_id': 'nao20010128nao',
+ 'categories': ['Music'],
'view_count': int,
'description': '',
- 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
- 'categories': ['Science & Technology'],
- 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
- 'availability': 'unlisted',
+ 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
+ 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'duration': 6,
+ 'tags': [],
+ 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
}
}
]
@@ -2319,7 +2338,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
last_error = ctx.pop('last_error', None)
- expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+ expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
or (mpd_url, stream_number, False))
if not refresh_sequence:
@@ -2386,6 +2405,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
yield {
'url': last_segment_url,
+ 'fragment_count': last_seq,
}
if known_idx == last_seq:
no_fragment_score += 5
@@ -2400,7 +2420,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_player_url(self, *ytcfgs, webpage=None):
player_url = traverse_obj(
ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not player_url:
return
return urljoin('https://www.youtube.com', player_url)
@@ -2417,7 +2437,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _signature_cache_id(self, example_sig):
""" Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+ return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
@@ -2447,7 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
assert os.path.basename(func_id) == func_id
- cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
+ cache_spec = self.cache.load('youtube-sigfuncs', func_id)
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
@@ -2455,11 +2475,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if code:
res = self._parse_sig_js(code)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ test_string = ''.join(map(chr, range(len(example_sig))))
cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ self.cache.store('youtube-sigfuncs', func_id, cache_spec)
return res
def _print_sig_code(self, func, example_sig):
@@ -2494,12 +2514,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
yield _genslice(start, i, step)
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ test_string = ''.join(map(chr, range(len(example_sig))))
cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = ' + '.join(gen_sig_code(cache_spec))
signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ ', '.join(str(len(p)) for p in example_sig.split('.')))
code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
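
The cache logic above exploits the fact that the player's signature scramble only reorders and drops characters: feed it a probe string whose code points are 0..n-1, read the surviving code points back as indices, and the whole transform collapses into one replayable index list. A sketch with a toy stand-in for the JS-derived function:

    def scramble(s):  # stand-in for the extracted player transform
        s = list(reversed(s))
        s[0], s[3] = s[3], s[0]
        return ''.join(s[2:])

    example_sig = 'ABCDEFGHIJ'  # hypothetical; only its length matters
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in scramble(test_string)]

    fast = lambda s: ''.join(s[i] for i in cache_spec)
    assert fast(example_sig) == scramble(example_sig)
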
@@ -2530,22 +2550,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
-
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
-
try:
player_id = (player_url, self._signature_cache_id(s))
if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
+ func = self._extract_signature_function(video_id, player_url, s)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
self._print_sig_code(func, s)
return func(s)
except Exception as e:
- raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
+ raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
def _decrypt_nsig(self, s, video_id, player_url):
"""Turn the encrypted n field into a working signature"""
@@ -2580,7 +2594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self._downloader.cache.load('youtube-nsig', player_id)
+ func_code = self.cache.load('youtube-nsig', player_id)
if func_code:
jsi = JSInterpreter(func_code)
@@ -2589,7 +2603,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
funcname = self._extract_n_function_name(jscode)
jsi = JSInterpreter(jscode)
func_code = jsi.extract_function_code(funcname)
- self._downloader.cache.store('youtube-nsig', player_id, func_code)
+ self.cache.store('youtube-nsig', player_id, func_code)
if self.get_param('youtube_print_sig_code'):
self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
@@ -2621,30 +2635,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return sts
def _mark_watched(self, video_id, player_responses):
- playback_url = get_first(
- player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
- expected_type=url_or_none)
- if not playback_url:
- self.report_warning('Unable to mark watched')
- return
- parsed_playback_url = compat_urlparse.urlparse(playback_url)
- qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+ for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
+ label = 'fully ' if is_full else ''
+ url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
+ expected_type=url_or_none)
+ if not url:
+ self.report_warning(f'Unable to mark {label}watched')
+ return
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
+
+ # cpn generation algorithm is reverse engineered from base.js.
+ # In fact it works even with dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+
+            # more consistent results when setting it to right before the end
+ video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ 'cmt': video_length,
+ 'el': 'detailpage', # otherwise defaults to "shorts"
+ })
- # cpn generation algorithm is reverse engineered from base.js.
- # In fact it works even with dummy cpn.
- CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
- cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))
+ if is_full:
+ # these seem to mark watchtime "history" in the real world
+ # they're required, so send in a single value
+ qs.update({
+ 'st': video_length,
+ 'et': video_length,
+ })
- qs.update({
- 'ver': ['2'],
- 'cpn': [cpn],
- })
- playback_url = compat_urlparse.urlunparse(
- parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+ url = urllib.parse.urlunparse(
+ parsed_url._replace(query=urllib.parse.urlencode(qs, True)))
- self._download_webpage(
- playback_url, video_id, 'Marking watched',
- 'Unable to mark watched', fatal=False)
+ self._download_webpage(
+ url, video_id, f'Marking {label}watched',
+ 'Unable to mark watched', fatal=False)
@staticmethod
def _extract_urls(webpage):
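
Putting the pieces of the rewritten `_mark_watched` together: generate a dummy client playback nonce, then rewrite the tracking URL's query in place. A sketch against a made-up stats URL of the shape the code handles:

    import random
    import urllib.parse

    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
    # 16 symbols from a 64-character base64url-style alphabet; `& 63`
    # masks the random draw down to a valid index
    cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

    url = 'https://s.youtube.com/api/stats/watchtime?ns=yt&docid=abc123&len=212.3'  # hypothetical
    parsed = urllib.parse.urlparse(url)
    qs = urllib.parse.parse_qs(parsed.query)

    video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
    qs.update({'ver': ['2'], 'cpn': [cpn], 'cmt': video_length,
               'st': video_length, 'et': video_length})

    print(urllib.parse.urlunparse(parsed._replace(query=urllib.parse.urlencode(qs, True))))
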
@@ -2713,39 +2742,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
chapter_title = lambda chapter: self._get_text(chapter, 'title')
- return next((
- filter(None, (
- self._extract_chapters(
- traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
- chapter_time, chapter_title, duration)
- for contents in content_list
- ))), [])
-
- def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
- chapters = []
- last_chapter = {'start_time': 0}
- for idx, chapter in enumerate(chapter_list or []):
- title = chapter_title(chapter)
- start_time = chapter_time(chapter)
- if start_time is None:
- continue
- last_chapter['end_time'] = start_time
- if start_time < last_chapter['start_time']:
- if idx == 1:
- chapters.pop()
- self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
- else:
- self.report_warning(f'Invalid start time for chapter "{title}"')
- continue
- last_chapter = {'start_time': start_time, 'title': title}
- chapters.append(last_chapter)
- last_chapter['end_time'] = duration
- return chapters
+ return next(filter(None, (
+ self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
+ chapter_time, chapter_title, duration)
+ for contents in content_list)), [])
- def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
- return self._parse_json(self._search_regex(
- (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- regex), webpage, name, default='{}'), video_id, fatal=False)
+ def _extract_chapters_from_description(self, description, duration):
+ return self._extract_chapters(
+ re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
+ chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
+ duration=duration, strict=False)
+
+ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
+ if not duration:
+ return
+ chapter_list = [{
+ 'start_time': chapter_time(chapter),
+ 'title': chapter_title(chapter),
+ } for chapter in chapter_list or []]
+ if not strict:
+ chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+ chapters = [{'start_time': 0, 'title': '<Untitled>'}]
+ for idx, chapter in enumerate(chapter_list):
+ if chapter['start_time'] is None or not chapter['title']:
+ self.report_warning(f'Incomplete chapter {idx}')
+ elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+ chapters[-1]['end_time'] = chapter['start_time']
+ chapters.append(chapter)
+ else:
+ self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
+ chapters[-1]['end_time'] = duration
+ return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')
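
`_extract_chapters_from_description` leans entirely on that one regex: each line starting with an `[hh:]mm:ss` stamp becomes a `(timestamp, title)` pair, which the shared `_extract_chapters` then validates in non-strict mode (sorting by start time and skipping incomplete entries). The pattern in isolation, on a made-up description:

    import re

    description = '\n'.join((
        'Timestamps:',
        '0:00 Intro',
        '1:05 Setup',
        '1:02:10 Conclusion'))

    print(re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description))
    # [('0:00', 'Intro'), ('1:05', 'Setup'), ('1:02:10', 'Conclusion')]
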
@@ -2758,12 +2786,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
author = self._get_text(comment_renderer, 'authorText')
author_id = try_get(comment_renderer,
- lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
+ lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
- lambda x: x['likeCount']), compat_str)) or 0
+ lambda x: x['likeCount']), str)) or 0
author_thumbnail = try_get(comment_renderer,
- lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
+ lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
is_favorited = 'creatorHeart' in (try_get(
@@ -3028,9 +3056,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
initial_pr = None
if webpage:
- initial_pr = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
- video_id, 'initial player response')
+ initial_pr = self._search_json(
+ self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
all_clients = set(clients)
clients = clients[::-1]
@@ -3144,16 +3171,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
fmt_url = fmt.get('url')
if not fmt_url:
- sc = compat_parse_qs(fmt.get('signatureCipher'))
+ sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
encrypted_sig = try_get(sc, lambda x: x['s'][0])
- if not (sc and fmt_url and encrypted_sig):
+ if not all((sc, fmt_url, player_url, encrypted_sig)):
continue
- if not player_url:
+ try:
+ fmt_url += '&%s=%s' % (
+ traverse_obj(sc, ('sp', -1)) or 'signature',
+ self._decrypt_signature(encrypted_sig, video_id, player_url)
+ )
+ except ExtractorError as e:
+ self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
+ self.write_debug(e, only_once=True)
continue
- signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
- sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
- fmt_url += '&' + sp + '=' + signature
query = parse_qs(fmt_url)
throttled = False
@@ -3164,7 +3195,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
except ExtractorError as e:
self.report_warning(
'nsig extraction failed: You may experience throttling for some formats\n'
- f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
+ f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
+ self.write_debug(e, only_once=True)
throttled = True
if itag:
@@ -3380,12 +3412,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Unquote should take place before split on comma (,) since textual
# fields may contain comma as well (see
# https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(
- compat_urllib_parse_unquote_plus(feed))
+ feed_data = urllib.parse.parse_qs(
+ urllib.parse.unquote_plus(feed))
def feed_entry(name):
return try_get(
- feed_data, lambda x: x[name][0], compat_str)
+ feed_data, lambda x: x[name][0], str)
feed_id = feed_entry('id')
if not feed_id:
@@ -3414,6 +3446,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or get_first(microformats, 'lengthSeconds')
or parse_duration(search_meta('duration'))) or None
+ if get_first(video_details, 'isPostLiveDvr'):
+ self.write_debug('Video is in Post-Live Manifestless mode')
+            if (duration or 0) > 4 * 3600:
+ self.report_warning(
+                'The livestream has not finished processing. Only 4 hours of the video can currently be downloaded. '
+ 'This is a known issue and patches are welcome')
+
live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
video_id, microformats, video_details, player_responses, player_url, duration)
@@ -3523,7 +3562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
'uploader_url': owner_profile_url,
'channel_id': channel_id,
- 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'),
+ 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s'),
'duration': duration,
'view_count': int_or_none(
get_first((video_details, microformats), (..., 'viewCount'))
@@ -3593,7 +3632,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'translated_subs' in self._configuration_arg('skip'):
continue
trans_code += f'-{lang_code}'
- trans_name += format_field(lang_name, template=' from %s')
+ trans_name += format_field(lang_name, None, ' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
if lang_code == f'a-{orig_trans_code}':
@@ -3605,9 +3644,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
- parsed_url = compat_urllib_parse_urlparse(url)
+ parsed_url = urllib.parse.urlparse(url)
for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
+ query = urllib.parse.parse_qs(component)
for k, v in query.items():
for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
d_k += '_time'
@@ -3616,7 +3655,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Youtube Music Auto-generated description
if video_description:
- mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
+ mobj = re.search(
+ r'''(?xs)
+ (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
+ (?P<album>[^\n]+)
+ (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
+ (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
+ (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
+ .+\nAuto-generated\ by\ YouTube\.\s*$
+ ''', video_description)
if mobj:
release_year = mobj.group('release_year')
release_date = mobj.group('release_date')
@@ -3634,9 +3681,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data = None
if webpage:
- initial_data = self._extract_yt_initial_variable(
- webpage, self._YT_INITIAL_DATA_RE, video_id,
- 'yt initial data')
+ initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
if not initial_data:
query = {'videoId': video_id}
query.update(self._get_checkok_params())
@@ -3646,13 +3691,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
headers=self.generate_api_headers(ytcfg=master_ytcfg),
note='Downloading initial data API JSON')
+ info['comment_count'] = traverse_obj(initial_data, (
+ 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
+ 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', 'simpleText'
+ ), (
+ 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
+ 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', 'runs', ..., 'text'
+ ), expected_type=int_or_none, get_all=False)
+
try: # This will error if there is no livechat
initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
except (KeyError, IndexError, TypeError):
pass
else:
info.setdefault('subtitles', {})['live_chat'] = [{
- 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies
+ # url is needed to set cookies
+ 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
'video_id': video_id,
'ext': 'json',
'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
@@ -3662,6 +3716,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['chapters'] = (
self._extract_chapters_from_json(initial_data, duration)
or self._extract_chapters_from_engagement_panel(initial_data, duration)
+ or self._extract_chapters_from_description(video_description, duration)
or None)
contents = traverse_obj(
@@ -3884,7 +3939,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
# generic endpoint URL support
ep_url = urljoin('https://www.youtube.com/', try_get(
renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
if ep_url:
for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
if ie.suitable(ep_url):
@@ -3928,7 +3983,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _shelf_entries(self, shelf_renderer, skip_channels=False):
ep = try_get(
shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str)
+ str)
shelf_url = urljoin('https://www.youtube.com', ep)
if shelf_url:
# Skipping links to other channels; note that checking for
@@ -3988,7 +4043,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
yield entry
# playlist attachment
playlist_id = try_get(
- post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
+ post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
if playlist_id:
yield self.url_result(
'https://www.youtube.com/playlist?list=%s' % playlist_id,
@@ -3999,7 +4054,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if not isinstance(run, dict):
continue
ep_url = try_get(
- run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
+ run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
if not ep_url:
continue
if not YoutubeIE.suitable(ep_url):
@@ -4015,9 +4070,12 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return
for content in contents:
renderer = content.get('backstagePostThreadRenderer')
- if not isinstance(renderer, dict):
+ if isinstance(renderer, dict):
+ yield from self._post_thread_entries(renderer)
continue
- yield from self._post_thread_entries(renderer)
+ renderer = content.get('videoRenderer')
+ if isinstance(renderer, dict):
+ yield self._video_entry(renderer)
r''' # unused
def _rich_grid_entries(self, contents):
@@ -4173,10 +4231,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
uploader['uploader'] = self._search_regex(
r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
uploader['uploader_id'] = try_get(
- owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
+ owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], str)
uploader['uploader_url'] = urljoin(
'https://www.youtube.com/',
- try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
+ try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str))
return {k: v for k, v in uploader.items() if v is not None}
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
@@ -4304,13 +4362,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
title = playlist.get('title') or try_get(
- data, lambda x: x['titleText']['simpleText'], compat_str)
+ data, lambda x: x['titleText']['simpleText'], str)
playlist_id = playlist.get('playlistId') or item_id
# Delegating everything except mix playlists to regular tab-based playlist URL
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
- compat_str))
+ str))
# Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
# [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
@@ -4381,7 +4439,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
continue
nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
text = try_get(
- nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
+ nav_item_renderer, lambda x: x['text']['simpleText'], str)
if not text or text.lower() != 'show unavailable videos':
continue
browse_endpoint = try_get(
@@ -4402,7 +4460,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
note='Downloading API JSON with unavailable videos')
- @property
+ @functools.cached_property
def skip_webpage(self):
return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
@@ -4423,7 +4481,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
except ExtractorError as e:
if isinstance(e.cause, network_exceptions):
- if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
+ if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
last_error = error_to_compat_str(e.cause or e.msg)
if count < retries:
continue
@@ -5236,8 +5294,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
item_id = self._match_id(url)
- url = compat_urlparse.urlunparse(
- compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+ url = urllib.parse.urlunparse(
+ urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
compat_opts = self.get_param('compat_opts', [])
def get_mobj(url):
@@ -5257,7 +5315,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
mdata = self._extract_tab_endpoint(
f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
- get_all=False, expected_type=compat_str)
+ get_all=False, expected_type=str)
if not murl:
raise ExtractorError('Failed to resolve album to playlist')
return self.url_result(murl, ie=YoutubeTabIE.ie_key())
@@ -5622,11 +5680,13 @@ class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
channel = traverse_obj(
notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
expected_type=str)
+ notification_title = self._get_text(notification, 'shortMessage')
+ if notification_title:
+ notification_title = notification_title.replace('\xad', '') # remove soft hyphens
+ # TODO: handle recommended videos
title = self._search_regex(
- rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'),
+ rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
'video title', default=None)
- if title:
- title = title.replace('\xad', '') # remove soft hyphens
upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
else None)
@@ -5778,7 +5838,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
if params:
section = next((k for k, v in self._SECTIONS.items() if v == params), params)
else:
- section = compat_urllib_parse_unquote_plus((url.split('#') + [''])[1]).lower()
+ section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower()
params = self._SECTIONS.get(section)
if not params:
section = None
@@ -5925,14 +5985,43 @@ class YoutubeTruncatedURLIE(InfoExtractor):
expected=True)
-class YoutubeClipIE(InfoExtractor):
+class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
IE_NAME = 'youtube:clip'
- IE_DESC = False # Do not list
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # FIXME: Other metadata should be extracted from the clip, not from the base video
+ 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'info_dict': {
+ 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
+ 'ext': 'mp4',
+ 'section_start': 29.0,
+ 'section_end': 39.7,
+ 'duration': 10.7,
+ }
+ }]
def _real_extract(self, url):
- self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
- return self.url_result(url, 'Generic')
+ clip_id = self._match_id(url)
+ _, data = self._extract_webpage(url, clip_id)
+
+ video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
+ if not video_id:
+ raise ExtractorError('Unable to find video ID')
+
+ clip_data = traverse_obj(data, (
+ 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
+ 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
+ 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
+ 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
+
+ return {
+ '_type': 'url_transparent',
+ 'url': f'https://www.youtube.com/watch?v={video_id}',
+ 'ie_key': YoutubeIE.ie_key(),
+ 'id': clip_id,
+ 'section_start': int(clip_data['startTimeMs']) / 1000,
+ 'section_end': int(clip_data['endTimeMs']) / 1000,
+ }
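
A stand-alone illustration of the section arithmetic above (clip_data here mirrors the shape of the 'loopCommand' payload; it is sample data, not a real API response):

# Millisecond strings from the clip payload become float seconds, so the
# 'duration' in the test above is simply section_end - section_start.
clip_data = {'startTimeMs': '29000', 'endTimeMs': '39700'}
section_start = int(clip_data['startTimeMs']) / 1000  # 29.0
section_end = int(clip_data['endTimeMs']) / 1000      # 39.7
assert round(section_end - section_start, 1) == 10.7
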
class YoutubeTruncatedIDIE(InfoExtractor):
diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py
index 16f827a7e..2a7e85472 100644
--- a/yt_dlp/extractor/zattoo.py
+++ b/yt_dlp/extractor/zattoo.py
@@ -220,7 +220,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
'id': channel_name,
'title': channel_name,
'is_live': True,
- 'format': formats,
+ 'formats': formats,
'subtitles': subtitles
}
diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
index a388ff562..3a7f01f7a 100644
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -69,6 +69,7 @@ class ZDFBaseIE(InfoExtractor):
f.update({
'url': format_url,
'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
+ 'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None))
})
new_formats = [f]
formats.extend(merge_dicts(f, {
@@ -108,7 +109,7 @@ class ZDFBaseIE(InfoExtractor):
'class': track.get('class'),
'language': track.get('language'),
})
- self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference'))
+ self._sort_formats(formats, ('tbr', 'res', 'quality', 'language_preference'))
duration = float_or_none(try_get(
ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)
@@ -187,7 +188,7 @@ class ZDFIE(ZDFBaseIE):
},
}, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
- 'md5': '3d6f1049e9682178a11c54b91f3dd065',
+ 'md5': '57af4423db0455a3975d2dc4578536bc',
'info_dict': {
'ext': 'mp4',
'id': 'video_funk_1770473',
@@ -230,6 +231,19 @@ class ZDFIE(ZDFBaseIE):
'timestamp': 1641355200,
'upload_date': '20220105',
},
+ 'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"'
+ }, {
+ 'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
+ 'info_dict': {
+ 'id': '191205_1800_sendung_sok8',
+ 'ext': 'mp4',
+ 'title': 'Das Geld anderer Leute',
+ 'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
+ 'duration': 2581.0,
+ 'timestamp': 1654790700,
+ 'upload_date': '20220609',
+ 'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
+ },
}]
def _extract_entry(self, url, player, content, video_id):
diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py
index 70eb3ccd1..d8d259dd6 100644
--- a/yt_dlp/extractor/zhihu.py
+++ b/yt_dlp/extractor/zhihu.py
@@ -58,7 +58,7 @@ class ZhihuIE(InfoExtractor):
'uploader': author.get('name'),
'timestamp': int_or_none(zvideo.get('published_at')),
'uploader_id': author.get('id'),
- 'uploader_url': format_field(url_token, template='https://www.zhihu.com/people/%s'),
+ 'uploader_url': format_field(url_token, None, 'https://www.zhihu.com/people/%s'),
'duration': float_or_none(video.get('duration')),
'view_count': int_or_none(zvideo.get('play_count')),
'like_count': int_or_none(zvideo.get('liked_count')),
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
index 70857b798..c95a0ff57 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -6,24 +6,22 @@ import re
from .utils import ExtractorError, remove_quotes
-_OPERATORS = [
- ('|', operator.or_),
- ('^', operator.xor),
- ('&', operator.and_),
- ('>>', operator.rshift),
- ('<<', operator.lshift),
- ('-', operator.sub),
- ('+', operator.add),
- ('%', operator.mod),
- ('/', operator.truediv),
- ('*', operator.mul),
-]
-_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
-
-_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
+_NAME_RE = r'[a-zA-Z_$][\w$]*'
+_OPERATORS = {
+ '|': operator.or_,
+ '^': operator.xor,
+ '&': operator.and_,
+ '>>': operator.rshift,
+ '<<': operator.lshift,
+ '-': operator.sub,
+ '+': operator.add,
+ '%': operator.mod,
+ '/': operator.truediv,
+ '*': operator.mul,
+}
_MATCHING_PARENS = dict(zip('({[', ')}]'))
+_QUOTES = '\'"'
class JS_Break(ExtractorError):
@@ -49,13 +47,11 @@ class LocalNameSpace(collections.ChainMap):
class JSInterpreter:
+ __named_object_counter = 0
+
def __init__(self, code, objects=None):
- if objects is None:
- objects = {}
- self.code = code
- self._functions = {}
- self._objects = objects
- self.__named_object_counter = 0
+ self.code, self._functions = code, {}
+ self._objects = {} if objects is None else objects
def _named_object(self, namespace, obj):
self.__named_object_counter += 1
@@ -69,12 +65,17 @@ class JSInterpreter:
return
counters = {k: 0 for k in _MATCHING_PARENS.values()}
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
+ in_quote, escaping = None, False
for idx, char in enumerate(expr):
if char in _MATCHING_PARENS:
counters[_MATCHING_PARENS[char]] += 1
elif char in counters:
counters[char] -= 1
- if char != delim[pos] or any(counters.values()):
+ elif not escaping and char in _QUOTES and in_quote in (char, None):
+ in_quote = None if in_quote else char
+ escaping = not escaping and in_quote and char == '\\'
+
+ if char != delim[pos] or any(counters.values()) or in_quote:
pos = 0
continue
elif pos != delim_len:
@@ -87,9 +88,9 @@ class JSInterpreter:
break
yield expr[start:]
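
A stand-alone sketch of the quote handling introduced above, for a single-character delimiter only (the real _separate also supports multi-character delimiters and a split limit): delimiters inside brackets or string literals no longer split the expression.

_PAIRS = dict(zip('({[', ')}]'))

def split_top_level(expr, delim=','):
    depth, in_quote, escaping, start = 0, None, False, 0
    for idx, char in enumerate(expr):
        if not in_quote and char in _PAIRS:
            depth += 1
        elif not in_quote and char in _PAIRS.values():
            depth -= 1
        elif not escaping and char in '\'"' and in_quote in (char, None):
            in_quote = None if in_quote else char
        escaping = not escaping and in_quote and char == '\\'
        if char == delim and not depth and not in_quote:
            yield expr[start:idx]
            start = idx + 1
    yield expr[start:]

assert list(split_top_level('a, "b, c", f(d, e)')) == ['a', ' "b, c"', ' f(d, e)']
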
- @staticmethod
- def _separate_at_paren(expr, delim):
- separated = list(JSInterpreter._separate(expr, delim, 1))
+ @classmethod
+ def _separate_at_paren(cls, expr, delim):
+ separated = list(cls._separate(expr, delim, 1))
if len(separated) < 2:
raise ExtractorError(f'No terminating paren {delim} in {expr}')
return separated[0][1:].strip(), separated[1].strip()
@@ -98,33 +99,29 @@ class JSInterpreter:
if allow_recursion < 0:
raise ExtractorError('Recursion limit reached')
- sub_statements = list(self._separate(stmt, ';'))
- stmt = (sub_statements or ['']).pop()
+ should_abort = False
+ sub_statements = list(self._separate(stmt, ';')) or ['']
+ stmt = sub_statements.pop().lstrip()
+
for sub_stmt in sub_statements:
ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
if should_abort:
- return ret
+ return ret, should_abort
- should_abort = False
- stmt = stmt.lstrip()
- stmt_m = re.match(r'var\s', stmt)
- if stmt_m:
- expr = stmt[len(stmt_m.group(0)):]
+ m = re.match(r'(?P<var>var\s)|return(?:\s+|$)', stmt)
+ if not m: # Try interpreting it as an expression
+ expr = stmt
+ elif m.group('var'):
+ expr = stmt[len(m.group(0)):]
else:
- return_m = re.match(r'return(?:\s+|$)', stmt)
- if return_m:
- expr = stmt[len(return_m.group(0)):]
- should_abort = True
- else:
- # Try interpreting it as an expression
- expr = stmt
+ expr = stmt[len(m.group(0)):]
+ should_abort = True
- v = self.interpret_expression(expr, local_vars, allow_recursion)
- return v, should_abort
+ return self.interpret_expression(expr, local_vars, allow_recursion), should_abort
def interpret_expression(self, expr, local_vars, allow_recursion):
expr = expr.strip()
- if expr == '': # Empty expression
+ if not expr:
return None
if expr.startswith('{'):
@@ -150,8 +147,8 @@ class JSInterpreter:
for item in self._separate(inner)])
expr = name + outer
- m = re.match(r'try\s*', expr)
- if m:
+ m = re.match(r'(?P<try>try)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr)
+ if m and m.group('try'):
if expr[m.end()] == '{':
try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
else:
@@ -161,21 +158,19 @@ class JSInterpreter:
return ret
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
- m = re.match(r'catch\s*\(', expr)
- if m:
+ elif m and m.group('catch'):
# We ignore the catch block
_, expr = self._separate_at_paren(expr, '}')
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
- m = re.match(r'for\s*\(', expr)
- if m:
+ elif m and m.group('for'):
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
if remaining.startswith('{'):
body, expr = self._separate_at_paren(remaining, '}')
else:
- m = re.match(r'switch\s*\(', remaining) # FIXME
- if m:
- switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
+ switch_m = re.match(r'switch\s*\(', remaining) # FIXME
+ if switch_m:
+ switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
body, expr = self._separate_at_paren(remaining, '}')
body = 'switch(%s){%s}' % (switch_val, body)
else:
@@ -200,8 +195,7 @@ class JSInterpreter:
f'Premature return in the initialization of a for loop in {constructor!r}')
return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
- m = re.match(r'switch\s*\(', expr)
- if m:
+ elif m and m.group('switch'):
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
body, expr = self._separate_at_paren(remaining, '}')
@@ -244,55 +238,63 @@ class JSInterpreter:
ret = local_vars[var]
expr = expr[:start] + json.dumps(ret) + expr[end:]
- for op, opfunc in _ASSIGN_OPERATORS:
- m = re.match(rf'''(?x)
- (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?
- \s*{re.escape(op)}
- (?P<expr>.*)$''', expr)
- if not m:
- continue
- right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
+ if not expr:
+ return None
- if m.groupdict().get('index'):
- lvar = local_vars[m.group('out')]
- idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
- if not isinstance(idx, int):
- raise ExtractorError(f'List indices must be integers: {idx}')
- cur = lvar[idx]
- val = opfunc(cur, right_val)
- lvar[idx] = val
- return val
+ m = re.match(fr'''(?x)
+ (?P<assign>
+ (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
+ (?P<op>{"|".join(map(re.escape, _OPERATORS))})?
+ =(?P<expr>.*)$
+ )|(?P<return>
+ (?!if|return|true|false|null)(?P<name>{_NAME_RE})$
+ )|(?P<indexing>
+ (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
+ )|(?P<attribute>
+ (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
+ )|(?P<function>
+ (?P<fname>{_NAME_RE})\((?P<args>[\w$,]*)\)$
+ )''', expr)
+ if m and m.group('assign'):
+ if not m.group('op'):
+ opfunc = lambda curr, right: right
else:
- cur = local_vars.get(m.group('out'))
- val = opfunc(cur, right_val)
- local_vars[m.group('out')] = val
- return val
+ opfunc = _OPERATORS[m.group('op')]
+ right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion)
+ left_val = local_vars.get(m.group('out'))
+
+ if not m.group('index'):
+ local_vars[m.group('out')] = opfunc(left_val, right_val)
+ return local_vars[m.group('out')]
+ elif left_val is None:
+ raise ExtractorError(f'Cannot index undefined variable: {m.group("out")}')
+
+ idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
+ if not isinstance(idx, int):
+ raise ExtractorError(f'List indices must be integers: {idx}')
+ left_val[idx] = opfunc(left_val[idx], right_val)
+ return left_val[idx]
- if expr.isdigit():
+ elif expr.isdigit():
return int(expr)
- if expr == 'break':
+ elif expr == 'break':
raise JS_Break()
elif expr == 'continue':
raise JS_Continue()
- var_m = re.match(
- r'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE,
- expr)
- if var_m:
- return local_vars[var_m.group('name')]
+ elif m and m.group('return'):
+ return local_vars[m.group('name')]
with contextlib.suppress(ValueError):
return json.loads(expr)
- m = re.match(
- r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)
- if m:
+ if m and m.group('indexing'):
val = local_vars[m.group('in')]
idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
return val[idx]
- for op, opfunc in _OPERATORS:
+ for op, opfunc in _OPERATORS.items():
separated = list(self._separate(expr, op))
if len(separated) < 2:
continue
@@ -308,10 +310,7 @@ class JSInterpreter:
raise ExtractorError(f'Premature right-side return of {op} in {expr!r}')
return opfunc(left_val or 0, right_val)
- m = re.match(
- r'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE,
- expr)
- if m:
+ if m and m.group('attribute'):
variable = m.group('var')
member = remove_quotes(m.group('member') or m.group('member2'))
arg_str = expr[m.end():]
@@ -326,7 +325,6 @@ class JSInterpreter:
raise ExtractorError(f'{member} {msg}: {expr}')
def eval_method():
- nonlocal member
if variable == 'String':
obj = str
elif variable in local_vars:
@@ -336,8 +334,8 @@ class JSInterpreter:
self._objects[variable] = self.extract_object(variable)
obj = self._objects[variable]
+ # Member access
if arg_str is None:
- # Member access
if member == 'length':
return len(obj)
return obj[member]
@@ -412,9 +410,7 @@ class JSInterpreter:
except ValueError:
return -1
- if isinstance(obj, list):
- member = int(member)
- return obj[member](argvals)
+ return obj[int(member) if isinstance(obj, list) else member](argvals)
if remaining:
return self.interpret_expression(
@@ -423,9 +419,8 @@ class JSInterpreter:
else:
return eval_method()
- m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr)
- if m:
- fname = m.group('func')
+ elif m and m.group('function'):
+ fname = m.group('fname')
argvals = tuple(
int(v) if v.isdigit() else local_vars[v]
for v in self._separate(m.group('args')))
@@ -435,8 +430,7 @@ class JSInterpreter:
self._functions[fname] = self.extract_function(fname)
return self._functions[fname](argvals)
- if expr:
- raise ExtractorError('Unsupported JS expression %r' % expr)
+ raise ExtractorError(f'Unsupported JS expression {expr!r}')
def extract_object(self, objname):
_FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
@@ -465,14 +459,17 @@ class JSInterpreter:
""" @returns argnames, code """
func_m = re.search(
r'''(?x)
- (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
+ (?:
+ function\s+%(name)s|
+ [{;,]\s*%(name)s\s*=\s*function|
+ var\s+%(name)s\s*=\s*function
+ )\s*
\((?P<args>[^)]*)\)\s*
- (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % (
- re.escape(funcname), re.escape(funcname), re.escape(funcname)),
+ (?P<code>{(?:(?!};)[^"]|"([^"]|\\")*")+})''' % {'name': re.escape(funcname)},
self.code)
if func_m is None:
- raise ExtractorError('Could not find JS function %r' % funcname)
+ raise ExtractorError(f'Could not find JS function "{funcname}"')
code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
return func_m.group('args').split(','), code
def extract_function(self, funcname):
@@ -486,11 +483,9 @@ class JSInterpreter:
break
start, body_start = mobj.span()
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
- name = self._named_object(
- local_vars,
- self.extract_function_from_code(
- [str.strip(x) for x in mobj.group('args').split(',')],
- body, local_vars, *global_stack))
+ name = self._named_object(local_vars, self.extract_function_from_code(
+ [x.strip() for x in mobj.group('args').split(',')],
+ body, local_vars, *global_stack))
code = code[:start] + name + remaining
return self.build_function(argnames, code, local_vars, *global_stack)
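
The public surface of the interpreter is unchanged by this refactor; a usage sketch (this assumes the string/array method handlers defined elsewhere in jsinterp.py, which are not part of these hunks):

from yt_dlp.jsinterp import JSInterpreter

# Typical signature-style function: split, mutate and re-join a string
jsi = JSInterpreter('function f(a){var b=a.split(""); b.reverse(); return b.join("")}')
assert jsi.call_function('f', 'abc') == 'cba'
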
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 91095f7f1..dddd5b15b 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1,29 +1,15 @@
-from __future__ import unicode_literals
-
-import os.path
+import collections
+import contextlib
import optparse
+import os.path
import re
+import shlex
+import shutil
+import string
import sys
-from .compat import (
- compat_expanduser,
- compat_get_terminal_size,
- compat_getenv,
- compat_kwargs,
- compat_shlex_split,
-)
-from .utils import (
- Config,
- expand_path,
- get_executable_path,
- OUTTMPL_TYPES,
- POSTPROCESS_WHEN,
- remove_end,
- write_string,
-)
+from .compat import compat_expanduser
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
-from .version import __version__
-
from .downloader.external import list_external_downloaders
from .postprocessor import (
FFmpegExtractAudioPP,
@@ -33,22 +19,27 @@ from .postprocessor import (
SponsorBlockPP,
)
from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE
+from .utils import (
+ OUTTMPL_TYPES,
+ POSTPROCESS_WHEN,
+ Config,
+ expand_path,
+ get_executable_path,
+ join_nonempty,
+ remove_end,
+ write_string,
+)
+from .version import __version__
def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
- parser = create_parser()
- root = Config(parser)
-
+ root = Config(create_parser())
if ignore_config_files == 'if_override':
ignore_config_files = overrideArguments is not None
- if overrideArguments:
- root.append_config(overrideArguments, label='Override')
- else:
- root.append_config(sys.argv[1:], label='Command-line')
def _readUserConf(package_name, default=[]):
# .config
- xdg_config_home = compat_getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
+ xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
userConfFile = os.path.join(xdg_config_home, package_name, 'config')
if not os.path.isfile(userConfFile):
userConfFile = os.path.join(xdg_config_home, '%s.conf' % package_name)
@@ -57,7 +48,7 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
return userConf, userConfFile
# appdata
- appdata_dir = compat_getenv('appdata')
+ appdata_dir = os.getenv('appdata')
if appdata_dir:
userConfFile = os.path.join(appdata_dir, package_name, 'config')
userConf = Config.read_file(userConfFile, default=None)
@@ -80,7 +71,7 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
def add_config(label, path, user=False):
""" Adds config and returns whether to continue """
- if root.parse_args()[0].ignoreconfig:
+ if root.parse_known_args()[0].ignoreconfig:
return False
# Multiple package names can be given here
# Eg: ('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for
@@ -99,55 +90,131 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'):
def load_configs():
yield not ignore_config_files
yield add_config('Portable', get_executable_path())
- yield add_config('Home', expand_path(root.parse_args()[0].paths.get('home', '')).strip())
+ yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip())
yield add_config('User', None, user=True)
yield add_config('System', '/etc')
- if all(load_configs()):
- # If ignoreconfig is found inside the system configuration file,
- # the user configuration is removed
- if root.parse_args()[0].ignoreconfig:
- user_conf = next((i for i, conf in enumerate(root.configs) if conf.label == 'User'), None)
- if user_conf is not None:
- root.configs.pop(user_conf)
+ opts = optparse.Values({'verbose': True, 'print_help': False})
+ try:
+ try:
+ if overrideArguments:
+ root.append_config(overrideArguments, label='Override')
+ else:
+ root.append_config(sys.argv[1:], label='Command-line')
+ loaded_all_configs = all(load_configs())
+ except ValueError as err:
+ raise root.parser.error(err)
+
+ if loaded_all_configs:
+ # If ignoreconfig is found inside the system configuration file,
+ # the user configuration is removed
+ if root.parse_known_args()[0].ignoreconfig:
+ user_conf = next((i for i, conf in enumerate(root.configs) if conf.label == 'User'), None)
+ if user_conf is not None:
+ root.configs.pop(user_conf)
+
+ opts, args = root.parse_args()
+ except optparse.OptParseError:
+ with contextlib.suppress(optparse.OptParseError):
+ opts, _ = root.parse_known_args(strict=False)
+ raise
+ except (SystemExit, KeyboardInterrupt):
+ opts.verbose = False
+ raise
+ finally:
+ verbose = opts.verbose and f'\n{root}'.replace('\n| ', '\n[debug] ')[1:]
+ if verbose:
+ write_string(f'{verbose}\n')
+ if opts.print_help:
+ if verbose:
+ write_string('\n')
+ root.parser.print_help()
+ if opts.print_help:
+ sys.exit()
+ return root.parser, opts, args
- opts, args = root.parse_args()
- if opts.verbose:
- write_string(f'\n{root}'.replace('\n| ', '\n[debug] ')[1:] + '\n')
- return parser, opts, args
+
+class _YoutubeDLHelpFormatter(optparse.IndentedHelpFormatter):
+ def __init__(self):
+ # No need to wrap help messages if we're on a wide console
+ max_width = shutil.get_terminal_size().columns or 80
+ # The % is chosen to get a pretty output in README.md
+ super().__init__(width=max_width, max_help_position=int(0.45 * max_width))
+
+ @staticmethod
+ def format_option_strings(option):
+ """ ('-o', '--option') -> -o, --option METAVAR """
+ opts = join_nonempty(
+ option._short_opts and option._short_opts[0],
+ option._long_opts and option._long_opts[0],
+ delim=', ')
+ if option.takes_value():
+ opts += f' {option.metavar}'
+ return opts
class _YoutubeDLOptionParser(optparse.OptionParser):
# optparse is deprecated since python 3.2. So assume a stable interface even for private methods
+ ALIAS_TRIGGER_LIMIT = 100
+
+ def __init__(self):
+ super().__init__(
+ prog='yt-dlp' if detect_variant() == 'source' else None,
+ version=__version__,
+ usage='%prog [OPTIONS] URL [URL...]',
+ epilog='See full documentation at https://github.com/yt-dlp/yt-dlp#readme',
+ formatter=_YoutubeDLHelpFormatter(),
+ conflict_handler='resolve',
+ )
+
+ _UNKNOWN_OPTION = (optparse.BadOptionError, optparse.AmbiguousOptionError)
+ _BAD_OPTION = optparse.OptionValueError
+
+ def parse_known_args(self, args=None, values=None, strict=True):
+ """Same as parse_args, but ignore unknown switches. Similar to argparse.parse_known_args"""
+ self.rargs, self.largs = self._get_args(args), []
+ self.values = values or self.get_default_values()
+ while self.rargs:
+ arg = self.rargs[0]
+ try:
+ if arg == '--':
+ del self.rargs[0]
+ break
+ elif arg.startswith('--'):
+ self._process_long_opt(self.rargs, self.values)
+ elif arg.startswith('-') and arg != '-':
+ self._process_short_opts(self.rargs, self.values)
+ elif self.allow_interspersed_args:
+ self.largs.append(self.rargs.pop(0))
+ else:
+ break
+ except optparse.OptParseError as err:
+ if isinstance(err, self._UNKNOWN_OPTION):
+ self.largs.append(err.opt_str)
+ elif strict:
+ if isinstance(err, self._BAD_OPTION):
+ self.error(str(err))
+ raise
+ return self.check_values(self.values, self.largs)
+
+ def error(self, msg):
+ msg = f'{self.get_prog_name()}: error: {str(msg).strip()}\n'
+ raise optparse.OptParseError(f'{self.get_usage()}\n{msg}' if self.usage else msg)
+
+ def _get_args(self, args):
+ return sys.argv[1:] if args is None else list(args)
def _match_long_opt(self, opt):
"""Improve ambigious argument resolution by comparing option objects instead of argument strings"""
try:
return super()._match_long_opt(opt)
except optparse.AmbiguousOptionError as e:
- if len(set(self._long_opt[p] for p in e.possibilities)) == 1:
+ if len({self._long_opt[p] for p in e.possibilities}) == 1:
return e.possibilities[0]
raise
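
A usage sketch of the lenient parsing added above (assumes the module-level create_parser() defined below): unknown switches end up in the leftover list instead of aborting, which is what lets early passes such as config handling run before every option, including user-defined aliases, has been registered.

parser = create_parser()
opts, leftover = parser.parse_known_args(['--no-such-flag', 'https://example.com'])
# Both the unknown switch and the positional URL are returned as leftovers
assert leftover == ['--no-such-flag', 'https://example.com']
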
def create_parser():
- def _format_option_string(option):
- ''' ('-o', '--option') -> -o, --format METAVAR'''
-
- opts = []
-
- if option._short_opts:
- opts.append(option._short_opts[0])
- if option._long_opts:
- opts.append(option._long_opts[0])
- if len(opts) > 1:
- opts.insert(1, ', ')
-
- if option.takes_value():
- opts.append(' %s' % option.metavar)
-
- return ''.join(opts)
-
def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip):
# append can be True, False or -1 (prepend)
current = list(getattr(parser.values, option.dest)) if append else []
@@ -190,9 +257,9 @@ def create_parser():
out_dict = dict(getattr(parser.values, option.dest))
multiple_args = not isinstance(value, str)
if multiple_keys:
- allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys)
+ allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
mobj = re.match(
- r'(?i)(?P<keys>%s)%s(?P<val>.*)$' % (allowed_keys, delimiter),
+ fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
value[0] if multiple_args else value)
if mobj is not None:
keys, val = mobj.group('keys').split(','), mobj.group('val')
@@ -202,7 +269,7 @@ def create_parser():
keys, val = [default_key], value
else:
raise optparse.OptionValueError(
- 'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value))
+ f'wrong {opt_str} formatting; it should be {option.metavar}, not "{value}"')
try:
keys = map(process_key, keys) if process_key else keys
val = process(val) if process else val
@@ -212,30 +279,45 @@ def create_parser():
out_dict[key] = out_dict.get(key, []) + [val] if append else val
setattr(parser.values, option.dest, out_dict)
- # No need to wrap help messages if we're on a wide console
- columns = compat_get_terminal_size().columns
- max_width = columns if columns else 80
- # 47% is chosen because that is how README.md is currently formatted
- # and moving help text even further to the right is undesirable.
- # This can be reduced in the future to get a prettier output
- max_help_position = int(0.47 * max_width)
+ parser = _YoutubeDLOptionParser()
+ alias_group = optparse.OptionGroup(parser, 'Aliases')
+ Formatter = string.Formatter()
- fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
- fmt.format_option_strings = _format_option_string
+ def _create_alias(option, opt_str, value, parser):
+ aliases, opts = value
+ try:
+ nargs = len({i if f == '' else f
+ for i, (_, f, _, _) in enumerate(Formatter.parse(opts)) if f is not None})
+ opts.format(*map(str, range(nargs))) # validate
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} OPTIONS formatting; {err}')
+ if alias_group not in parser.option_groups:
+ parser.add_option_group(alias_group)
- kw = {
- 'version': __version__,
- 'formatter': fmt,
- 'usage': '%prog [OPTIONS] URL [URL...]',
- 'conflict_handler': 'resolve',
- }
+ aliases = (x if x.startswith('-') else f'--{x}' for x in map(str.strip, aliases.split(',')))
+ try:
+ alias_group.add_option(
+ *aliases, help=opts, nargs=nargs, type='str' if nargs else None,
+ dest='_triggered_aliases', default=collections.defaultdict(int),
+ metavar=' '.join(f'ARG{i}' for i in range(nargs)), action='callback',
+ callback=_alias_callback, callback_kwargs={'opts': opts, 'nargs': nargs})
+ except Exception as err:
+ raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}')
- parser = _YoutubeDLOptionParser(**compat_kwargs(kw))
+ def _alias_callback(option, opt_str, value, parser, opts, nargs):
+ counter = getattr(parser.values, option.dest)
+ counter[opt_str] += 1
+ if counter[opt_str] > parser.ALIAS_TRIGGER_LIMIT:
+ raise optparse.OptionValueError(f'Alias {opt_str} exceeded invocation limit')
+ if nargs == 1:
+ value = [value]
+ assert (nargs == 0 and value is None) or len(value) == nargs
+ parser.rargs[:0] = shlex.split(
+ opts if value is None else opts.format(*map(shlex.quote, value)))
general = optparse.OptionGroup(parser, 'General Options')
general.add_option(
- '-h', '--help',
- action='help',
+ '-h', '--help', dest='print_help', action='store_true',
help='Print this help text and exit')
general.add_option(
'--version',
@@ -272,7 +354,12 @@ def create_parser():
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for the search term "large apple". Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching')
+ help=(
+ 'Use this prefix for unqualified URLs. '
+ 'Eg: "gvsearch2:python" downloads two videos from google videos for the search term "python". '
+ 'Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). '
+ '"error" just throws an error. The default value "fixup_error" repairs broken URLs, '
+ 'but emits an error if this is not possible instead of searching'))
general.add_option(
'--ignore-config', '--no-config',
action='store_true', dest='ignoreconfig',
@@ -290,8 +377,8 @@ def create_parser():
'--config-locations',
dest='config_locations', metavar='PATH', action='append',
help=(
- 'Location of the main configuration file; either the path to the config or its containing directory. '
- 'Can be used multiple times and inside other configuration files'))
+ 'Location of the main configuration file; either the path to the config or its containing directory '
+ '("-" for stdin). Can be used multiple times and inside other configuration files'))
general.add_option(
'--flat-playlist',
action='store_const', dest='extract_flat', const='in_playlist', default=False,
@@ -348,16 +435,26 @@ def create_parser():
'Options that can help keep compatibility with youtube-dl or youtube-dlc '
'configurations by reverting some of the changes made in yt-dlp. '
'See "Differences in default behavior" for details'))
+ general.add_option(
+ '--alias', metavar='ALIASES OPTIONS', dest='_', type='str', nargs=2,
+ action='callback', callback=_create_alias,
+ help=(
+ 'Create aliases for an option string. Unless an alias starts with a dash "-", it is prefixed with "--". '
+ 'Arguments are parsed according to the Python string formatting mini-language. '
+ 'Eg: --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options '
+ '"--get-audio" and "-X" that take an argument (ARG0) and expand to '
+ '"-S=aext:ARG0,abr -x --audio-format ARG0". All defined aliases are listed in the --help output. '
+ 'Alias options can trigger more aliases; so be careful to avoid defining recursive options. '
+ f'As a safety measure, each alias may be triggered a maximum of {_YoutubeDLOptionParser.ALIAS_TRIGGER_LIMIT} times. '
+ 'This option can be used multiple times'))
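
How many arguments an alias takes falls out of the placeholder counting in _create_alias above; a stand-alone sketch using the example from the help text:

import string

# Automatic fields ('' names) are numbered by position, explicit fields by
# name, so '{0}...{0}' collapses to a single required argument (ARG0)
fmt = string.Formatter()
opts = '-S=aext:{0},abr -x --audio-format {0}'
nargs = len({i if f == '' else f
             for i, (_, f, _, _) in enumerate(fmt.parse(opts)) if f is not None})
assert nargs == 1
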
network = optparse.OptionGroup(parser, 'Network Options')
network.add_option(
'--proxy', dest='proxy',
default=None, metavar='URL',
help=(
- 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
- 'SOCKS proxy, specify a proper scheme. For example '
- 'socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") '
- 'for direct connection'))
+ 'Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme. '
+ 'Eg: socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection'))
network.add_option(
'--socket-timeout',
dest='socket_timeout', type=float, default=None, metavar='SECONDS',
@@ -410,15 +507,19 @@ def create_parser():
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
- help='Playlist video to start at (default is %default)')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
- help='Playlist video to end at (default is last)')
+ help=optparse.SUPPRESS_HELP)
selection.add_option(
- '--playlist-items',
+ '-I', '--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13')
+ help=(
+ 'Comma-separated playlist_index of the videos to download. '
+ 'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. '
+ 'Use negative indices to count from the right and negative STEP to download in reverse order. '
+ 'Eg: "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15'))
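
A rough model of the item-spec semantics described above (the real parser lives in yt-dlp's utils and also validates input; this sketch assumes well-formed specs, 1-based inclusive bounds and a playlist of n items):

def expand_items(spec, n):
    for part in spec.split(','):
        if ':' not in part:
            idx = int(part)
            yield idx if idx > 0 else n + 1 + idx
            continue
        start, stop, *step = part.split(':')
        step = int(step[0]) if step and step[0] else 1
        start = int(start) if start else (1 if step > 0 else n)
        stop = int(stop) if stop else (n if step > 0 else 1)
        start, stop = (n + 1 + x if x < 0 else x for x in (start, stop))
        yield from range(start, stop + (1 if step > 0 else -1), step)

# Reproduces the worked example from the help text
assert list(expand_items('1:3,7,-5::2', 15)) == [1, 2, 3, 7, 11, 13, 15]
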
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
@@ -439,9 +540,8 @@ def create_parser():
'--date',
metavar='DATE', dest='date', default=None,
help=(
- 'Download only videos uploaded on this date. '
- 'The date can be "YYYYMMDD" or in the format '
- '"(now|today)[+-][0-9](day|week|month|year)(s)?"'))
+ 'Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format '
+ '[now|today|yesterday][-N[day|week|month|year]]. Eg: --date today-2weeks'))
selection.add_option(
'--datebefore',
metavar='DATE', dest='datebefore', default=None,
@@ -466,7 +566,7 @@ def create_parser():
'--match-filters',
metavar='FILTER', dest='match_filter', action='append',
help=(
- 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
+ 'Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a '
'number or a string using the operators defined in "Filtering formats". '
'You can also simply specify a field to match if the field is present, '
'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
@@ -475,7 +575,8 @@ def create_parser():
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'matches only videos that are not live OR those that have a like count more than 100 '
'(or the like field is not available) and also has a description '
- 'that contains the phrase "cats & dogs" (ignoring case)'))
+ 'that contains the phrase "cats & dogs" (caseless). '
+ 'Use "--match-filter -" to interactively ask whether to download each video'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,
@@ -515,11 +616,11 @@ def create_parser():
selection.add_option(
'--break-per-input',
action='store_true', dest='break_per_url', default=False,
- help='Make --break-on-existing and --break-on-reject act only on the current input URL')
+ help='Make --break-on-existing, --break-on-reject and --max-downloads act only on the current input URL')
selection.add_option(
'--no-break-per-input',
action='store_false', dest='break_per_url',
- help='--break-on-existing and --break-on-reject terminates the entire download queue')
+ help='--break-on-existing and similar options terminate the entire download queue')
selection.add_option(
'--skip-playlist-after-errors', metavar='N',
dest='skip_playlist_after_errors', default=None, type=int,
@@ -574,6 +675,19 @@ def create_parser():
'--ap-list-mso',
action='store_true', dest='ap_list_mso', default=False,
help='List all supported multiple-system operators')
+ authentication.add_option(
+ '--client-certificate',
+ dest='client_certificate', metavar='CERTFILE',
+ help='Path to client certificate file in PEM format. May include the private key')
+ authentication.add_option(
+ '--client-certificate-key',
+ dest='client_certificate_key', metavar='KEYFILE',
+ help='Path to private key file for client certificate')
+ authentication.add_option(
+ '--client-certificate-password',
+ dest='client_certificate_password', metavar='PASSWORD',
+ help='Password for client certificate private key, if encrypted. '
+ 'If not provided, and the key is encrypted, yt-dlp will ask interactively')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option(
@@ -590,13 +704,11 @@ def create_parser():
action='store_true', dest='format_sort_force', metavar='FORMAT', default=False,
help=(
'Force user specified sort order to have precedence over all fields, '
- 'see "Sorting Formats" for more details'))
+ 'see "Sorting Formats" for more details (Alias: --S-force)'))
video_format.add_option(
'--no-format-sort-force',
action='store_false', dest='format_sort_force', metavar='FORMAT', default=False,
- help=(
- 'Some fields have precedence over the user specified sort order (default), '
- 'see "Sorting Formats" for more details'))
+ help='Some fields have precedence over the user specified sort order (default)')
video_format.add_option(
'--video-multistreams',
action='store_true', dest='allow_multiple_video_streams', default=None,
@@ -695,14 +807,14 @@ def create_parser():
subtitles.add_option(
'--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
- help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
+ help='Subtitle format; accepts formats preference, Eg: "srt" or "ass/srt/best"')
subtitles.add_option(
'--sub-langs', '--srt-langs',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
default=[], callback=_list_from_options_callback,
help=(
'Languages of the subtitles to download (can be regex) or "all" separated by commas. (Eg: --sub-langs "en.*,ja") '
- 'You can prefix the language code with a "-" to exempt it from the requested languages. (Eg: --sub-langs all,-live_chat) '
+ 'You can prefix the language code with a "-" to exclude it from the requested languages. (Eg: --sub-langs all,-live_chat) '
'Use --list-subs for a list of available language tags'))
downloader = optparse.OptionGroup(parser, 'Download Options')
@@ -731,13 +843,26 @@ def create_parser():
dest='fragment_retries', metavar='RETRIES', default=10,
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
downloader.add_option(
+ '--retry-sleep',
+ dest='retry_sleep', metavar='[TYPE:]EXPR', default={}, type='str',
+ action='callback', callback=_dict_from_options_callback,
+ callback_kwargs={
+ 'allowed_keys': 'http|fragment|file_access',
+ 'default_key': 'http',
+ }, help=(
+ 'An expression for the time to sleep between retries in seconds (optionally) prefixed '
+ 'by the type of retry (file_access, fragment, http (default)) to apply the sleep to. '
+ 'EXPR can be a number, linear=START[:END[:STEP=1]] or exp=START[:END[:BASE=2]]. '
+ 'This option can be used multiple times to set the sleep for the different retry types. '
+ 'Eg: --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20'))
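
A sketch of the EXPR semantics described above (the actual evaluation happens in yt-dlp's retry machinery; here n is the 0-based attempt number and END caps the computed sleep):

def retry_sleep(expr, n):
    func, _, args = expr.partition('=')
    if not args:  # bare number: constant sleep
        return float(func)
    vals = args.split(':')
    start = float(vals[0])
    end = float(vals[1]) if len(vals) > 1 and vals[1] else None
    last = float(vals[2]) if len(vals) > 2 and vals[2] else {'linear': 1, 'exp': 2}[func]
    sleep = start + n * last if func == 'linear' else start * last ** n
    return sleep if end is None else min(sleep, end)

assert [retry_sleep('linear=1::2', n) for n in range(3)] == [1.0, 3.0, 5.0]
assert [retry_sleep('exp=1:20', n) for n in range(6)] == [1.0, 2.0, 4.0, 8.0, 16.0, 20.0]
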
+ downloader.add_option(
'--skip-unavailable-fragments', '--no-abort-on-unavailable-fragment',
action='store_true', dest='skip_unavailable_fragments', default=True,
- help='Skip unavailable fragments for DASH, hlsnative and ISM (default) (Alias: --no-abort-on-unavailable-fragment)')
+ help='Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragment)')
downloader.add_option(
'--abort-on-unavailable-fragment', '--no-skip-unavailable-fragments',
action='store_false', dest='skip_unavailable_fragments',
- help='Abort downloading if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
+ help='Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)')
downloader.add_option(
'--keep-fragments',
action='store_true', dest='keep_fragments', default=False,
@@ -770,17 +895,25 @@ def create_parser():
help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--playlist-reverse',
- action='store_true',
- help='Download playlist videos in reverse order')
+ action='store_true', dest='playlist_reverse',
+ help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--no-playlist-reverse',
action='store_false', dest='playlist_reverse',
- help='Download playlist videos in default order (default)')
+ help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--playlist-random',
- action='store_true',
+ action='store_true', dest='playlist_random',
help='Download playlist videos in random order')
downloader.add_option(
+ '--lazy-playlist',
+ action='store_true', dest='lazy_playlist',
+ help='Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse')
+ downloader.add_option(
+ '--no-lazy-playlist',
+ action='store_false', dest='lazy_playlist',
+ help='Process videos in the playlist only after the entire playlist is parsed (default)')
+ downloader.add_option(
'--xattr-set-filesize',
dest='xattr_set_filesize', action='store_true',
help='Set file xattribute ytdl.filesize with expected file size')
@@ -807,6 +940,14 @@ def create_parser():
'Do not use the mpegts container for HLS videos. '
'This is default when not downloading live streams'))
downloader.add_option(
+ '--download-sections',
+ metavar='REGEX', dest='download_ranges', action='append',
+ help=(
+ 'Download only chapters whose title matches the given regular expression. '
+ 'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
+ 'Eg: --download-sections "*10:15-15:00" --download-sections "intro". '
+ 'Needs ffmpeg. This option can be used multiple times to download multiple sections'))
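
A hypothetical parse of the "*START-END" form described above (the real option accepts more variants; plain values are treated as chapter-title regexes):

import re

def parse_section(spec):
    m = re.fullmatch(r'\*(?P<start>[\d:.]+)-(?P<end>[\d:.]+)', spec)
    if not m:
        return None  # chapter-title regex, matched against chapter names
    def to_seconds(t):  # 'MM:SS' or 'HH:MM:SS' to float seconds
        return sum(float(p) * 60 ** i for i, p in enumerate(reversed(t.split(':'))))
    return to_seconds(m.group('start')), to_seconds(m.group('end'))

assert parse_section('*10:15-15:00') == (615.0, 900.0)
assert parse_section('intro') is None
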
+ downloader.add_option(
'--downloader', '--external-downloader',
dest='external_downloader', metavar='[PROTO:]NAME', default={}, type='str',
action='callback', callback=_dict_from_options_callback,
@@ -817,11 +958,11 @@ def create_parser():
}, help=(
'Name or path of the external downloader to use (optionally) prefixed by '
'the protocols (http, ftp, m3u8, dash, rtsp, rtmp, mms) to use it for. '
- 'Currently supports native, %s (Recommended: aria2c). '
+ f'Currently supports native, {", ".join(list_external_downloaders())}. '
'You can use this option multiple times to set different downloaders for different protocols. '
'For example, --downloader aria2c --downloader "dash,m3u8:native" will use '
'aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads '
- '(Alias: --external-downloader)' % ', '.join(list_external_downloaders())))
+ '(Alias: --external-downloader)'))
downloader.add_option(
'--downloader-args', '--external-downloader-args',
metavar='NAME:ARGS', dest='external_downloader_args', default={}, type='str',
@@ -829,7 +970,7 @@ def create_parser():
callback_kwargs={
'allowed_keys': r'ffmpeg_[io]\d*|%s' % '|'.join(map(re.escape, list_external_downloaders())),
'default_key': 'default',
- 'process': compat_shlex_split
+ 'process': shlex.split
}, help=(
'Give these arguments to the external downloader. '
'Specify the downloader name and the arguments separated by a colon ":". '
@@ -936,7 +1077,8 @@ def create_parser():
}, help=(
'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". '
'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). '
- 'Implies --quiet and --simulate (unless --no-simulate is used). This option can be used multiple times'))
+ 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. '
+ 'This option can be used multiple times'))
verbosity.add_option(
'--print-to-file',
metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2,
@@ -1044,6 +1186,10 @@ def create_parser():
action='store_true', dest='write_pages', default=False,
help='Write downloaded intermediary pages to files in the current directory to debug problems')
verbosity.add_option(
+ '--load-pages',
+ action='store_true', dest='load_pages', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
'--youtube-print-sig-code',
action='store_true', dest='youtube_print_sig_code', default=False,
help=optparse.SUPPRESS_HELP)
@@ -1054,7 +1200,7 @@ def create_parser():
verbosity.add_option(
'-C', '--call-home',
dest='call_home', action='store_true', default=False,
- # help='[Broken] Contact the yt-dlp server for debugging')
+ # help='Contact the yt-dlp server for debugging')
help=optparse.SUPPRESS_HELP)
verbosity.add_option(
'--no-call-home',
@@ -1102,7 +1248,7 @@ def create_parser():
filesystem.add_option(
'--output-na-placeholder',
dest='outtmpl_na_placeholder', metavar='TEXT', default='NA',
- help=('Placeholder value for unavailable meta fields in output filename template (default: "%default")'))
+ help=('Placeholder for unavailable fields in "OUTPUT TEMPLATE" (default: "%default")'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER', type=int,
@@ -1308,26 +1454,27 @@ def create_parser():
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help=(
- 'Specify audio format to convert the audio to when -x is used. Currently supported formats are: '
- 'best (default) or one of %s' % ', '.join(FFmpegExtractAudioPP.SUPPORTED_EXTS)))
+ 'Format to convert the audio to when -x is used. '
+ f'(currently supported: best (default), {", ".join(FFmpegExtractAudioPP.SUPPORTED_EXTS)}). '
+ 'You can specify multiple rules using similar syntax as --remux-video'))
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
- help='Specify ffmpeg audio quality to use when converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)')
+ help=(
+ 'Specify ffmpeg audio quality to use when converting the audio with -x. '
+ 'Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)'))
postproc.add_option(
'--remux-video',
metavar='FORMAT', dest='remuxvideo', default=None,
help=(
- 'Remux the video into another container if necessary (currently supported: %s). '
- 'If target container does not support the video/audio codec, remuxing will fail. '
- 'You can specify multiple rules; Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 '
- 'and anything else to mkv.' % ', '.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)))
+ 'Remux the video into another container if necessary '
+ f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). '
+ 'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; '
+ 'Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv'))
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
- help=(
- 'Re-encode the video into another format if re-encoding is necessary. '
- 'The syntax and supported formats are the same as --remux-video'))
+ help='Re-encode the video into another format if necessary. The syntax and supported formats are the same as --remux-video')
postproc.add_option(
'--postprocessor-args', '--ppa',
metavar='NAME:ARGS', dest='postprocessor_args', default={}, type='str',
@@ -1335,7 +1482,7 @@ def create_parser():
callback_kwargs={
'allowed_keys': r'\w+(?:\+\w+)?',
'default_key': 'default-compat',
- 'process': compat_shlex_split,
+ 'process': shlex.split,
'multiple_keys': False
}, help=(
'Give these arguments to the postprocessors. '
@@ -1424,7 +1571,7 @@ def create_parser():
dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3,
help='Replace text in a metadata field using the given regex. This option can be used multiple times')
postproc.add_option(
- '--xattrs',
+ '--xattrs', '--xattr',
action='store_true', dest='xattrs', default=False,
help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
postproc.add_option(
@@ -1497,7 +1644,8 @@ def create_parser():
metavar='FORMAT', dest='convertthumbnails', default=None,
help=(
'Convert the thumbnails to another format '
- '(currently supported: %s) ' % ', '.join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)))
+ f'(currently supported: {", ".join(FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS)}). '
+ 'You can specify multiple rules using similar syntax as --remux-video'))
postproc.add_option(
'--split-chapters', '--split-tracks',
dest='split_chapters', action='store_true', default=False,
@@ -1514,9 +1662,7 @@ def create_parser():
metavar='REGEX', dest='remove_chapters', action='append',
help=(
'Remove chapters whose title matches the given regular expression. '
- 'Time ranges prefixed by a "*" can also be used in place of chapters to remove the specified range. '
- 'Eg: --remove-chapters "*10:15-15:00" --remove-chapters "intro". '
- 'This option can be used multiple times'))
+ 'The syntax is the same as --download-sections. This option can be used multiple times'))
postproc.add_option(
'--no-remove-chapters', dest='remove_chapters', action='store_const', const=None,
help='Do not remove any chapters from the file (default)')
@@ -1524,9 +1670,8 @@ def create_parser():
'--force-keyframes-at-cuts',
action='store_true', dest='force_keyframes_at_cuts', default=False,
help=(
- 'Force keyframes around the chapters before removing/splitting them. '
- 'Requires a re-encode and thus is very slow, but the resulting video '
- 'may have fewer artifacts around the cuts'))
+ 'Force keyframes at cuts when downloading/splitting/removing sections. '
+ 'This is slow due to needing a re-encode, but the resulting video may have fewer artifacts around the cuts'))
postproc.add_option(
'--no-force-keyframes-at-cuts',
action='store_false', dest='force_keyframes_at_cuts',
@@ -1564,8 +1709,8 @@ def create_parser():
'aliases': {'default': ['all']}
}, help=(
'SponsorBlock categories to create chapters for, separated by commas. '
- f'Available categories are all, default(=all), {", ".join(SponsorBlockPP.CATEGORIES.keys())}. '
- 'You can prefix the category with a "-" to exempt it. See [1] for description of the categories. '
+ f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). '
+ 'You can prefix the category with a "-" to exclude it. See [1] for description of the categories. '
'Eg: --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
sponsorblock.add_option(
'--sponsorblock-remove', metavar='CATS',
@@ -1586,9 +1731,9 @@ def create_parser():
'--sponsorblock-chapter-title', metavar='TEMPLATE',
default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title',
help=(
- 'The title template for SponsorBlock chapters created by --sponsorblock-mark. '
- 'The same syntax as the output template is used, but the only available fields are '
- 'start_time, end_time, category, categories, name, category_names. Defaults to "%default"'))
+ 'An output template for the title of the SponsorBlock chapters created by --sponsorblock-mark. '
+ 'The only available fields are start_time, end_time, category, categories, name, category_names. '
+ 'Defaults to "%default"'))
sponsorblock.add_option(
'--no-sponsorblock', default=False,
action='store_true', dest='no_sponsorblock',
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index addc46e5b..7c63fe8a4 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -45,9 +45,6 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
an initial argument and then with the returned value of the previous
PostProcessor.
- The chain will be stopped if one of them ever returns None or the end
- of the chain is reached.
-
PostProcessor objects follow a "mutual registration" process similar
to InfoExtractor objects.
@@ -176,6 +173,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
def report_progress(self, s):
s['_default_template'] = '%(postprocessor)s %(status)s' % s
+ if not self._downloader:
+ return
progress_dict = s.copy()
progress_dict.pop('info_dict')
@@ -184,7 +183,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
progress_template = self.get_param('progress_template', {})
tmpl = progress_template.get('postprocess')
if tmpl:
- self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict))
+ self._downloader.to_screen(
+ self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False)
self._downloader.to_console_title(self._downloader.evaluate_outtmpl(
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
@@ -213,5 +213,5 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
-class AudioConversionError(PostProcessingError):
+class AudioConversionError(PostProcessingError): # Deprecated
pass
diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py
index d36e0008e..606d90d3d 100644
--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@@ -1,11 +1,11 @@
import base64
-import imghdr
import os
import re
import subprocess
from .common import PostProcessor
from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP
+from ..compat import imghdr
from ..dependencies import mutagen
from ..utils import (
Popen,
@@ -157,14 +157,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self._report_run('atomicparsley', filename)
self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate_or_kill()
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {msg}')
+ stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode:
+ self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
# for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't write to the temporary file
- if b'No changes' in stdout:
+ if 'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index d1d8e1687..d0a917379 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -6,8 +6,8 @@ import re
import subprocess
import time
-from .common import AudioConversionError, PostProcessor
-from ..compat import compat_str
+from .common import PostProcessor
+from ..compat import functools, imghdr
from ..utils import (
ISO639Utils,
Popen,
@@ -18,6 +18,7 @@ from ..utils import (
dfxp2srt,
encodeArgument,
encodeFilename,
+ filter_dict,
float_or_none,
is_outdated_version,
orderedSet,
@@ -27,6 +28,7 @@ from ..utils import (
traverse_obj,
variadic,
write_json_file,
+ write_string,
)
EXT_TO_OUT_FORMATS = {
@@ -43,17 +45,37 @@ EXT_TO_OUT_FORMATS = {
'vtt': 'webvtt',
}
ACODECS = {
- 'mp3': 'libmp3lame',
- 'aac': 'aac',
- 'flac': 'flac',
- 'm4a': 'aac',
- 'opus': 'libopus',
- 'vorbis': 'libvorbis',
- 'wav': None,
- 'alac': None,
+ # name: (ext, encoder, opts)
+ 'mp3': ('mp3', 'libmp3lame', ()),
+ 'aac': ('m4a', 'aac', ('-f', 'adts')),
+ 'm4a': ('m4a', 'aac', ('-bsf:a', 'aac_adtstoasc')),
+ 'opus': ('opus', 'libopus', ()),
+ 'vorbis': ('ogg', 'libvorbis', ()),
+ 'flac': ('flac', 'flac', ()),
+ 'alac': ('m4a', None, ('-acodec', 'alac')),
+ 'wav': ('wav', None, ('-f', 'wav')),
}
+def create_mapping_re(supported):
+ return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported)))
+
+
+def resolve_mapping(source, mapping):
+ """
+ Get corresponding item from a mapping string like 'A>B/C>D/E'
+ @returns (target, error_message)
+ """
+ for pair in mapping.lower().split('/'):
+ kv = pair.split('>', 1)
+ if len(kv) == 1 or kv[0].strip() == source:
+ target = kv[-1].strip()
+ if target == source:
+ return target, f'already is in target format {source}'
+ return target, None
+ return None, f'could not find a mapping for {source}'
+
+
class FFmpegPostProcessorError(PostProcessingError):
pass
@@ -61,16 +83,8 @@ class FFmpegPostProcessorError(PostProcessingError):
class FFmpegPostProcessor(PostProcessor):
def __init__(self, downloader=None):
PostProcessor.__init__(self, downloader)
- self._determine_executables()
-
- def check_version(self):
- if not self.available:
- raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
-
- required_version = '10-0' if self.basename == 'avconv' else '1.0'
- if is_outdated_version(self._versions[self.basename], required_version):
- self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} '
- f'to version {required_version} or newer if you encounter any errors')
+ self._prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
+ self._paths = self._determine_executables()
@staticmethod
def get_versions_and_features(downloader=None):
@@ -81,88 +95,99 @@ class FFmpegPostProcessor(PostProcessor):
def get_versions(downloader=None):
return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
- _version_cache, _features_cache = {}, {}
+ _ffmpeg_to_avconv = {'ffmpeg': 'avconv', 'ffprobe': 'avprobe'}
def _determine_executables(self):
- programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
-
- def get_ffmpeg_version(path, prog):
- if path in self._version_cache:
- self._versions[prog], self._features = self._version_cache[path], self._features_cache.get(path, {})
- return
- out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
- ver = detect_exe_version(out) if out else False
- if ver:
- regexs = [
- r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
- r'n([0-9.]+)$', # Arch Linux
- # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
- ]
- for regex in regexs:
- mobj = re.match(regex, ver)
- if mobj:
- ver = mobj.group(1)
- self._versions[prog] = self._version_cache[path] = ver
- if prog != 'ffmpeg' or not out:
- return
+ programs = [*self._ffmpeg_to_avconv.keys(), *self._ffmpeg_to_avconv.values()]
- mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
- lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
- self._features = self._features_cache[path] = {
- 'fdk': '--enable-libfdk-aac' in out,
- 'setts': 'setts' in out.splitlines(),
- 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
- }
-
- self.basename = None
- self.probe_basename = None
- self._paths = None
- self._versions = None
- self._features = {}
-
- prefer_ffmpeg = self.get_param('prefer_ffmpeg', True)
location = self.get_param('ffmpeg_location')
if location is None:
- self._paths = {p: p for p in programs}
+ return {p: p for p in programs}
+
+ if not os.path.exists(location):
+ self.report_warning(f'ffmpeg-location {location} does not exist! Continuing without ffmpeg')
+ return {}
+ elif os.path.isdir(location):
+ dirname, basename = location, None
else:
- if not os.path.exists(location):
- self.report_warning(
- 'ffmpeg-location %s does not exist! '
- 'Continuing without ffmpeg.' % (location))
- self._versions = {}
- return
- elif os.path.isdir(location):
- dirname, basename = location, None
- else:
- basename = os.path.splitext(os.path.basename(location))[0]
- basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
- dirname = os.path.dirname(os.path.abspath(location))
- if basename in ('ffmpeg', 'ffprobe'):
- prefer_ffmpeg = True
-
- self._paths = {
- p: os.path.join(dirname, p) for p in programs}
- if basename:
- self._paths[basename] = location
-
- self._versions = {}
- # NB: probe must be first for _features to be populated correctly
- executables = {'probe_basename': ('ffprobe', 'avprobe'), 'basename': ('ffmpeg', 'avconv')}
- if prefer_ffmpeg is False:
- executables = {k: v[::-1] for k, v in executables.items()}
- for var, prefs in executables.items():
- for p in prefs:
- get_ffmpeg_version(self._paths[p], p)
- if self._versions[p]:
- setattr(self, var, p)
- break
-
- if self.basename == 'avconv':
- self.deprecation_warning(
- 'Support for avconv is deprecated and may be removed in a future version. Use ffmpeg instead')
- if self.probe_basename == 'avprobe':
+ basename = os.path.splitext(os.path.basename(location))[0]
+ basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg')
+ dirname = os.path.dirname(os.path.abspath(location))
+ if basename in self._ffmpeg_to_avconv.keys():
+ self._prefer_ffmpeg = True
+
+ paths = {p: os.path.join(dirname, p) for p in programs}
+ if basename:
+ paths[basename] = location
+ return paths
+
+ _version_cache, _features_cache = {None: None}, {}
+
+ def _get_ffmpeg_version(self, prog):
+ path = self._paths.get(prog)
+ if path in self._version_cache:
+ return self._version_cache[path], self._features_cache.get(path, {})
+ out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
+ ver = detect_exe_version(out) if out else False
+ if ver:
+ regexs = [
+ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
+ r'n([0-9.]+)$', # Arch Linux
+ # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+ ]
+ for regex in regexs:
+ mobj = re.match(regex, ver)
+ if mobj:
+ ver = mobj.group(1)
+ self._version_cache[path] = ver
+ if prog != 'ffmpeg' or not out:
+ return ver, {}
+
+ mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
+ lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
+ self._features_cache[path] = features = {
+ 'fdk': '--enable-libfdk-aac' in out,
+ 'setts': 'setts' in out.splitlines(),
+ 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
+ }
+ return ver, features
+
+ @property
+ def _versions(self):
+ return filter_dict({self.basename: self._version, self.probe_basename: self._probe_version})
+
+ @functools.cached_property
+ def basename(self):
+ self._version # run property
+ return self.basename
+
+ @functools.cached_property
+ def probe_basename(self):
+ self._probe_version # run property
+ return self.probe_basename
+
+ def _get_version(self, kind):
+ executables = (kind, self._ffmpeg_to_avconv[kind])
+ if not self._prefer_ffmpeg:
+ executables = reversed(executables)
+ basename, version, features = next(filter(
+ lambda x: x[1], ((p, *self._get_ffmpeg_version(p)) for p in executables)), (None, None, {}))
+ if kind == 'ffmpeg':
+ self.basename, self._features = basename, features
+ else:
+ self.probe_basename = basename
+ if basename == self._ffmpeg_to_avconv[kind]:
self.deprecation_warning(
- 'Support for avprobe is deprecated and may be removed in a future version. Use ffprobe instead')
+ f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and may be removed in a future version. Use {kind} instead')
+ return version
+
+ @functools.cached_property
+ def _version(self):
+ return self._get_version('ffmpeg')
+
+ @functools.cached_property
+ def _probe_version(self):
+ return self._get_version('ffprobe')
@property
def available(self):
@@ -170,7 +195,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def executable(self):
- return self._paths[self.basename]
+ return self._paths.get(self.basename)
@property
def probe_available(self):
@@ -178,7 +203,7 @@ class FFmpegPostProcessor(PostProcessor):
@property
def probe_executable(self):
- return self._paths[self.probe_basename]
+ return self._paths.get(self.probe_basename)
@staticmethod
def stream_copy_opts(copy=True, *, ext=None):
@@ -191,6 +216,15 @@ class FFmpegPostProcessor(PostProcessor):
if ext in ('mp4', 'mov', 'm4a'):
yield from ('-c:s', 'mov_text')
+ def check_version(self):
+ if not self.available:
+ raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location')
+
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
+ if is_outdated_version(self._version, required_version):
+ self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} '
+ f'to version {required_version} or newer if you encounter any errors')
+
def get_audio_codec(self, path):
if not self.probe_available and not self.available:
raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location')
@@ -205,14 +239,13 @@ class FFmpegPostProcessor(PostProcessor):
encodeArgument('-i')]
cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}')
- handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout_data, stderr_data = handle.communicate_or_kill()
- expected_ret = 0 if self.probe_available else 1
- if handle.wait() != expected_ret:
+ stdout, stderr, returncode = Popen.run(
+ cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ if returncode != (0 if self.probe_available else 1):
return None
except OSError:
return None
- output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
+ output = stdout if self.probe_available else stderr
if self.probe_available:
audio_codec = None
for line in output.split('\n'):
@@ -246,11 +279,10 @@ class FFmpegPostProcessor(PostProcessor):
]
cmd += opts
- cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
- self.write_debug('ffprobe command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate()
- return json.loads(stdout.decode('utf-8', 'replace'))
+ cmd.append(self._ffmpeg_filename_argument(path))
+ self.write_debug(f'ffprobe command line: {shell_quote(cmd)}')
+ stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ return json.loads(stdout)
def get_stream_number(self, path, keys, value):
streams = self.get_metadata_object(path)['streams']
@@ -270,12 +302,12 @@ class FFmpegPostProcessor(PostProcessor):
if fatal:
raise PostProcessingError(f'Unable to determine video duration: {e.msg}')
- def _duration_mismatch(self, d1, d2):
+ def _duration_mismatch(self, d1, d2, tolerance=2):
if not d1 or not d2:
return None
# The duration is often only known to the nearest second, so there can be <1sec disparity naturally.
# Further excuse an additional <1sec difference.
- return abs(d1 - d2) > 2
+ return abs(d1 - d2) > tolerance
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs):
return self.real_run_ffmpeg(
@@ -312,16 +344,15 @@ class FFmpegPostProcessor(PostProcessor):
for i, (path, opts) in enumerate(path_opts) if path)
self.write_debug('ffmpeg command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate_or_kill()
- if p.returncode not in variadic(expected_retcodes):
- stderr = stderr.decode('utf-8', 'replace').strip()
+ _, stderr, returncode = Popen.run(
+ cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ if returncode not in variadic(expected_retcodes):
self.write_debug(stderr)
- raise FFmpegPostProcessorError(stderr.split('\n')[-1])
+ raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1])
for out_path, _ in output_path_opts:
if out_path:
self.try_utime(out_path, oldest_mtime, oldest_mtime)
- return stderr.decode('utf-8', 'replace')
+ return stderr
def run_ffmpeg(self, path, out_path, opts, **kwargs):
return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs)
@@ -391,11 +422,12 @@ class FFmpegPostProcessor(PostProcessor):
class FFmpegExtractAudioPP(FFmpegPostProcessor):
COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
- SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac')
+ SUPPORTED_EXTS = tuple(ACODECS.keys())
+ FORMAT_RE = create_mapping_re(('best', *SUPPORTED_EXTS))
def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
FFmpegPostProcessor.__init__(self, downloader)
- self._preferredcodec = preferredcodec or 'best'
+ self.mapping = preferredcodec or 'best'
self._preferredquality = float_or_none(preferredquality)
self._nopostoverwrites = nopostoverwrites
@@ -430,71 +462,47 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
try:
FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
except FFmpegPostProcessorError as err:
- raise AudioConversionError(err.msg)
+ raise PostProcessingError(f'audio conversion failed: {err.msg}')
@PostProcessor._restrict_to(images=False)
def run(self, information):
orig_path = path = information['filepath']
- orig_ext = information['ext']
-
- if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
- self.to_screen('Skipping audio extraction since the file is already in a common audio format')
+ target_format, _skip_msg = resolve_mapping(information['ext'], self.mapping)
+ if target_format == 'best' and information['ext'] in self.COMMON_AUDIO_EXTS:
+ target_format, _skip_msg = None, 'the file is already in a common audio format'
+ if not target_format:
+ self.to_screen(f'Not converting audio {orig_path}; {_skip_msg}')
return [], information
filecodec = self.get_audio_codec(path)
if filecodec is None:
raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
- more_opts = []
- if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
- if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
- # Lossless, but in another container
- acodec = 'copy'
- extension = 'm4a'
- more_opts = ['-bsf:a', 'aac_adtstoasc']
- elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
- # Lossless if possible
- acodec = 'copy'
- extension = filecodec
- if filecodec == 'aac':
- more_opts = ['-f', 'adts']
- if filecodec == 'vorbis':
- extension = 'ogg'
- elif filecodec == 'alac':
- acodec = None
- extension = 'm4a'
- more_opts += ['-acodec', 'alac']
- else:
- # MP3 otherwise.
- acodec = 'libmp3lame'
- extension = 'mp3'
- more_opts = self._quality_args(acodec)
+ if filecodec == 'aac' and target_format in ('m4a', 'best'):
+ # Lossless, but in another container
+ extension, _, more_opts, acodec = *ACODECS['m4a'], 'copy'
+ elif target_format == 'best' or target_format == filecodec:
+ # Lossless if possible
+ try:
+ extension, _, more_opts, acodec = *ACODECS[filecodec], 'copy'
+ except KeyError:
+ extension, acodec, more_opts = ACODECS['mp3']
else:
# We convert the audio (lossy if codec is lossy)
- acodec = ACODECS[self._preferredcodec]
+ extension, acodec, more_opts = ACODECS[target_format]
if acodec == 'aac' and self._features.get('fdk'):
- acodec = 'libfdk_aac'
- extension = self._preferredcodec
+ acodec, more_opts = 'libfdk_aac', []
+
+ more_opts = list(more_opts)
+ if acodec != 'copy':
more_opts = self._quality_args(acodec)
- if self._preferredcodec == 'aac':
- more_opts += ['-f', 'adts']
- elif self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a', 'aac_adtstoasc']
- elif self._preferredcodec == 'vorbis':
- extension = 'ogg'
- elif self._preferredcodec == 'wav':
- extension = 'wav'
- more_opts += ['-f', 'wav']
- elif self._preferredcodec == 'alac':
- extension = 'm4a'
- more_opts += ['-acodec', 'alac']
-
- prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
- temp_path = new_path = prefix + sep + extension
+
+ # not os.path.splitext, since the latter does not work on unicode in all setups
+ temp_path = new_path = f'{path.rpartition(".")[0]}.{extension}'
if new_path == path:
if acodec == 'copy':
- self.to_screen(f'File is already in target format {self._preferredcodec}, skipping')
+ self.to_screen(f'Not converting audio {orig_path}; file is already in target format {target_format}')
return [], information
orig_path = prepend_extension(path, 'orig')
temp_path = prepend_extension(path, 'temp')
@@ -503,14 +511,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
self.to_screen('Post-process file %s exists, skipping' % new_path)
return [], information
- try:
- self.to_screen(f'Destination: {new_path}')
- self.run_ffmpeg(path, temp_path, acodec, more_opts)
- except AudioConversionError as e:
- raise PostProcessingError(
- 'audio conversion failed: ' + e.msg)
- except Exception:
- raise PostProcessingError('error running ' + self.basename)
+ self.to_screen(f'Destination: {new_path}')
+ self.run_ffmpeg(path, temp_path, acodec, more_opts)
os.replace(path, orig_path)
os.replace(temp_path, new_path)
@@ -520,26 +522,19 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
# Try to update the date time for extracted audio file.
if information.get('filetime') is not None:
self.try_utime(
- new_path, time.time(), information['filetime'],
- errnote='Cannot update utime of audio file')
+ new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file')
return [orig_path], information
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mka', 'ogg', *FFmpegExtractAudioPP.SUPPORTED_EXTS)
- FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS)))
+ FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
_ACTION = 'converting'
def __init__(self, downloader=None, preferedformat=None):
super().__init__(downloader)
- self._preferedformats = preferedformat.lower().split('/')
-
- def _target_ext(self, source_ext):
- for pair in self._preferedformats:
- kv = pair.split('>')
- if len(kv) == 1 or kv[0].strip() == source_ext:
- return kv[-1].strip()
+ self.mapping = preferedformat
@staticmethod
def _options(target_ext):
@@ -550,11 +545,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
@PostProcessor._restrict_to(images=False)
def run(self, info):
filename, source_ext = info['filepath'], info['ext'].lower()
- target_ext = self._target_ext(source_ext)
- _skip_msg = (
- f'could not find a mapping for {source_ext}' if not target_ext
- else f'already is in target format {source_ext}' if source_ext == target_ext
- else None)
+ target_ext, _skip_msg = resolve_mapping(source_ext, self.mapping)
if _skip_msg:
self.to_screen(f'Not {self._ACTION} media file "{filename}"; {_skip_msg}')
return [], info
@@ -762,7 +753,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
for key, value in info.items():
mobj = re.fullmatch(meta_regex, key)
if value is not None and mobj:
- metadata[mobj.group('i') or 'common'][mobj.group('key')] = value
+ metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '')
# Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags
yield ('-write_id3v1', '1')
@@ -1030,8 +1021,8 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
self.to_screen('Chapter %03d; Destination: %s' % (number, destination))
return (
destination,
- ['-ss', compat_str(chapter['start_time']),
- '-t', compat_str(chapter['end_time'] - chapter['start_time'])])
+ ['-ss', str(chapter['start_time']),
+ '-t', str(chapter['end_time'] - chapter['start_time'])])
@PostProcessor._restrict_to(images=False)
def run(self, info):
@@ -1054,23 +1045,22 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor):
class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
SUPPORTED_EXTS = ('jpg', 'png', 'webp')
+ FORMAT_RE = create_mapping_re(SUPPORTED_EXTS)
def __init__(self, downloader=None, format=None):
super().__init__(downloader)
- self.format = format
+ self.mapping = format
- @staticmethod
- def is_webp(path):
- with open(encodeFilename(path), 'rb') as f:
- b = f.read(12)
- return b[0:4] == b'RIFF' and b[8:] == b'WEBP'
+ @classmethod
+ def is_webp(cls, path):
+ write_string(f'DeprecationWarning: {cls.__module__}.{cls.__name__}.is_webp is deprecated')
+ return imghdr.what(path) == 'webp'
def fixup_webp(self, info, idx=-1):
thumbnail_filename = info['thumbnails'][idx]['filepath']
_, thumbnail_ext = os.path.splitext(thumbnail_filename)
if thumbnail_ext:
- thumbnail_ext = thumbnail_ext[1:].lower()
- if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
+ if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp':
self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
webp_filename = replace_extension(thumbnail_filename, 'webp')
os.replace(thumbnail_filename, webp_filename)
@@ -1103,18 +1093,17 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
continue
has_thumbnail = True
self.fixup_webp(info, idx)
- _, thumbnail_ext = os.path.splitext(original_thumbnail)
- if thumbnail_ext:
- thumbnail_ext = thumbnail_ext[1:].lower()
+ thumbnail_ext = os.path.splitext(original_thumbnail)[1][1:].lower()
if thumbnail_ext == 'jpeg':
thumbnail_ext = 'jpg'
- if thumbnail_ext == self.format:
- self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail)
+ target_ext, _skip_msg = resolve_mapping(thumbnail_ext, self.mapping)
+ if _skip_msg:
+ self.to_screen(f'Not converting thumbnail "{original_thumbnail}"; {_skip_msg}')
continue
- thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format)
+ thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, target_ext)
files_to_delete.append(original_thumbnail)
info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension(
- info['__files_to_move'][original_thumbnail], self.format)
+ info['__files_to_move'][original_thumbnail], target_ext)
if not has_thumbnail:
self.to_screen('There aren\'t any thumbnails to convert')
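
The audio, video and thumbnail convertors now all share the resolve_mapping helper added near the top of this file, so mapping strings such as "webp>png/jpg" behave identically everywhere. A usage sketch (values invented for illustration):

    from yt_dlp.postprocessor.ffmpeg import resolve_mapping

    print(resolve_mapping('aac', 'aac>mp3/best'))   # ('mp3', None) - explicit pair wins
    print(resolve_mapping('flac', 'aac>mp3/best'))  # ('best', None) - unconditional fallback
    print(resolve_mapping('mp3', 'mp3'))            # ('mp3', 'already is in target format mp3')
    print(resolve_mapping('ogg', 'aac>mp3'))        # (None, 'could not find a mapping for ogg')
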
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
index 8a2ef9065..de3505e11 100644
--- a/yt_dlp/postprocessor/modify_chapters.py
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -32,13 +32,13 @@ class ModifyChaptersPP(FFmpegPostProcessor):
real_duration = self._get_real_video_duration(info['filepath'])
if not chapters:
- chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}]
+ chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}]
info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters)
if not cuts:
return [], info
- if self._duration_mismatch(real_duration, info.get('duration')):
+ if self._duration_mismatch(real_duration, info.get('duration'), 1):
if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']):
self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut')
return [], info
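
The tolerance parameter added to _duration_mismatch lets ModifyChaptersPP apply a stricter 1-second bound when deciding whether the video was already cut, while other callers keep the default of 2. A standalone re-statement of the logic for illustration:

    def _duration_mismatch(d1, d2, tolerance=2):
        if not d1 or not d2:
            return None
        return abs(d1 - d2) > tolerance

    print(_duration_mismatch(100.0, 101.5))     # False - within the default 2s
    print(_duration_mismatch(100.0, 101.5, 1))  # True - fails the stricter 1s bound
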
diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py
index 1a9f5dc66..ff50d5b4f 100644
--- a/yt_dlp/postprocessor/sponskrub.py
+++ b/yt_dlp/postprocessor/sponskrub.py
@@ -84,17 +84,15 @@ class SponSkrubPP(PostProcessor):
cmd = [encodeArgument(i) for i in cmd]
self.write_debug('sponskrub command line: %s' % shell_quote(cmd))
- pipe = None if self.get_param('verbose') else subprocess.PIPE
- p = Popen(cmd, stdout=pipe)
- stdout = p.communicate_or_kill()[0]
+ stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE)
- if p.returncode == 0:
+ if not returncode:
os.replace(temp_filename, filename)
self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
- elif p.returncode == 3:
+ elif returncode == 3:
self.to_screen('No segments in the SponsorBlock database')
else:
- msg = stdout.decode('utf-8', 'replace').strip() if stdout else ''
- msg = msg.split('\n')[0 if msg.lower().startswith('unrecognised') else -1]
- raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s' % p.returncode)
+ raise PostProcessingError(
+ stdout.strip().splitlines()[0 if stdout.strip().lower().startswith('unrecognised') else -1]
+ or f'sponskrub failed with error code {returncode}')
return [], information
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
index 7f75561db..d79ed7ae7 100644
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -1,9 +1,9 @@
import hashlib
import json
import re
+import urllib.parse
from .ffmpeg import FFmpegPostProcessor
-from ..compat import compat_urllib_parse_urlencode
class SponsorBlockPP(FFmpegPostProcessor):
@@ -86,7 +86,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
def _get_sponsor_segments(self, video_id, service):
hash = hashlib.sha256(video_id.encode('ascii')).hexdigest()
# SponsorBlock API recommends using first 4 hash characters.
- url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({
+ url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({
'service': service,
'categories': json.dumps(self._categories),
'actionTypes': json.dumps(['skip', 'poi'])
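
With the compat alias removed, the SponsorBlock query string is built with the stdlib directly; on Python 3 the two spellings are equivalent. For instance:

    import urllib.parse

    print(urllib.parse.urlencode({'service': 'youtube', 'categories': '["sponsor"]'}))
    # service=youtube&categories=%5B%22sponsor%22%5D
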
diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py
index 34ba1394a..f93328f63 100644
--- a/yt_dlp/socks.py
+++ b/yt_dlp/socks.py
@@ -8,8 +8,9 @@
import collections
import socket
+import struct
-from .compat import compat_ord, compat_struct_pack, compat_struct_unpack
+from .compat import compat_ord
__author__ = 'Timo Schmid <coding@timoschmid.de>'
@@ -19,7 +20,7 @@ SOCKS4_REPLY_VERSION = 0x00
# if the client cannot resolve the destination host's domain name to find its
# IP address, it should set the first three bytes of DSTIP to NULL and the last
# byte to a non-zero value.
-SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF)
+SOCKS4_DEFAULT_DSTIP = struct.pack('!BBBB', 0, 0, 0, 0xFF)
SOCKS5_VERSION = 5
SOCKS5_USER_AUTH_VERSION = 0x01
@@ -122,11 +123,11 @@ class sockssocket(socket.socket):
def _recv_bytes(self, cnt):
data = self.recvall(cnt)
- return compat_struct_unpack(f'!{cnt}B', data)
+ return struct.unpack(f'!{cnt}B', data)
@staticmethod
def _len_and_data(data):
- return compat_struct_pack('!B', len(data)) + data
+ return struct.pack('!B', len(data)) + data
def _check_response_version(self, expected_version, got_version):
if got_version != expected_version:
@@ -147,7 +148,7 @@ class sockssocket(socket.socket):
ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a)
- packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
+ packet = struct.pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr
username = (self._proxy.username or '').encode()
packet += username + b'\x00'
@@ -157,7 +158,7 @@ class sockssocket(socket.socket):
self.sendall(packet)
- version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8))
+ version, resp_code, dstport, dsthost = struct.unpack('!BBHI', self.recvall(8))
self._check_response_version(SOCKS4_REPLY_VERSION, version)
@@ -171,14 +172,14 @@ class sockssocket(socket.socket):
self._setup_socks4(address, is_4a=True)
def _socks5_auth(self):
- packet = compat_struct_pack('!B', SOCKS5_VERSION)
+ packet = struct.pack('!B', SOCKS5_VERSION)
auth_methods = [Socks5Auth.AUTH_NONE]
if self._proxy.username and self._proxy.password:
auth_methods.append(Socks5Auth.AUTH_USER_PASS)
- packet += compat_struct_pack('!B', len(auth_methods))
- packet += compat_struct_pack(f'!{len(auth_methods)}B', *auth_methods)
+ packet += struct.pack('!B', len(auth_methods))
+ packet += struct.pack(f'!{len(auth_methods)}B', *auth_methods)
self.sendall(packet)
@@ -194,7 +195,7 @@ class sockssocket(socket.socket):
if method == Socks5Auth.AUTH_USER_PASS:
username = self._proxy.username.encode()
password = self._proxy.password.encode()
- packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION)
+ packet = struct.pack('!B', SOCKS5_USER_AUTH_VERSION)
packet += self._len_and_data(username) + self._len_and_data(password)
self.sendall(packet)
@@ -214,14 +215,14 @@ class sockssocket(socket.socket):
self._socks5_auth()
reserved = 0
- packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
+ packet = struct.pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved)
if ipaddr is None:
destaddr = destaddr.encode()
- packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
+ packet += struct.pack('!B', Socks5AddressType.ATYP_DOMAINNAME)
packet += self._len_and_data(destaddr)
else:
- packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
- packet += compat_struct_pack('!H', port)
+ packet += struct.pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr
+ packet += struct.pack('!H', port)
self.sendall(packet)
@@ -240,7 +241,7 @@ class sockssocket(socket.socket):
destaddr = self.recvall(alen)
elif atype == Socks5AddressType.ATYP_IPV6:
destaddr = self.recvall(16)
- destport = compat_struct_unpack('!H', self.recvall(2))[0]
+ destport = struct.unpack('!H', self.recvall(2))[0]
return (destaddr, destport)
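
The compat_struct_pack/compat_struct_unpack wrappers only papered over a Python 2 limitation; Python 3's struct module handles bytes natively, so socks.py now calls it directly. For example:

    import struct

    packet = struct.pack('!BBH', 5, 1, 443)  # version, command, port - sample values
    print(struct.unpack('!BBH', packet))     # (5, 1, 443)
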
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 8c8ea384b..fba64be5a 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
import atexit
import base64
import binascii
@@ -11,10 +10,13 @@ import datetime
import email.header
import email.utils
import errno
-import functools
import gzip
import hashlib
import hmac
+import html.entities
+import html.parser
+import http.client
+import http.cookiejar
import importlib.util
import io
import itertools
@@ -30,41 +32,28 @@ import re
import shlex
import socket
import ssl
+import struct
import subprocess
import sys
import tempfile
import time
import traceback
+import types
+import urllib.error
import urllib.parse
+import urllib.request
import xml.etree.ElementTree
import zlib
+from .compat import asyncio, functools # isort: split
from .compat import (
- asyncio,
- compat_chr,
- compat_cookiejar,
compat_etree_fromstring,
compat_expanduser,
- compat_html_entities,
- compat_html_entities_html5,
compat_HTMLParseError,
- compat_HTMLParser,
- compat_http_client,
- compat_HTTPError,
compat_os_name,
- compat_parse_qs,
compat_shlex_quote,
- compat_str,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urllib_request,
- compat_urlparse,
)
-from .dependencies import brotli, certifi, websockets
+from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket
@@ -73,8 +62,8 @@ def register_socks_protocols():
# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
# URLs with protocols not in urlparse.uses_netloc are not handled correctly
for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
- if scheme not in compat_urlparse.uses_netloc:
- compat_urlparse.uses_netloc.append(scheme)
+ if scheme not in urllib.parse.uses_netloc:
+ urllib.parse.uses_netloc.append(scheme)
# This is not clearly defined otherwise
@@ -146,6 +135,7 @@ USER_AGENTS = {
NO_DEFAULT = object()
+IDENTITY = lambda x: x
ENGLISH_MONTH_NAMES = [
'January', 'February', 'March', 'April', 'May', 'June',
@@ -248,6 +238,7 @@ JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<jso
NUMBER_RE = r'\d+(?:\.\d+)?'
+@functools.cache
def preferredencoding():
"""Get preferred encoding.
@@ -315,7 +306,7 @@ def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
def _find_xpath(xpath):
return node.find(xpath)
- if isinstance(xpath, (str, compat_str)):
+ if isinstance(xpath, str):
n = _find_xpath(xpath)
else:
for xp in xpath:
@@ -362,14 +353,14 @@ def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
return n.attrib[key]
-def get_element_by_id(id, html):
+def get_element_by_id(id, html, **kwargs):
"""Return the content of the tag with the specified ID in the passed HTML document"""
- return get_element_by_attribute('id', id, html)
+ return get_element_by_attribute('id', id, html, **kwargs)
-def get_element_html_by_id(id, html):
+def get_element_html_by_id(id, html, **kwargs):
"""Return the html of the tag with the specified ID in the passed HTML document"""
- return get_element_html_by_attribute('id', id, html)
+ return get_element_html_by_attribute('id', id, html, **kwargs)
def get_element_by_class(class_name, html):
@@ -384,27 +375,27 @@ def get_element_html_by_class(class_name, html):
return retval[0] if retval else None
-def get_element_by_attribute(attribute, value, html, escape_value=True):
- retval = get_elements_by_attribute(attribute, value, html, escape_value)
+def get_element_by_attribute(attribute, value, html, **kwargs):
+ retval = get_elements_by_attribute(attribute, value, html, **kwargs)
return retval[0] if retval else None
-def get_element_html_by_attribute(attribute, value, html, escape_value=True):
- retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
+def get_element_html_by_attribute(attribute, value, html, **kwargs):
+ retval = get_elements_html_by_attribute(attribute, value, html, **kwargs)
return retval[0] if retval else None
-def get_elements_by_class(class_name, html):
+def get_elements_by_class(class_name, html, **kwargs):
"""Return the content of all tags with the specified class in the passed HTML document as a list"""
return get_elements_by_attribute(
- 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
html, escape_value=False)
def get_elements_html_by_class(class_name, html):
"""Return the html of all tags with the specified class in the passed HTML document as a list"""
return get_elements_html_by_attribute(
- 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name),
html, escape_value=False)
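
The class-matching regex replaces \b word boundaries with lookarounds, so a hyphenated class like "foo-qux" no longer satisfies a query for "foo". A standalone sketch (in the full pattern the quotes around the attribute value satisfy the lookarounds at the string edges):

    import re

    pattern = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape('foo')
    print(bool(re.fullmatch(pattern, 'bar foo baz')))      # True
    print(bool(re.fullmatch(pattern, 'bar foo-qux baz')))  # False - the old \b pattern matched this
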
@@ -443,7 +434,7 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value
)
-class HTMLBreakOnClosingTagParser(compat_HTMLParser):
+class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
"""
HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
closing tag for the first opening tag it has encountered, and can be used
@@ -455,7 +446,7 @@ class HTMLBreakOnClosingTagParser(compat_HTMLParser):
def __init__(self):
self.tagstack = collections.deque()
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def __enter__(self):
return self
@@ -520,22 +511,22 @@ def get_element_text_and_html_by_tag(tag, html):
raise compat_HTMLParseError('unexpected end of html')
-class HTMLAttributeParser(compat_HTMLParser):
+class HTMLAttributeParser(html.parser.HTMLParser):
"""Trivial HTML parser to gather the attributes for a single element"""
def __init__(self):
self.attrs = {}
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
self.attrs = dict(attrs)
-class HTMLListAttrsParser(compat_HTMLParser):
+class HTMLListAttrsParser(html.parser.HTMLParser):
"""HTML parser to gather the attributes for the elements of a list"""
def __init__(self):
- compat_HTMLParser.__init__(self)
+ html.parser.HTMLParser.__init__(self)
self.items = []
self._level = 0
@@ -594,6 +585,19 @@ def clean_html(html):
return html.strip()
+class LenientJSONDecoder(json.JSONDecoder):
+ def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs):
+ self.transform_source, self.ignore_extra = transform_source, ignore_extra
+ super().__init__(*args, **kwargs)
+
+ def decode(self, s):
+ if self.transform_source:
+ s = self.transform_source(s)
+ if self.ignore_extra:
+ return self.raw_decode(s.lstrip())[0]
+ return super().decode(s)
+
+
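
LenientJSONDecoder gives callers a declarative way to pre-process and truncate sloppy JSON. A usage sketch (input invented for illustration):

    import json
    from yt_dlp.utils import LenientJSONDecoder

    # ignore_extra stops after the first complete JSON value instead of
    # raising on trailing data; transform_source could strip a junk prefix
    s = '{"a": 1} trailing garbage'
    print(json.loads(s, cls=LenientJSONDecoder, ignore_extra=True))  # {'a': 1}
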
def sanitize_open(filename, open_mode):
"""Try to open the given filename, and slightly tweak it if this fails.
@@ -619,9 +623,9 @@ def sanitize_open(filename, open_mode):
# Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
raise LockingUnsupportedError()
stream = locked_file(filename, open_mode, block=False).__enter__()
- except LockingUnsupportedError:
+ except OSError:
stream = open(filename, open_mode)
- return (stream, filename)
+ return stream, filename
except OSError as err:
if attempt or err.errno in (errno.EACCES,):
raise
@@ -714,7 +718,9 @@ def sanitize_path(s, force=False):
def sanitize_url(url):
# Prepend protocol-less URLs with `http:` scheme in order to mitigate
# the number of unwanted failures due to missing protocol
- if url.startswith('//'):
+ if url is None:
+ return
+ elif url.startswith('//'):
return 'http:%s' % url
# Fix some common typos seen so far
COMMON_TYPOS = (
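
sanitize_url now tolerates None, so callers can pass optional URLs straight through without guarding. For instance:

    from yt_dlp.utils import sanitize_url

    print(sanitize_url('//example.com/x'))  # 'http://example.com/x'
    print(sanitize_url(None))               # None - previously an AttributeError
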
@@ -730,10 +736,10 @@ def sanitize_url(url):
def extract_basic_auth(url):
- parts = compat_urlparse.urlsplit(url)
+ parts = urllib.parse.urlsplit(url)
if parts.username is None:
return url, None
- url = compat_urlparse.urlunsplit(parts._replace(netloc=(
+ url = urllib.parse.urlunsplit(parts._replace(netloc=(
parts.hostname if parts.port is None
else '%s:%d' % (parts.hostname, parts.port))))
auth_payload = base64.b64encode(
@@ -746,7 +752,7 @@ def sanitized_Request(url, *args, **kwargs):
if auth_header is not None:
headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
headers['Authorization'] = auth_header
- return compat_urllib_request.Request(url, *args, **kwargs)
+ return urllib.request.Request(url, *args, **kwargs)
def expand_path(s):
@@ -754,13 +760,16 @@ def expand_path(s):
return os.path.expandvars(compat_expanduser(s))
-def orderedSet(iterable):
- """ Remove all duplicates from the input iterable """
- res = []
- for el in iterable:
- if el not in res:
- res.append(el)
- return res
+def orderedSet(iterable, *, lazy=False):
+ """Remove all duplicates from the input iterable"""
+ def _iter():
+ seen = [] # Do not use set since the items can be unhashable
+ for x in iterable:
+ if x not in seen:
+ seen.append(x)
+ yield x
+
+ return _iter() if lazy else list(_iter())
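
orderedSet keeps its list-based membership test (the items may be unhashable), and the new lazy flag hands back the generator itself for callers that only iterate once. For example:

    from yt_dlp.utils import orderedSet

    print(orderedSet([1, 2, 1, 3, 2]))      # [1, 2, 3]
    gen = orderedSet([1, 2, 1], lazy=True)  # generator - nothing consumed yet
    print(list(gen))                        # [1, 2]
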
def _htmlentity_transform(entity_with_semicolon):
@@ -768,13 +777,13 @@ def _htmlentity_transform(entity_with_semicolon):
entity = entity_with_semicolon[:-1]
# Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
+ if entity in html.entities.name2codepoint:
+ return chr(html.entities.name2codepoint[entity])
# TODO: HTML5 allows entities without a semicolon. For example,
# '&Eacuteric' should be decoded as 'Éric'.
- if entity_with_semicolon in compat_html_entities_html5:
- return compat_html_entities_html5[entity_with_semicolon]
+ if entity_with_semicolon in html.entities.html5:
+ return html.entities.html5[entity_with_semicolon]
mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
if mobj is not None:
@@ -786,7 +795,7 @@ def _htmlentity_transform(entity_with_semicolon):
base = 10
# See https://github.com/ytdl-org/youtube-dl/issues/7518
with contextlib.suppress(ValueError):
- return compat_chr(int(numstr, base))
+ return chr(int(numstr, base))
# Unknown entity in name, return its literal representation
return '&%s;' % entity
@@ -813,12 +822,9 @@ def escapeHTML(text):
def process_communicate_or_kill(p, *args, **kwargs):
- try:
- return p.communicate(*args, **kwargs)
- except BaseException: # Including KeyboardInterrupt
- p.kill()
- p.wait()
- raise
+ write_string('DeprecationWarning: yt_dlp.utils.process_communicate_or_kill is deprecated '
+ 'and may be removed in a future version. Use yt_dlp.utils.Popen.communicate_or_kill instead')
+ return Popen.communicate_or_kill(p, *args, **kwargs)
class Popen(subprocess.Popen):
@@ -828,11 +834,30 @@ class Popen(subprocess.Popen):
else:
_startupinfo = None
- def __init__(self, *args, **kwargs):
+ def __init__(self, *args, text=False, **kwargs):
+ if text is True:
+ kwargs['universal_newlines'] = True # For 3.6 compatibility
+ kwargs.setdefault('encoding', 'utf-8')
+ kwargs.setdefault('errors', 'replace')
super().__init__(*args, **kwargs, startupinfo=self._startupinfo)
def communicate_or_kill(self, *args, **kwargs):
- return process_communicate_or_kill(self, *args, **kwargs)
+ try:
+ return self.communicate(*args, **kwargs)
+ except BaseException: # Including KeyboardInterrupt
+ self.kill(timeout=None)
+ raise
+
+ def kill(self, *, timeout=0):
+ super().kill()
+ if timeout != 0:
+ self.wait(timeout=timeout)
+
+ @classmethod
+ def run(cls, *args, **kwargs):
+ with cls(*args, **kwargs) as proc:
+ stdout, stderr = proc.communicate_or_kill()
+ return stdout or '', stderr or '', proc.returncode
def get_subprocess_encoding():
@@ -859,7 +884,7 @@ def decodeFilename(b, for_subprocess=False):
def encodeArgument(s):
# Legacy code that uses byte strings
# Uncomment the following line after fixing all post processors
- # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+ # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
return s if isinstance(s, str) else s.decode('ascii')
@@ -873,7 +898,7 @@ def decodeOption(optval):
if isinstance(optval, bytes):
optval = optval.decode(preferredencoding())
- assert isinstance(optval, compat_str)
+ assert isinstance(optval, str)
return optval
@@ -919,22 +944,23 @@ def make_HTTPS_handler(params, **kwargs):
context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
# Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998
context.set_ciphers('DEFAULT')
+
context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
if opts_check_certificate:
if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
context.load_verify_locations(cafile=certifi.where())
- else:
- try:
- context.load_default_certs()
- # Work around the issue in load_default_certs when there are bad certificates. See:
- # https://github.com/yt-dlp/yt-dlp/issues/1060,
- # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
- except ssl.SSLError:
- # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
- if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
- for storename in ('CA', 'ROOT'):
- _ssl_load_windows_store_certs(context, storename)
- context.set_default_verify_paths()
+ try:
+ context.load_default_certs()
+ # Work around the issue in load_default_certs when there are bad certificates. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/1060,
+ # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+ except ssl.SSLError:
+ # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
+ if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+ for storename in ('CA', 'ROOT'):
+ _ssl_load_windows_store_certs(context, storename)
+ context.set_default_verify_paths()
+
client_certfile = params.get('client_certificate')
if client_certfile:
try:
@@ -943,6 +969,13 @@ def make_HTTPS_handler(params, **kwargs):
password=params.get('client_certificate_password'))
except ssl.SSLError:
raise YoutubeDLError('Unable to load client certificate')
+
+ # Some servers may reject requests if ALPN extension is not sent. See:
+ # https://github.com/python/cpython/issues/85140
+ # https://github.com/yt-dlp/yt-dlp/issues/3878
+ with contextlib.suppress(NotImplementedError):
+ context.set_alpn_protocols(['http/1.1'])
+
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
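
Advertising ALPN mirrors what modern TLS clients send; some servers reject a ClientHello without it (see the linked issues). The equivalent stdlib incantation, tolerating builds where ALPN is unavailable:

    import contextlib
    import ssl

    ctx = ssl.create_default_context()
    # Some stripped-down builds raise NotImplementedError here
    with contextlib.suppress(NotImplementedError):
        ctx.set_alpn_protocols(['http/1.1'])
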
@@ -970,7 +1003,7 @@ class YoutubeDLError(Exception):
super().__init__(self.msg)
-network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
@@ -993,12 +1026,14 @@ class ExtractorError(YoutubeDLError):
self.video_id = video_id
self.ie = ie
self.exc_info = sys.exc_info() # preserve original exception
+ if isinstance(self.exc_info[1], ExtractorError):
+ self.exc_info = self.exc_info[1].exc_info
super().__init__(''.join((
- format_field(ie, template='[%s] '),
- format_field(video_id, template='%s: '),
+ format_field(ie, None, '[%s] '),
+ format_field(video_id, None, '%s: '),
msg,
- format_field(cause, template=' (caused by %r)'),
+ format_field(cause, None, ' (caused by %r)'),
'' if expected else bug_reports_message())))
def format_traceback(self):
@@ -1220,7 +1255,7 @@ def handle_youtubedl_headers(headers):
return filtered_headers
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+class YoutubeDLHandler(urllib.request.HTTPHandler):
"""Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds
@@ -1239,11 +1274,11 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
"""
def __init__(self, params, *args, **kwargs):
- compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
self._params = params
def http_open(self, req):
- conn_class = compat_http_client.HTTPConnection
+ conn_class = http.client.HTTPConnection
socks_proxy = req.headers.get('Ytdl-socks-proxy')
if socks_proxy:
@@ -1296,7 +1331,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
req.headers = handle_youtubedl_headers(req.headers)
- return req
+ return super().do_request_(req)
def http_response(self, req, resp):
old_resp = resp
@@ -1318,18 +1353,18 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
break
else:
raise original_ioerror
- resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = io.BytesIO(self.deflate(resp.read()))
- resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# brotli
if resp.headers.get('Content-encoding', '') == 'br':
- resp = compat_urllib_request.addinfourl(
+ resp = urllib.request.addinfourl(
io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
@@ -1352,9 +1387,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
def make_socks_conn_class(base_class, socks_proxy):
assert issubclass(base_class, (
- compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+ http.client.HTTPConnection, http.client.HTTPSConnection))
- url_components = compat_urlparse.urlparse(socks_proxy)
+ url_components = urllib.parse.urlparse(socks_proxy)
if url_components.scheme.lower() == 'socks5':
socks_type = ProxyType.SOCKS5
elif url_components.scheme.lower() in ('socks', 'socks4'):
@@ -1365,7 +1400,7 @@ def make_socks_conn_class(base_class, socks_proxy):
def unquote_if_non_empty(s):
if not s:
return s
- return compat_urllib_parse_unquote_plus(s)
+ return urllib.parse.unquote_plus(s)
proxy_args = (
socks_type,
@@ -1383,7 +1418,7 @@ def make_socks_conn_class(base_class, socks_proxy):
self.sock.settimeout(self.timeout)
self.sock.connect((self.host, self.port))
- if isinstance(self, compat_http_client.HTTPSConnection):
+ if isinstance(self, http.client.HTTPSConnection):
if hasattr(self, '_context'): # Python > 2.6
self.sock = self._context.wrap_socket(
self.sock, server_hostname=self.host)
@@ -1393,10 +1428,10 @@ def make_socks_conn_class(base_class, socks_proxy):
return SocksConnection
-class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
def __init__(self, params, https_conn_class=None, *args, **kwargs):
- compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or http.client.HTTPSConnection
self._params = params
def https_open(self, req):
@@ -1423,7 +1458,7 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
raise
-class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
"""
See [1] for cookie file format.
@@ -1494,7 +1529,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
# Store session cookies with `expires` set to 0 instead of an empty string
for cookie in self:
@@ -1511,7 +1546,7 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
if self.filename is not None:
filename = self.filename
else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
def prepare_line(line):
if line.startswith(self._HTTPONLY_PREFIX):
@@ -1521,10 +1556,10 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
return line
cookie_list = line.split('\t')
if len(cookie_list) != self._ENTRY_LEN:
- raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
cookie = self._CookieFileEntry(*cookie_list)
if cookie.expires_at and not cookie.expires_at.isdigit():
- raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
return line
cf = io.StringIO()
@@ -1532,9 +1567,9 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
for line in f:
try:
cf.write(prepare_line(line))
- except compat_cookiejar.LoadError as e:
+ except http.cookiejar.LoadError as e:
if f'{line.strip()} '[0] in '[{"':
- raise compat_cookiejar.LoadError(
+ raise http.cookiejar.LoadError(
'Cookies file must be Netscape formatted, not JSON. See '
'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl')
write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
@@ -1557,18 +1592,18 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
cookie.discard = True
-class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
- compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+ urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)
def http_response(self, request, response):
- return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+ return urllib.request.HTTPCookieProcessor.http_response(self, request, response)
- https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_request = urllib.request.HTTPCookieProcessor.http_request
https_response = http_response
-class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler):
"""YoutubeDL redirect handler
The code is based on HTTPRedirectHandler implementation from CPython [1].
@@ -1583,7 +1618,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
3. https://github.com/ytdl-org/youtube-dl/issues/28768
"""
- http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
+ http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302
def redirect_request(self, req, fp, code, msg, headers, newurl):
"""Return a Request or None in response to a redirect.
@@ -1598,7 +1633,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
m = req.get_method()
if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m == "POST")):
- raise compat_HTTPError(req.full_url, code, msg, headers, fp)
+ raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp)
# Strictly (according to RFC 2616), 301 or 302 in response to
# a POST MUST NOT cause a redirection without confirmation
# from the user (of urllib.request, in this case). In practice,
@@ -1625,7 +1660,7 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
if code in (301, 302) and m == 'POST':
m = 'GET'
- return compat_urllib_request.Request(
+ return urllib.request.Request(
newurl, headers=newheaders, origin_req_host=req.origin_req_host,
unverifiable=True, method=m)
@@ -1698,7 +1733,7 @@ def unified_strdate(date_str, day_first=True):
with contextlib.suppress(ValueError):
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
if upload_date is not None:
- return compat_str(upload_date)
+ return str(upload_date)
def unified_timestamp(date_str, day_first=True):
@@ -1872,21 +1907,22 @@ class DateRange:
def platform_name():
- """ Returns the platform name as a compat_str """
+ """ Returns the platform name as a str """
res = platform.platform()
if isinstance(res, bytes):
res = res.decode(preferredencoding())
- assert isinstance(res, compat_str)
+ assert isinstance(res, str)
return res
+@functools.cache
def get_windows_version():
- ''' Get Windows version. None if it's not running on Windows '''
+ ''' Get Windows version. Returns () if it's not running on Windows '''
if compat_os_name == 'nt':
return version_tuple(platform.win32_ver()[1])
else:
- return None
+ return ()
def write_string(s, out=None, encoding=None):
@@ -1896,15 +1932,14 @@ def write_string(s, out=None, encoding=None):
if compat_os_name == 'nt' and supports_terminal_sequences(out):
s = re.sub(r'([\r\n]+)', r' \1', s)
+ enc, buffer = None, out
if 'b' in getattr(out, 'mode', ''):
- byt = s.encode(encoding or preferredencoding(), 'ignore')
- out.write(byt)
+ enc = encoding or preferredencoding()
elif hasattr(out, 'buffer'):
+ buffer = out.buffer
enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
- byt = s.encode(enc, 'ignore')
- out.buffer.write(byt)
- else:
- out.write(s)
+
+ buffer.write(s.encode(enc, 'ignore') if enc else s)
out.flush()
@@ -1920,11 +1955,11 @@ def bytes_to_intlist(bs):
def intlist_to_bytes(xs):
if not xs:
return b''
- return compat_struct_pack('%dB' % len(xs), *xs)
+ return struct.pack('%dB' % len(xs), *xs)
-class LockingUnsupportedError(IOError):
- msg = 'File locking is not supported on this platform'
+class LockingUnsupportedError(OSError):
+ msg = 'File locking is not supported'
def __init__(self):
super().__init__(self.msg)
@@ -1977,7 +2012,8 @@ if sys.platform == 'win32':
if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
(0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
0, whole_low, whole_high, f._lock_file_overlapped_p):
- raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
+ # NB: The no-argument form of "ctypes.FormatError" does not work on PyPy
+ raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
def _unlock_file(f):
assert f._lock_file_overlapped_p
@@ -2049,8 +2085,11 @@ class locked_file:
try:
self.f.truncate()
except OSError as e:
- if e.errno != 29: # Illegal seek, expected when self.f is a FIFO
- raise e
+ if e.errno not in (
+ errno.ESPIPE, # Illegal seek - expected for FIFO
+ errno.EINVAL, # Invalid argument - expected for /dev/null
+ ):
+ raise
return self
def unlock(self):
@@ -2077,6 +2116,7 @@ class locked_file:
return iter(self.f)
+@functools.cache
def get_filesystem_encoding():
encoding = sys.getfilesystemencoding()
return encoding if encoding is not None else 'utf-8'
@@ -2098,7 +2138,7 @@ def smuggle_url(url, data):
url, idata = unsmuggle_url(url, {})
data.update(idata)
- sdata = compat_urllib_parse_urlencode(
+ sdata = urllib.parse.urlencode(
{'__youtubedl_smuggle': json.dumps(data)})
return url + '#' + sdata
@@ -2107,7 +2147,7 @@ def unsmuggle_url(smug_url, default=None):
if '#__youtubedl_smuggle' not in smug_url:
return smug_url, default
url, _, sdata = smug_url.rpartition('#')
- jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
data = json.loads(jsond)
return url, data
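
Annotation (not part of the diff): `smuggle_url`/`unsmuggle_url` round-trip extra data through the URL fragment. A minimal sketch with made-up values, based on the code above:

    url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
    # 'https://example.com/video#__youtubedl_smuggle=%7B%22referer%22...'
    url, data = unsmuggle_url(url)
    assert data == {'referer': 'https://example.com/'}
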
@@ -2267,7 +2307,7 @@ def parse_resolution(s, *, lenient=False):
def parse_bitrate(s):
- if not isinstance(s, compat_str):
+ if not isinstance(s, str):
return
mobj = re.search(r'\b(\d+)\s*kbps', s)
if mobj:
@@ -2304,7 +2344,7 @@ def fix_xml_ampersands(xml_str):
def setproctitle(title):
- assert isinstance(title, compat_str)
+ assert isinstance(title, str)
# ctypes in Jython is not complete
# http://bugs.jython.org/issue2148
@@ -2352,7 +2392,7 @@ def get_domain(url):
def url_basename(url):
- path = compat_urlparse.urlparse(url).path
+ path = urllib.parse.urlparse(url).path
return path.strip('/').split('/')[-1]
@@ -2363,24 +2403,24 @@ def base_url(url):
def urljoin(base, path):
if isinstance(path, bytes):
path = path.decode()
- if not isinstance(path, compat_str) or not path:
+ if not isinstance(path, str) or not path:
return None
if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode()
- if not isinstance(base, compat_str) or not re.match(
+ if not isinstance(base, str) or not re.match(
r'^(?:https?:)?//', base):
return None
- return compat_urlparse.urljoin(base, path)
+ return urllib.parse.urljoin(base, path)
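
Annotation: the strict `str` checks keep `urljoin` predictable; roughly (illustrative values):

    urljoin('https://example.com/a/', 'b/c')                  # 'https://example.com/a/b/c'
    urljoin('https://example.com/a/', '//cdn.example.com/x')  # protocol-relative path returned as-is
    urljoin('not-a-url', 'b/c')                                # None -- base must be http(s) or //
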
-class HEADRequest(compat_urllib_request.Request):
+class HEADRequest(urllib.request.Request):
def get_method(self):
return 'HEAD'
-class PUTRequest(compat_urllib_request.Request):
+class PUTRequest(urllib.request.Request):
def get_method(self):
return 'PUT'
@@ -2395,14 +2435,14 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
def str_or_none(v, default=None):
- return default if v is None else compat_str(v)
+ return default if v is None else str(v)
def str_to_int(int_str):
""" A more relaxed version of int_or_none """
if isinstance(int_str, int):
return int_str
- elif isinstance(int_str, compat_str):
+ elif isinstance(int_str, str):
int_str = re.sub(r'[,\.\+]', '', int_str)
return int_or_none(int_str)
@@ -2421,18 +2461,18 @@ def bool_or_none(v, default=None):
def strip_or_none(v, default=None):
- return v.strip() if isinstance(v, compat_str) else default
+ return v.strip() if isinstance(v, str) else default
def url_or_none(url):
- if not url or not isinstance(url, compat_str):
+ if not url or not isinstance(url, str):
return None
url = url.strip()
return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def request_to_url(req):
- if isinstance(req, compat_urllib_request.Request):
+ if isinstance(req, urllib.request.Request):
return req.get_full_url()
else:
return req
@@ -2443,7 +2483,7 @@ def strftime_or_none(timestamp, date_format, default=None):
try:
if isinstance(timestamp, (int, float)): # unix timestamp
datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
- elif isinstance(timestamp, compat_str): # assume YYYYMMDD
+ elif isinstance(timestamp, str): # assume YYYYMMDD
datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
return datetime_object.strftime(date_format)
except (ValueError, TypeError, AttributeError):
@@ -2525,7 +2565,7 @@ def check_executable(exe, args=[]):
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
args can be a list of arguments for a short output (like -version) """
try:
- Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
+ Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except OSError:
return False
return exe
@@ -2538,18 +2578,15 @@ def _get_exe_version_output(exe, args, *, to_screen=None):
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if yt-dlp is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
- out, _ = Popen(
- [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
+ stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
except OSError:
return False
- if isinstance(out, bytes): # Python 2.x
- out = out.decode('ascii', 'ignore')
- return out
+ return stdout
def detect_exe_version(output, version_re=None, unrecognized='present'):
- assert isinstance(output, compat_str)
+ assert isinstance(output, str)
if version_re is None:
version_re = r'version\s+([-0-9._a-zA-Z]+)'
m = re.search(version_re, output)
@@ -2567,6 +2604,16 @@ def get_exe_version(exe, args=['--version'],
return detect_exe_version(out, version_re, unrecognized) if out else False
+def frange(start=0, stop=None, step=1):
+ """Float range"""
+ if stop is None:
+ start, stop = 0, start
+ sign = [-1, 1][step > 0] if step else 0
+ while sign * start < sign * stop:
+ yield start
+ start += step
+
+
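Annotation: `frange` generalizes `range()` to float steps; a couple of illustrative calls derived from the definition above:

    list(frange(3))           # [0, 1, 2]
    list(frange(1, 2, 0.5))   # [1, 1.5]
    list(frange(2, 1, -0.5))  # [2, 1.5]
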
class LazyList(collections.abc.Sequence):
"""Lazy immutable list from an iterable
Note that slices of a LazyList are lists and not LazyList"""
@@ -2763,6 +2810,140 @@ class InAdvancePagedList(PagedList):
yield from page_results
+class PlaylistEntries:
+ MissingEntry = object()
+ is_exhausted = False
+
+ def __init__(self, ydl, info_dict):
+ self.ydl = ydl
+
+ # _entries must be assigned now since infodict can change during iteration
+ entries = info_dict.get('entries')
+ if entries is None:
+ raise EntryNotInPlaylist('There are no entries')
+ elif isinstance(entries, list):
+ self.is_exhausted = True
+
+ requested_entries = info_dict.get('requested_entries')
+ self.is_incomplete = bool(requested_entries)
+ if self.is_incomplete:
+ assert self.is_exhausted
+ self._entries = [self.MissingEntry] * max(requested_entries)
+ for i, entry in zip(requested_entries, entries):
+ self._entries[i - 1] = entry
+ elif isinstance(entries, (list, PagedList, LazyList)):
+ self._entries = entries
+ else:
+ self._entries = LazyList(entries)
+
+ PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
+ (?P<start>[+-]?\d+)?
+ (?P<range>[:-]
+ (?P<end>[+-]?\d+|inf(?:inite)?)?
+ (?::(?P<step>[+-]?\d+))?
+ )?''')
+
+ @classmethod
+ def parse_playlist_items(cls, string):
+ for segment in string.split(','):
+ if not segment:
+ raise ValueError('There are two or more consecutive commas')
+ mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
+ if not mobj:
+ raise ValueError(f'{segment!r} is not a valid specification')
+ start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
+ if int_or_none(step) == 0:
+ raise ValueError(f'Step in {segment!r} cannot be zero')
+ yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
+
+ def get_requested_items(self):
+ playlist_items = self.ydl.params.get('playlist_items')
+ playlist_start = self.ydl.params.get('playliststart', 1)
+ playlist_end = self.ydl.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlist_end in (-1, None):
+ playlist_end = ''
+ if not playlist_items:
+ playlist_items = f'{playlist_start}:{playlist_end}'
+ elif playlist_start != 1 or playlist_end:
+ self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
+
+ for index in self.parse_playlist_items(playlist_items):
+ for i, entry in self[index]:
+ yield i, entry
+ if not entry:
+ continue
+ try:
+ # TODO: Add auto-generated fields
+ self.ydl._match_entry(entry, incomplete=True, silent=True)
+ except (ExistingVideoReached, RejectedVideoReached):
+ return
+
+ def get_full_count(self):
+ if self.is_exhausted and not self.is_incomplete:
+ return len(self)
+ elif isinstance(self._entries, InAdvancePagedList):
+ if self._entries._pagesize == 1:
+ return self._entries._pagecount
+
+ @functools.cached_property
+ def _getter(self):
+ if isinstance(self._entries, list):
+ def get_entry(i):
+ try:
+ entry = self._entries[i]
+ except IndexError:
+ entry = self.MissingEntry
+ if not self.is_incomplete:
+ raise self.IndexError()
+ if entry is self.MissingEntry:
+ raise EntryNotInPlaylist(f'Entry {i} cannot be found')
+ return entry
+ else:
+ def get_entry(i):
+ try:
+ return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
+ except (LazyList.IndexError, PagedList.IndexError):
+ raise self.IndexError()
+ return get_entry
+
+ def __getitem__(self, idx):
+ if isinstance(idx, int):
+ idx = slice(idx, idx)
+
+ # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
+ step = 1 if idx.step is None else idx.step
+ if idx.start is None:
+ start = 0 if step > 0 else len(self) - 1
+ else:
+ start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
+
+ # NB: Do not call len(self) when idx == [:]
+ if idx.stop is None:
+ stop = 0 if step < 0 else float('inf')
+ else:
+ stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
+ stop += [-1, 1][step > 0]
+
+ for i in frange(start, stop, step):
+ if i < 0:
+ continue
+ try:
+ entry = self._getter(i)
+ except self.IndexError:
+ self.is_exhausted = True
+ if step > 0:
+ break
+ continue
+ yield i + 1, entry
+
+ def __len__(self):
+ return len(tuple(self[:]))
+
+ class IndexError(IndexError):
+ pass
+
+
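Annotation: `parse_playlist_items` implements the `--playlist-items` syntax, yielding ints for single items and slices for ranges; a sketch of the expected results:

    list(PlaylistEntries.parse_playlist_items('1,3:5,-1'))
    # [1, slice(3, 5.0, None), -1]

Indices are 1-based (see `__getitem__` above), and negative values count from the end of the playlist.
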
def uppercase_escape(s):
unicode_escape = codecs.getdecoder('unicode_escape')
return re.sub(
@@ -2786,7 +2967,7 @@ def escape_rfc3986(s):
def escape_url(url):
"""Escape URL as suggested by RFC 3986"""
- url_parsed = compat_urllib_parse_urlparse(url)
+ url_parsed = urllib.parse.urlparse(url)
return url_parsed._replace(
netloc=url_parsed.netloc.encode('idna').decode('ascii'),
path=escape_rfc3986(url_parsed.path),
@@ -2797,12 +2978,12 @@ def escape_url(url):
def parse_qs(url):
- return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ return urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
def read_batch_urls(batch_fd):
def fixup(url):
- if not isinstance(url, compat_str):
+ if not isinstance(url, str):
url = url.decode('utf-8', 'replace')
BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
for bom in BOM_UTF8:
@@ -2820,22 +3001,22 @@ def read_batch_urls(batch_fd):
def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+ return urllib.parse.urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
if not query:
return url
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_parse_qs(parsed_url.query)
+ parsed_url = urllib.parse.urlparse(url)
+ qs = urllib.parse.parse_qs(parsed_url.query)
qs.update(query)
- return compat_urlparse.urlunparse(parsed_url._replace(
- query=compat_urllib_parse_urlencode(qs, True)))
+ return urllib.parse.urlunparse(parsed_url._replace(
+ query=urllib.parse.urlencode(qs, True)))
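
Annotation: `update_url_query` merges new parameters into an existing query string; an illustrative call:

    update_url_query('https://example.com/path?a=1', {'b': '2'})
    # 'https://example.com/path?a=1&b=2'
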
-def update_Request(req, url=None, data=None, headers={}, query={}):
+def update_Request(req, url=None, data=None, headers=None, query=None):
req_headers = req.headers.copy()
- req_headers.update(headers)
+ req_headers.update(headers or {})
req_data = data or req.data
req_url = update_url_query(url or req.get_full_url(), query)
req_get_method = req.get_method()
@@ -2844,7 +3025,7 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
elif req_get_method == 'PUT':
req_type = PUTRequest
else:
- req_type = compat_urllib_request.Request
+ req_type = urllib.request.Request
new_req = req_type(
req_url, data=req_data, headers=req_headers,
origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
@@ -2859,9 +3040,9 @@ def _multipart_encode_impl(data, boundary):
out = b''
for k, v in data.items():
out += b'--' + boundary.encode('ascii') + b'\r\n'
- if isinstance(k, compat_str):
+ if isinstance(k, str):
k = k.encode()
- if isinstance(v, compat_str):
+ if isinstance(v, str):
v = v.encode()
# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
# suggests sending UTF-8 directly. Firefox sends UTF-8, too
@@ -2942,7 +3123,7 @@ def merge_dicts(*dicts):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
- return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+ return string if isinstance(string, str) else str(string, encoding, errors)
US_RATINGS = {
@@ -2966,7 +3147,7 @@ TV_PARENTAL_GUIDELINES = {
def parse_age_limit(s):
# isinstance(False, int) is True. So type() must be used instead
- if type(s) is int:
+ if type(s) is int: # noqa: E721
return s if 0 <= s <= 21 else None
elif not isinstance(s, str):
return None
@@ -3029,7 +3210,11 @@ def js_to_json(code, vars={}):
return '"%s"' % v
+ def create_map(mobj):
+ return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
+
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+ code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
@@ -3052,7 +3237,7 @@ def qualities(quality_ids):
return q
-POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist')
+POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
DEFAULT_OUTTMPL = {
@@ -3290,14 +3475,13 @@ def is_html(first_bytes):
(b'\xff\xfe', 'utf-16-le'),
(b'\xfe\xff', 'utf-16-be'),
]
+
+ encoding = 'utf-8'
for bom, enc in BOMS:
- if first_bytes.startswith(bom):
- s = first_bytes[len(bom):].decode(enc, 'replace')
- break
- else:
- s = first_bytes.decode('utf-8', 'replace')
+ while first_bytes.startswith(bom):
+ encoding, first_bytes = enc, first_bytes[len(bom):]
- return re.match(r'^\s*<', s)
+ return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
def determine_protocol(info_dict):
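
Annotation: the rewritten `is_html` strips any (possibly repeated) BOM before decoding, instead of decoding only after the first match. Assuming the UTF-8 BOM entry present earlier in the `BOMS` list:

    is_html(b'\xef\xbb\xbf  <!DOCTYPE html>')  # truthy -- BOM stripped, then matched
    is_html(b'{"json": true}')                 # None
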
@@ -3319,7 +3503,7 @@ def determine_protocol(info_dict):
elif ext == 'f4m':
return 'f4m'
- return compat_urllib_parse_urlparse(url).scheme
+ return urllib.parse.urlparse(url).scheme
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
@@ -3376,16 +3560,15 @@ def _match_one(filter_part, dct, incomplete):
else:
is_incomplete = lambda k: k in incomplete
- operator_rex = re.compile(r'''(?x)\s*
+ operator_rex = re.compile(r'''(?x)
(?P<key>[a-z_]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
(?P<strval>.+?)
)
- \s*$
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
- m = operator_rex.search(filter_part)
+ m = operator_rex.fullmatch(filter_part.strip())
if m:
m = m.groupdict()
unnegated_op = COMPARISON_OPERATORS[m['op']]
@@ -3421,11 +3604,10 @@ def _match_one(filter_part, dct, incomplete):
'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
'!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
}
- operator_rex = re.compile(r'''(?x)\s*
+ operator_rex = re.compile(r'''(?x)
(?P<op>%s)\s*(?P<key>[a-z_]+)
- \s*$
''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
- m = operator_rex.search(filter_part)
+ m = operator_rex.fullmatch(filter_part.strip())
if m:
op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
@@ -3467,6 +3649,23 @@ def match_filter_func(filters):
return _match_func
+def download_range_func(chapters, ranges):
+ def inner(info_dict, ydl):
+ warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
+ else 'Cannot match chapters since chapter information is unavailable')
+ for regex in chapters or []:
+ for i, chapter in enumerate(info_dict.get('chapters') or []):
+ if re.search(regex, chapter['title']):
+ warning = None
+ yield {**chapter, 'index': i}
+ if chapters and warning:
+ ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
+
+ yield from ({'start_time': start, 'end_time': end} for start, end in ranges or [])
+
+ return inner
+
+
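Annotation: `download_range_func` builds the callable consumed by YoutubeDL's new `download_ranges` parameter; a minimal sketch (the option name is an assumption based on this release's `YoutubeDL.py` changes):

    # Download the first 30 seconds plus any chapter whose title starts with 'Intro'
    ranges = download_range_func(chapters=[r'^Intro'], ranges=[(0, 30)])
    # ydl_opts = {'download_ranges': ranges}
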
def parse_dfxp_time_expr(time_expr):
if not time_expr:
return
@@ -3653,26 +3852,21 @@ def dfxp2srt(dfxp_data):
return ''.join(out)
-def cli_option(params, command_option, param):
+def cli_option(params, command_option, param, separator=None):
param = params.get(param)
- if param:
- param = compat_str(param)
- return [command_option, param] if param is not None else []
+ return ([] if param is None
+ else [command_option, str(param)] if separator is None
+ else [f'{command_option}{separator}{param}'])
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
param = params.get(param)
- if param is None:
- return []
- assert isinstance(param, bool)
- if separator:
- return [command_option + separator + (true_value if param else false_value)]
- return [command_option, true_value if param else false_value]
+ assert param in (True, False, None)
+ return cli_option({True: true_value, False: false_value}, command_option, param, separator)
def cli_valueless_option(params, command_option, param, expected_value=True):
- param = params.get(param)
- return [command_option] if param == expected_value else []
+ return [command_option] if params.get(param) == expected_value else []
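
Annotation: these helpers translate yt-dlp params into external-downloader flags; illustrative outputs from the definitions above:

    cli_option({'proxy': 'socks5://127.0.0.1:1080'}, '--proxy', 'proxy')
    # ['--proxy', 'socks5://127.0.0.1:1080']
    cli_option({'proxy': 'socks5://127.0.0.1:1080'}, '--proxy', 'proxy', separator='=')
    # ['--proxy=socks5://127.0.0.1:1080']
    cli_bool_option({'nocheckcertificate': True}, '--check-certificate',
                    'nocheckcertificate', 'false', 'true', '=')
    # ['--check-certificate=false']

Note that `cli_bool_option` is now just `cli_option` over a `{True: ..., False: ...}` lookup table.
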
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
@@ -4165,6 +4359,9 @@ class ISO3166Utils:
'YE': 'Yemen',
'ZM': 'Zambia',
'ZW': 'Zimbabwe',
+ # Not ISO 3166 codes, but used for IP blocks
+ 'AP': 'Asia/Pacific Region',
+ 'EU': 'Europe',
}
@classmethod
@@ -4427,20 +4624,20 @@ class GeoUtils:
else:
block = code_or_block
addr, preflen = block.split('/')
- addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
- return compat_str(socket.inet_ntoa(
- compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+ return str(socket.inet_ntoa(
+ struct.pack('!L', random.randint(addr_min, addr_max))))
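
Annotation: `random_ipv4` picks a uniformly random address inside the given CIDR block; e.g. (TEST-NET-1, illustrative):

    GeoUtils.random_ipv4('192.0.2.0/24')  # e.g. '192.0.2.137'
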
-class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+class PerRequestProxyHandler(urllib.request.ProxyHandler):
def __init__(self, proxies=None):
# Set default handlers
for type in ('http', 'https'):
setattr(self, '%s_open' % type,
lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
meth(r, proxy, type))
- compat_urllib_request.ProxyHandler.__init__(self, proxies)
+ urllib.request.ProxyHandler.__init__(self, proxies)
def proxy_open(self, req, proxy, type):
req_proxy = req.headers.get('Ytdl-request-proxy')
@@ -4450,11 +4647,11 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
if proxy == '__noproxy__':
return None # No Proxy
- if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
req.add_header('Ytdl-socks-proxy', proxy)
# yt-dlp's http/https handlers wrap the socket with socks
return None
- return compat_urllib_request.ProxyHandler.proxy_open(
+ return urllib.request.ProxyHandler.proxy_open(
self, req, proxy, type)
@@ -4474,7 +4671,7 @@ def long_to_bytes(n, blocksize=0):
s = b''
n = int(n)
while n > 0:
- s = compat_struct_pack('>I', n & 0xffffffff) + s
+ s = struct.pack('>I', n & 0xffffffff) + s
n = n >> 32
# strip off leading zeros
for i in range(len(s)):
@@ -4505,7 +4702,7 @@ def bytes_to_long(s):
s = b'\000' * extra + s
length = length + extra
for i in range(0, length, 4):
- acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
return acc
@@ -4541,22 +4738,42 @@ def pkcs1pad(data, length):
return [0, 2] + pseudo_random + [0] + data
-def encode_base_n(num, n, table=None):
- FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- if not table:
- table = FULL_TABLE[:n]
+def _base_n_table(n, table):
+ if not table and not n:
+ raise ValueError('Either table or n must be specified')
+ table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
- if n > len(table):
- raise ValueError('base %d exceeds table length %d' % (n, len(table)))
+ if n and n != len(table):  # n may be None when only a custom table is given
+ raise ValueError(f'base {n} exceeds table length {len(table)}')
+ return table
- if num == 0:
+
+def encode_base_n(num, n=None, table=None):
+ """Convert given int to a base-n string"""
+ table = _base_n_table(n, table)
+ if not num:
return table[0]
- ret = ''
+ result, base = '', len(table)
while num:
- ret = table[num % n] + ret
- num = num // n
- return ret
+ result = table[num % base] + result
+ num = num // base
+ return result
+
+
+def decode_base_n(string, n=None, table=None):
+ """Convert given base-n string to int"""
+ table = {char: index for index, char in enumerate(_base_n_table(n, table))}
+ result, base = 0, len(table)
+ for char in string:
+ result = result * base + table[char]
+ return result
+
+
+def decode_base(value, digits):
+ write_string('DeprecationWarning: yt_dlp.utils.decode_base is deprecated '
+ 'and may be removed in a future version. Use yt_dlp.utils.decode_base_n instead')
+ return decode_base_n(value, table=digits)
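
Annotation: with the table-building consolidated into `_base_n_table`, the two helpers round-trip cleanly; illustrative calls (the table-only form relies on the None-guard noted above):

    encode_base_n(255, 16)    # 'ff'
    decode_base_n('ff', 16)   # 255
    decode_base_n('ba', table='abcdefghij')  # 10 -- letters a-j as custom digits
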
def decode_packed_codes(code):
@@ -4613,7 +4830,7 @@ def decode_png(png_data):
raise OSError('Not a valid PNG file.')
int_map = {1: '>B', 2: '>H', 4: '>I'}
- unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+ unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0]
chunks = []
@@ -4725,7 +4942,6 @@ def write_xattr(path, key, value):
return
# UNIX Method 1. Use xattrs/pyxattrs modules
- from .dependencies import xattr
setxattr = None
if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
@@ -4753,14 +4969,13 @@ def write_xattr(path, key, value):
value = value.decode()
try:
- p = Popen(
+ _, stderr, returncode = Popen.run(
[exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
except OSError as e:
raise XAttrMetadataError(e.errno, e.strerror)
- stderr = p.communicate_or_kill()[1].decode('utf-8', 'replace')
- if p.returncode:
- raise XAttrMetadataError(p.returncode, stderr)
+ if returncode:
+ raise XAttrMetadataError(returncode, stderr)
def random_birthday(year_field, month_field, day_field):
@@ -4815,7 +5030,7 @@ def iri_to_uri(iri):
The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
"""
- iri_parts = compat_urllib_parse_urlparse(iri)
+ iri_parts = urllib.parse.urlparse(iri)
if '[' in iri_parts.netloc:
raise ValueError('IPv6 URIs are not, yet, supported.')
@@ -4860,11 +5075,11 @@ def to_high_limit_path(path):
return path
-def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
+def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
val = traverse_obj(obj, *variadic(field))
- if val in ignore:
+ if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore):
return default
- return template % (func(val) if func else val)
+ return template % func(val)
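
Annotation: with the `NO_DEFAULT` sentinel, `format_field` now treats only falsy-but-not-zero values as missing by default, and always applies `func` (`IDENTITY` by default); e.g.:

    format_field({'height': 1080}, 'height', '%sp')               # '1080p'
    format_field({'height': None}, 'height', '%sp', default='')   # ''
    format_field({'fps': 0}, 'fps', '%sfps')                      # '0fps' -- 0 is no longer ignored
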
def clean_podcast_url(url):
@@ -4904,14 +5119,9 @@ def make_dir(path, to_screen=None):
def get_executable_path():
- from zipimport import zipimporter
- if hasattr(sys, 'frozen'): # Running from PyInstaller
- path = os.path.dirname(sys.executable)
- elif isinstance(__loader__, zipimporter): # Running from ZIP
- path = os.path.join(os.path.dirname(__file__), '../..')
- else:
- path = os.path.join(os.path.dirname(__file__), '..')
- return os.path.abspath(path)
+ from .update import _get_variant_and_executable_path
+
+ return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
def load_plugins(name, suffix, namespace):
@@ -5010,10 +5220,8 @@ def traverse_obj(
if isinstance(expected_type, type):
type_test = lambda val: val if isinstance(val, expected_type) else None
- elif expected_type is not None:
- type_test = expected_type
else:
- type_test = lambda val: val
+ type_test = expected_type or IDENTITY
for path in path_list:
depth = 0
@@ -5046,17 +5254,6 @@ def variadic(x, allowed_types=(str, bytes, dict)):
return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
-def decode_base(value, digits):
- # This will convert given base-x string to scalar (long or int)
- table = {char: index for index, char in enumerate(digits)}
- result = 0
- base = len(digits)
- for chr in value:
- result *= base
- result += table[chr]
- return result
-
-
def time_seconds(**kwargs):
t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
return t.timestamp()
@@ -5088,10 +5285,13 @@ def jwt_decode_hs256(jwt):
return payload_data
+WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
+
+
+@functools.cache
def supports_terminal_sequences(stream):
if compat_os_name == 'nt':
- from .compat import WINDOWS_VT_MODE # Must be imported locally
- if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
+ if not WINDOWS_VT_MODE:
return False
elif not os.getenv('TERM'):
return False
@@ -5101,6 +5301,19 @@ def supports_terminal_sequences(stream):
return False
+def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
+ if get_windows_version() < (10, 0, 10586):
+ return
+ global WINDOWS_VT_MODE
+ try:
+ Popen.run('', shell=True)
+ except Exception:
+ return
+
+ WINDOWS_VT_MODE = True
+ supports_terminal_sequences.cache_clear()
+
+
_terminal_sequences_re = re.compile('\033\\[[^m]+m')
@@ -5114,7 +5327,7 @@ def number_of_digits(number):
def join_nonempty(*values, delim='-', from_dict=None):
if from_dict is not None:
- values = map(from_dict.get, values)
+ values = (traverse_obj(from_dict, variadic(v)) for v in values)
return delim.join(map(str, filter(None, values)))
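
Annotation: `join_nonempty`'s `from_dict` now accepts `traverse_obj`-style paths rather than plain keys; illustrative calls:

    join_nonempty('HD', None, 1080, delim=' ')  # 'HD 1080'
    join_nonempty('width', 'height', delim='x', from_dict={'width': 1280, 'height': 720})
    # '1280x720'
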
@@ -5150,13 +5363,20 @@ def parse_http_range(range):
return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
+def read_stdin(what):
+ eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
+ write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
+ return sys.stdin
+
+
class Config:
own_args = None
+ parsed_args = None
filename = None
__initialized = False
def __init__(self, parser, label=None):
- self._parser, self.label = parser, label
+ self.parser, self.label = parser, label
self._loaded_paths, self.configs = set(), []
def init(self, args=None, filename=None):
@@ -5169,14 +5389,19 @@ class Config:
return False
self._loaded_paths.add(location)
- self.__initialized = True
- self.own_args, self.filename = args, filename
- for location in self._parser.parse_args(args)[0].config_locations or []:
+ self.own_args, self.__initialized = args, True
+ opts, _ = self.parser.parse_known_args(args)
+ self.parsed_args, self.filename = args, filename
+
+ for location in opts.config_locations or []:
+ if location == '-':
+ self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
+ continue
location = os.path.join(directory, expand_path(location))
if os.path.isdir(location):
location = os.path.join(location, 'yt-dlp.conf')
if not os.path.exists(location):
- self._parser.error(f'config location {location} does not exist')
+ self.parser.error(f'config location {location} does not exist')
self.append_config(self.read_file(location), location)
return True
@@ -5199,6 +5424,8 @@ class Config:
# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
contents = optionf.read()
res = shlex.split(contents, comments=True)
+ except Exception as err:
+ raise ValueError(f'Unable to parse "{filename}": {err}')
finally:
optionf.close()
return res
@@ -5222,7 +5449,7 @@ class Config:
return opts
def append_config(self, *args, label=None):
- config = type(self)(self._parser, label)
+ config = type(self)(self.parser, label)
config._loaded_paths = self._loaded_paths
if config.init(*args):
self.configs.append(config)
@@ -5231,10 +5458,13 @@ class Config:
def all_args(self):
for config in reversed(self.configs):
yield from config.all_args
- yield from self.own_args or []
+ yield from self.parsed_args or []
+
+ def parse_known_args(self, **kwargs):
+ return self.parser.parse_known_args(self.all_args, **kwargs)
def parse_args(self):
- return self._parser.parse_args(self.all_args)
+ return self.parser.parse_args(self.all_args)
class WebSocketsWrapper():
@@ -5314,16 +5544,25 @@ def merge_headers(*dicts):
class classproperty:
- def __init__(self, f):
- functools.update_wrapper(self, f)
- self.f = f
+ """classmethod(property(func)) that works in py < 3.9"""
+
+ def __init__(self, func):
+ functools.update_wrapper(self, func)
+ self.func = func
def __get__(self, _, cls):
- return self.f(cls)
+ return self.func(cls)
+
+class Namespace(types.SimpleNamespace):
+ """Immutable namespace"""
-def Namespace(**kwargs):
- return collections.namedtuple('Namespace', kwargs)(**kwargs)
+ def __iter__(self):
+ return iter(self.__dict__.values())
+
+ @property
+ def items_(self):
+ return self.__dict__.items()
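
Annotation: `Namespace` replaces the earlier namedtuple factory with a real class, keeping attribute access while adding iteration over values and an `items_` view; a sketch with made-up values:

    Colors = Namespace(RED='\033[0;31m', GREEN='\033[0;32m')
    Colors.RED            # '\033[0;31m'
    list(Colors)          # all values
    dict(Colors.items_)   # name -> value

And `classproperty` emulates `classmethod(property(...))` on Python < 3.9:

    class Foo:
        @classproperty
        def name(cls):
            return cls.__name__

    Foo.name  # 'Foo'
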
# Deprecated
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index fb3ec8c6d..0ebc96f8d 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,5 +1,5 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2022.04.08'
+__version__ = '2022.06.22.1'
-RELEASE_GIT_HEAD = '7884ade65'
+RELEASE_GIT_HEAD = 'a86e01e74'