author     Jesús <heckyel@hyperbola.info>    2021-12-29 19:12:28 -0500
committer  Jesús <heckyel@hyperbola.info>    2021-12-29 19:12:28 -0500
commit     5aac4e0267e32d98eb68692afedafda3b41ea629 (patch)
tree       c3b0f52d6a8cf4ad74e7f17f1ccd7653e1071471
parent     4f0875462ee497cc13c02d0b852f52f4887b5cea (diff)
parent     96f13f01a609add83555ca86fbf35d11441361d8 (diff)
updated from upstream | 29/12/2021 at 19:12
-rw-r--r--  .gitignore                                27
-rw-r--r--  CONTRIBUTORS                              23
-rw-r--r--  Changelog.md                             118
-rw-r--r--  docs/Contributing.md                       5
-rw-r--r--  supportedsites.md                         30
-rw-r--r--  test/test_YoutubeDL.py                    15
-rw-r--r--  test/test_cookies.py                      36
-rw-r--r--  test/test_utils.py                        20
-rw-r--r--  test/test_youtube_signature.py             4
-rw-r--r--  yt_dlp/YoutubeDL.py                      123
-rw-r--r--  yt_dlp/__init__.py                        33
-rw-r--r--  yt_dlp/cookies.py                        320
-rw-r--r--  yt_dlp/downloader/common.py               19
-rw-r--r--  yt_dlp/downloader/dash.py                  2
-rw-r--r--  yt_dlp/downloader/external.py              8
-rw-r--r--  yt_dlp/downloader/fragment.py             11
-rw-r--r--  yt_dlp/downloader/http.py                  3
-rw-r--r--  yt_dlp/extractor/bilibili.py             138
-rw-r--r--  yt_dlp/extractor/cbc.py                    3
-rw-r--r--  yt_dlp/extractor/common.py                10
-rw-r--r--  yt_dlp/extractor/dplay.py                 14
-rw-r--r--  yt_dlp/extractor/drooble.py              116
-rw-r--r--  yt_dlp/extractor/dropout.py              212
-rw-r--r--  yt_dlp/extractor/extractors.py            26
-rw-r--r--  yt_dlp/extractor/fancode.py                2
-rw-r--r--  yt_dlp/extractor/gamejolt.py             540
-rw-r--r--  yt_dlp/extractor/generic.py               14
-rw-r--r--  yt_dlp/extractor/gfycat.py                28
-rw-r--r--  yt_dlp/extractor/instagram.py             91
-rw-r--r--  yt_dlp/extractor/lbry.py                  40
-rw-r--r--  yt_dlp/extractor/njpwworld.py              9
-rw-r--r--  yt_dlp/extractor/npr.py                    3
-rw-r--r--  yt_dlp/extractor/opencast.py             177
-rw-r--r--  yt_dlp/extractor/pixivsketch.py          122
-rw-r--r--  yt_dlp/extractor/pornhub.py                3
-rw-r--r--  yt_dlp/extractor/rcti.py                 123
-rw-r--r--  yt_dlp/extractor/roosterteeth.py           4
-rw-r--r--  yt_dlp/extractor/soundcloud.py             2
-rw-r--r--  yt_dlp/extractor/steam.py                140
-rw-r--r--  yt_dlp/extractor/tiktok.py               178
-rw-r--r--  yt_dlp/extractor/voicy.py                  7
-rw-r--r--  yt_dlp/extractor/youtube.py              116
-rw-r--r--  yt_dlp/extractor/zee5.py                  55
-rw-r--r--  yt_dlp/options.py                         25
-rw-r--r--  yt_dlp/postprocessor/embedthumbnail.py    71
-rw-r--r--  yt_dlp/postprocessor/ffmpeg.py            10
-rw-r--r--  yt_dlp/postprocessor/metadataparser.py     2
-rw-r--r--  yt_dlp/utils.py                           58
-rw-r--r--  yt_dlp/version.py                          4
49 files changed, 2583 insertions, 557 deletions
diff --git a/.gitignore b/.gitignore
index 8a72e3ea9..fb09c3d6d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,27 +1,32 @@
# Config
*.conf
-*.spec
cookies
*cookies.txt
.netrc
# Downloaded
-*.3gp
*.annotations.xml
-*.ape
*.aria2
-*.avi
*.description
-*.desktop
*.dump
-*.flac
-*.flv
*.frag
+*.frag.aria2
*.frag.urls
*.info.json
+*.live_chat.json
+*.part*
+*.unknown_video
+*.ytdl
+.cache/
+
+*.3gp
+*.ape
+*.avi
+*.desktop
+*.flac
+*.flv
*.jpeg
*.jpg
-*.live_chat.json
*.m4a
*.m4v
*.mhtml
@@ -31,23 +36,18 @@ cookies
*.mp4
*.ogg
*.opus
-*.part
-*.part-*
*.png
*.sbv
*.srt
*.swf
*.swp
*.ttml
-*.unknown_video
*.url
*.vtt
*.wav
*.webloc
*.webm
*.webp
-*.ytdl
-.cache/
# Allow config/media files in testdata
!test/**
@@ -86,7 +86,6 @@ CONTRIBUTING.md
*.1
*.bash-completion
*.fish
-*.exe
*.tar.gz
*.zsh
test/testdata/player-*.js
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index b3e3e97fa..35a0764a2 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -155,3 +155,26 @@ staubichsauger
xenova
Yakabuff
zulaport
+ehoogeveen-medweb
+PilzAdam
+zmousm
+iw0nderhow
+unit193
+TwoThousandHedgehogs
+Jertzukka
+cypheron
+Hyeeji
+bwildenhain
+C0D3D3V
+kebianizao
+Lapin0t
+abdullah-if
+DavidSkrundz
+mkubecek
+raleeper
+YuenSzeHong
+Sematre
+jaller94
+r5d
+julien-hadleyjack
+git-anony-mouse
diff --git a/Changelog.md b/Changelog.md
index 4e9a448cb..f46c22a32 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -10,6 +10,124 @@
* Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master
-->
+
+### 2021.12.27
+
+* Avoid recursion error when re-extracting info
+* [ffmpeg] Fix position of `--ppa`
+* [aria2c] Don't show progress when `--no-progress`
+* [cookies] Support other keyrings by [mbway](https://github.com/mbway)
+* [EmbedThumbnail] Prefer AtomicParsley over ffmpeg if available
+* [generic] Fix HTTP KVS Player by [git-anony-mouse](https://github.com/git-anony-mouse)
+* [ThumbnailsConvertor] Fix for when there are no thumbnails
+* [docs] Add examples for using `TYPES:` in `-P`/`-o`
+* [PixivSketch] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
+* [tiktok] Add music, sticker and tag IEs by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [BiliIntl] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [CBC] Fix URL regex
+* [tiktok] Fix `extractor_key` used in archive
+* [youtube] **End `live-from-start` properly when stream ends with 403**
+* [Zee5] Fix VALID_URL for tv-shows by [Ashish0804](https://github.com/Ashish0804)
+
+### 2021.12.25
+
+* [dash,youtube] **Download live from start to end** by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
+ * Add option `--live-from-start` to enable downloading live videos from start
+ * Add key `is_from_start` in formats to identify formats (of live videos) that download from start
+ * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments
+ * [fragment] Allow multiple live dash formats to download simultaneously
+ * [youtube] Implement fragment re-fetching for the live dash formats
+ * [youtube] Re-extract dash manifest every 5 hours (manifest expires in 6hrs)
+ * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms
+ * Known issues:
+ * Ctrl+C doesn't work on Windows when downloading multiple formats
+ * If video becomes private, download hangs
+* [SponsorBlock] Add `Filler` and `Highlight` categories by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
+ * Change `--sponsorblock-cut all` to `--sponsorblock-cut default` if you do not want filler sections to be removed
+* Add field `webpage_url_domain`
+* Add interactive format selection with `-f -`
+* Add option `--file-access-retries` by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
+* [outtmpl] Add alternate forms `S`, `D` and improve `id` detection
+* [outtmpl] Add operator `&` for replacement text by [PilzAdam](https://github.com/PilzAdam)
+* [EmbedSubtitle] Disable duration check temporarily
+* [extractor] Add `_search_nuxt_data` by [nao20010128nao](https://github.com/nao20010128nao)
+* [extractor] Ignore errors in comment extraction when `-i` is given
+* [extractor] Standardize `_live_title`
+* [FormatSort] Prevent incorrect deprecation warning
+* [generic] Extract m3u8 formats from JSON-LD
+* [postprocessor/ffmpeg] Always add `faststart`
+* [utils] Fix parsing `YYYYMMDD` dates in Nov/Dec by [wlritchi](https://github.com/wlritchi)
+* [utils] Improve `parse_count`
+* [utils] Update `std_headers` by [kikuyan](https://github.com/kikuyan), [fstirlitz](https://github.com/fstirlitz)
+* [lazy_extractors] Fix for search IEs
+* [extractor] Support default implicit graph in JSON-LD by [zmousm](https://github.com/zmousm)
+* Allow `--no-write-thumbnail` to override `--write-all-thumbnails`
+* Fix `--throttled-rate`
+* Fix control characters being printed to `--console-title`
+* Fix PostProcessor hooks not registered for some PPs
+* Pre-process when using `--flat-playlist`
+* Remove known invalid thumbnails from `info_dict`
+* Add warning when using `-f best`
+* Use `parse_duration` for `--wait-for-video` and some minor fixes
+* [test/download] Add more fields
+* [test/download] Ignore field `webpage_url_domain` by [std-move](https://github.com/std-move)
+* [compat] Suppress errors in enabling VT mode
+* [docs] Improve manpage format by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
+* [docs,cleanup] Minor fixes and cleanup
+* [cleanup] Fix some typos by [unit193](https://github.com/unit193)
+* [ABC:iview] Add show extractor by [pabs3](https://github.com/pabs3)
+* [dropout] Add extractor by [TwoThousandHedgehogs](https://github.com/TwoThousandHedgehogs), [pukkandan](https://github.com/pukkandan)
+* [GameJolt] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [gofile] Add extractor by [Jertzukka](https://github.com/Jertzukka), [Ashish0804](https://github.com/Ashish0804)
+* [hse] Add extractors by [cypheron](https://github.com/cypheron), [pukkandan](https://github.com/pukkandan)
+* [NateTV] Add NateIE and NateProgramIE by [Ashish0804](https://github.com/Ashish0804), [Hyeeji](https://github.com/Hyeeji)
+* [OpenCast] Add extractors by [bwildenhain](https://github.com/bwildenhain), [C0D3D3V](https://github.com/C0D3D3V)
+* [rtve] Add `RTVEAudioIE` by [kebianizao](https://github.com/kebianizao)
+* [Rutube] Add RutubeChannelIE by [Ashish0804](https://github.com/Ashish0804)
+* [skeb] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
+* [soundcloud] Add related tracks extractor by [Lapin0t](https://github.com/Lapin0t)
+* [toggo] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
+* [TrueID] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [audiomack] Update album and song VALID_URL by [abdullah-if](https://github.com/abdullah-if), [dirkf](https://github.com/dirkf)
+* [CBC Gem] Extract 1080p formats by [DavidSkrundz](https://github.com/DavidSkrundz)
+* [ceskatelevize] Fetch iframe from nextJS data by [mkubecek](https://github.com/mkubecek)
+* [crackle] Look for non-DRM formats by [raleeper](https://github.com/raleeper)
+* [dplay] Temporary fix for `discoveryplus.com/it`
+* [DiscoveryPlusShowBaseIE] yield actual video id by [Ashish0804](https://github.com/Ashish0804)
+* [Facebook] Handle redirect URLs
+* [fujitv] Extract 1080p from `tv_android` m3u8 by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [gronkh] Support new URL pattern by [Sematre](https://github.com/Sematre)
+* [instagram] Expand valid URL by [u-spec-png](https://github.com/u-spec-png)
+* [Instagram] Try bypassing login wall with embed page by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [Jamendo] Fix use of `_VALID_URL_RE` by [jaller94](https://github.com/jaller94)
+* [LBRY] Support livestreams by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
+* [NJPWWorld] Extract formats from m3u8 by [aarubui](https://github.com/aarubui)
+* [NovaEmbed] update player regex by [std-move](https://github.com/std-move)
+* [npr] Make SMIL extraction non-fatal by [r5d](https://github.com/r5d)
+* [ntvcojp] Extract NUXT data by [nao20010128nao](https://github.com/nao20010128nao)
+* [ok.ru] add mobile fallback by [nao20010128nao](https://github.com/nao20010128nao)
+* [olympics] Add uploader and cleanup by [u-spec-png](https://github.com/u-spec-png)
+* [ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
+* [PlutoTV] Expand `_VALID_URL`
+* [RaiNews] Fix extractor by [nixxo](https://github.com/nixxo)
+* [RCTIPlusSeries] Lazy extraction and video type selection by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [redtube] Handle formats delivered inside a JSON by [dirkf](https://github.com/dirkf), [nixxo](https://github.com/nixxo)
+* [SonyLiv] Add OTP login support by [Ashish0804](https://github.com/Ashish0804)
+* [Steam] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [TikTok] Pass cookies to mobile API by [MinePlayersPE](https://github.com/MinePlayersPE)
+* [trovo] Fix inheritance of `TrovoChannelBaseIE`
+* [TVer] Extract better thumbnails by [YuenSzeHong](https://github.com/YuenSzeHong)
+* [vimeo] Extract chapters
+* [web.archive:youtube] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:comments] Add more options for limiting number of comments extracted by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract more metadata from feeds/channels/playlists by [coletdjnz](https://github.com/coletdjnz)
+* [youtube:tab] Extract video thumbnails from playlist by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
+* [youtube:tab] Ignore query when redirecting channel to playlist and cleanup of related code
+* [youtube] Fix `ytsearchdate`
+* [zdf] Support videos with different ptmd location by [iw0nderhow](https://github.com/iw0nderhow)
+* [zee5] Support /episodes in URL
+
+
### 2021.12.01
* **Add option `--wait-for-video` to wait for scheduled streams**
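The headline feature in this changelog is downloading live streams from their start. A minimal sketch of driving it from the embedding API, assuming the CLI flags map to the `live_from_start` and `wait_for_video` parameters documented in YoutubeDL.py (the URL is a placeholder):

```python
# Sketch only: option names assumed from the YoutubeDL params; URL is a placeholder.
import yt_dlp

opts = {
    'live_from_start': True,    # download live streams from their start
    'wait_for_video': (5, 60),  # poll every 5-60s for scheduled streams
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=PLACEHOLDER'])
```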
diff --git a/docs/Contributing.md b/docs/Contributing.md
new file mode 100644
index 000000000..60fe46909
--- /dev/null
+++ b/docs/Contributing.md
@@ -0,0 +1,5 @@
+---
+orphan: true
+---
+```{include} ../Contributing.md
+```
diff --git a/supportedsites.md b/supportedsites.md
index 2c13a28b7..9dc94f27d 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -21,6 +21,7 @@
- **9now.com.au**
- **abc.net.au**
- **abc.net.au:iview**
+ - **abc.net.au:iview:showseries**
- **abcnews**
- **abcnews:video**
- **abcotvs**: ABC Owned Television Stations
@@ -273,6 +274,7 @@
- **DiscoveryPlus**
- **DiscoveryPlusIndia**
- **DiscoveryPlusIndiaShow**
+ - **DiscoveryPlusItaly**
- **DiscoveryPlusItalyShow**
- **DiscoveryVR**
- **Disney**
@@ -287,6 +289,8 @@
- **DPlay**
- **DRBonanza**
- **Dropbox**
+ - **Dropout**
+ - **DropoutSeason**
- **DrTuber**
- **drtv**
- **drtv:live**
@@ -379,6 +383,12 @@
- **GabTV**
- **Gaia**
- **GameInformer**
+ - **GameJolt**
+ - **GameJoltCommunity**
+ - **GameJoltGame**
+ - **GameJoltGameSoundtrack**
+ - **GameJoltSearch**
+ - **GameJoltUser**
- **GameSpot**
- **GameStar**
- **Gaskrank**
@@ -399,6 +409,7 @@
- **GloboArticle**
- **Go**
- **GodTube**
+ - **Gofile**
- **Golem**
- **google:podcasts**
- **google:podcasts:feed**
@@ -436,6 +447,8 @@
- **hrfernsehen**
- **HRTi**
- **HRTiPlaylist**
+ - **HSEProduct**
+ - **HSEShow**
- **Huajiao**: 花椒直播
- **HuffPost**: Huffington Post
- **Hungama**
@@ -652,6 +665,8 @@
- **n-tv.de**
- **N1Info:article**
- **N1InfoAsset**
+ - **Nate**
+ - **NateProgram**
- **natgeo:video**
- **NationalGeographicTV**
- **Naver**
@@ -766,6 +781,8 @@
- **OnionStudios**
- **Ooyala**
- **OoyalaExternal**
+ - **Opencast**
+ - **OpencastPlaylist**
- **openrec**
- **openrec:capture**
- **OraTV**
@@ -819,6 +836,8 @@
- **Pinkbike**
- **Pinterest**
- **PinterestCollection**
+ - **pixiv:sketch**
+ - **pixiv:sketch:user**
- **Pladform**
- **PlanetMarathi**
- **Platzi**
@@ -941,6 +960,7 @@
- **RTRFM**
- **RTS**: RTS.ch
- **rtve.es:alacarta**: RTVE a la carta
+ - **rtve.es:audio**: RTVE audio
- **rtve.es:infantil**: RTVE infantil
- **rtve.es:live**: RTVE.es live streams
- **rtve.es:television**
@@ -950,11 +970,12 @@
- **RumbleChannel**
- **RumbleEmbed**
- **rutube**: Rutube videos
- - **rutube:channel**: Rutube channels
+ - **rutube:channel**: Rutube channel
- **rutube:embed**: Rutube embedded videos
- **rutube:movie**: Rutube movies
- **rutube:person**: Rutube person videos
- **rutube:playlist**: Rutube playlists
+ - **rutube:tags**: Rutube tags
- **RUTV**: RUTV.RU
- **Ruutu**
- **Ruv**
@@ -994,6 +1015,7 @@
- **simplecast:episode**
- **simplecast:podcast**
- **Sina**
+ - **Skeb**
- **sky.it**
- **sky:news**
- **sky:news:story**
@@ -1013,6 +1035,7 @@
- **SonyLIVSeries**
- **soundcloud**
- **soundcloud:playlist**
+ - **soundcloud:related**
- **soundcloud:search**: Soundcloud search; "scsearch:" prefix
- **soundcloud:set**
- **soundcloud:trackstation**
@@ -1120,12 +1143,16 @@
- **ThreeSpeak**
- **ThreeSpeakUser**
- **TikTok**
+ - **tiktok:effect**
+ - **tiktok:sound**
+ - **tiktok:tag**
- **tiktok:user**
- **tinypic**: tinypic.com videos
- **TMZ**
- **TNAFlix**
- **TNAFlixNetworkEmbed**
- **toggle**
+ - **toggo**
- **Tokentube**
- **Tokentube:channel**
- **ToonGoggles**
@@ -1138,6 +1165,7 @@
- **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
- **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix
- **TrovoVod**
+ - **TrueID**
- **TruNews**
- **TruTV**
- **Tube8**
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 6c2530046..61923513e 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -717,6 +717,7 @@ class TestYoutubeDL(unittest.TestCase):
test('%(id)s', '.abcd', info={'id': '.abcd'})
test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
+ test('%(id.0)s', '-', info={'id': '--'})
# Invalid templates
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
@@ -777,6 +778,10 @@ class TestYoutubeDL(unittest.TestCase):
test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
test('%(title5)+U', 'áéí A')
test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
+ test('%(height)D', '1K')
+ test('%(height)5.2D', ' 1.08K')
+ test('%(title4)#S', 'foo_bar_test')
+ test('%(title4).10S', ('foo \'bar\' ', 'foo \'bar\'' + ('#' if compat_os_name == 'nt' else ' ')))
if compat_os_name == 'nt':
test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', "'id 1' 'id 2' 'id 3'"))
@@ -808,6 +813,11 @@ class TestYoutubeDL(unittest.TestCase):
test('%(width-100,height+width|def)s', 'def')
test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00')
+ # Replacement
+ test('%(id&foo)s.bar', 'foo.bar')
+ test('%(title&foo)s.bar', 'NA.bar')
+ test('%(title&foo|baz)s.bar', 'baz.bar')
+
# Laziness
def gen():
yield from range(5)
@@ -836,11 +846,6 @@ class TestYoutubeDL(unittest.TestCase):
test('%(title3)s', ('foo/bar\\test', 'foo_bar_test'))
test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep))
- # Replacement
- test('%(id&foo)s.bar', 'foo.bar')
- test('%(title&foo)s.bar', 'NA.bar')
- test('%(title&foo|baz)s.bar', 'baz.bar')
-
def test_format_note(self):
ydl = YoutubeDL()
self.assertEqual(ydl._format_note({}), '')
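The new tests above exercise the `D` (decimal suffix), `S` (filename sanitization) and `&` (replacement) output-template features. A small sketch of the same conversions through the public API; the expected outputs in the comments are taken directly from the test expectations above:

```python
import yt_dlp

ydl = yt_dlp.YoutubeDL()
info = {'id': 'abc', 'height': 1080}
print(ydl.evaluate_outtmpl('%(height)D', info))         # '1K' (decimal suffix)
print(ydl.evaluate_outtmpl('%(height)5.2D', info))      # ' 1.08K'
print(ydl.evaluate_outtmpl('%(id&found)s.bar', info))   # 'found.bar' (& replacement)
print(ydl.evaluate_outtmpl('%(title&foo|baz)s', info))  # 'baz' (| default when field is absent)
```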
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 7d509ebe8..842ebcb99 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -8,6 +8,8 @@ from yt_dlp.cookies import (
WindowsChromeCookieDecryptor,
parse_safari_cookies,
pbkdf2_sha1,
+ _get_linux_desktop_environment,
+ _LinuxDesktopEnvironment,
)
@@ -42,6 +44,37 @@ class MonkeyPatch:
class TestCookies(unittest.TestCase):
+ def test_get_desktop_environment(self):
+ """ based on https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util_unittest.cc """
+ test_cases = [
+ ({}, _LinuxDesktopEnvironment.OTHER),
+
+ ({'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
+ ({'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
+ ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE),
+ ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE),
+ ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
+
+ ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
+ ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE),
+
+ ({'XDG_CURRENT_DESKTOP': 'X-Cinnamon'}, _LinuxDesktopEnvironment.CINNAMON),
+ ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
+
+ ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
+ ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE),
+ ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE),
+ ({'XDG_CURRENT_DESKTOP': 'Pantheon'}, _LinuxDesktopEnvironment.PANTHEON),
+ ({'XDG_CURRENT_DESKTOP': 'Unity'}, _LinuxDesktopEnvironment.UNITY),
+ ({'XDG_CURRENT_DESKTOP': 'Unity:Unity7'}, _LinuxDesktopEnvironment.UNITY),
+ ({'XDG_CURRENT_DESKTOP': 'Unity:Unity8'}, _LinuxDesktopEnvironment.UNITY),
+ ]
+
+ for env, expected_desktop_environment in test_cases:
+ self.assertEqual(_get_linux_desktop_environment(env), expected_desktop_environment)
+
def test_chrome_cookie_decryptor_linux_derive_key(self):
key = LinuxChromeCookieDecryptor.derive_key(b'abc')
self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17')
@@ -58,8 +91,7 @@ class TestCookies(unittest.TestCase):
self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_linux_v11(self):
- with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b'',
- 'KEYRING_AVAILABLE': True}):
+ with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
value = 'tz=Europe.London'
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
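The table of cases above drives the new `_get_linux_desktop_environment` helper directly; calling it is equally straightforward. The env dict below is one of the test cases:

```python
from yt_dlp.cookies import _LinuxDesktopEnvironment, _get_linux_desktop_environment

env = {'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}
# Unity with a gnome-fallback session is classified as GNOME, per Chromium's xdg_util.cc
assert _get_linux_desktop_environment(env) is _LinuxDesktopEnvironment.GNOME
```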
diff --git a/test/test_utils.py b/test/test_utils.py
index 22dda4f37..2e33308c7 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -37,6 +37,7 @@ from yt_dlp.utils import (
ExtractorError,
find_xpath_attr,
fix_xml_ampersands,
+ format_bytes,
float_or_none,
get_element_by_class,
get_element_by_attribute,
@@ -1156,9 +1157,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_count('1000'), 1000)
self.assertEqual(parse_count('1.000'), 1000)
self.assertEqual(parse_count('1.1k'), 1100)
+ self.assertEqual(parse_count('1.1 k'), 1100)
+ self.assertEqual(parse_count('1,1 k'), 1100)
self.assertEqual(parse_count('1.1kk'), 1100000)
self.assertEqual(parse_count('1.1kk '), 1100000)
+ self.assertEqual(parse_count('1,1kk'), 1100000)
+ self.assertEqual(parse_count('100 views'), 100)
+ self.assertEqual(parse_count('1,100 views'), 1100)
self.assertEqual(parse_count('1.1kk views'), 1100000)
+ self.assertEqual(parse_count('10M views'), 10000000)
+ self.assertEqual(parse_count('has 10M views'), 10000000)
def test_parse_resolution(self):
self.assertEqual(parse_resolution(None), {})
@@ -1681,6 +1689,18 @@ Line 1
ll = reversed(ll)
test(ll, -15, 14, range(15))
+ def test_format_bytes(self):
+ self.assertEqual(format_bytes(0), '0.00B')
+ self.assertEqual(format_bytes(1000), '1000.00B')
+ self.assertEqual(format_bytes(1024), '1.00KiB')
+ self.assertEqual(format_bytes(1024**2), '1.00MiB')
+ self.assertEqual(format_bytes(1024**3), '1.00GiB')
+ self.assertEqual(format_bytes(1024**4), '1.00TiB')
+ self.assertEqual(format_bytes(1024**5), '1.00PiB')
+ self.assertEqual(format_bytes(1024**6), '1.00EiB')
+ self.assertEqual(format_bytes(1024**7), '1.00ZiB')
+ self.assertEqual(format_bytes(1024**8), '1.00YiB')
+
if __name__ == '__main__':
unittest.main()
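A quick sketch of the utilities exercised above, with outputs taken from the new test expectations:

```python
from yt_dlp.utils import format_bytes, parse_count

print(format_bytes(1024 ** 2))     # '1.00MiB' (binary, IEC suffixes)
print(parse_count('1,100 views'))  # 1100 - comma thousands separator now handled
print(parse_count('has 10M views'))  # 10000000 - surrounding text is tolerated
```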
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 3359ac457..5f8114a1c 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -82,6 +82,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js',
'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q',
),
+ (
+ 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js',
+ 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw',
+ ),
]
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index d542d22e6..ed1881da5 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -67,6 +67,7 @@ from .utils import (
float_or_none,
format_bytes,
format_field,
+ format_decimal_suffix,
formatSeconds,
GeoRestrictedError,
get_domain,
@@ -315,10 +316,10 @@ class YoutubeDL(object):
break_per_url: Whether break_on_reject and break_on_existing
should act on each input URL as opposed to for the entire queue
cookiefile: File name where cookies should be read from and dumped to
- cookiesfrombrowser: A tuple containing the name of the browser and the profile
- name/path from where cookies are loaded.
- Eg: ('chrome', ) or ('vivaldi', 'default')
- nocheckcertificate:Do not verify SSL certificates
+ cookiesfrombrowser: A tuple containing the name of the browser, the profile
name/path from where cookies are loaded, and the name of the
+ keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
+ nocheckcertificate: Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
proxy: URL of the proxy server to use
@@ -448,8 +449,8 @@ class YoutubeDL(object):
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see yt_dlp/downloader/common.py):
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
- max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
- noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
+ max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
+ continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
external_downloader_args, concurrent_fragment_downloads.
The following options are used by the post processors:
@@ -1004,7 +1005,7 @@ class YoutubeDL(object):
def validate_outtmpl(cls, outtmpl):
''' @return None or Exception object '''
outtmpl = re.sub(
- STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
+ STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
lambda mobj: f'{mobj.group(0)[:-1]}s',
cls._outtmpl_expandpath(outtmpl))
try:
@@ -1020,8 +1021,12 @@ class YoutubeDL(object):
info_dict.pop(key, None)
return info_dict
- def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
- """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+ """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+ @param sanitize Whether to sanitize the output as a filename.
+ For backward compatibility, a function can also be passed
+ """
+
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
info_dict = self._copy_infodict(info_dict)
@@ -1042,7 +1047,7 @@ class YoutubeDL(object):
}
TMPL_DICT = {}
- EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
MATH_FUNCTIONS = {
'+': float.__add__,
'-': float.__sub__,
@@ -1050,7 +1055,7 @@ class YoutubeDL(object):
# Field is of the form key1.key2...
# where keys (except first) can be string, int or slice
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
- MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+ MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
INTERNAL_FORMAT_RE = re.compile(r'''(?x)
(?P<negate>-)?
@@ -1106,6 +1111,13 @@ class YoutubeDL(object):
na = self.params.get('outtmpl_na_placeholder', 'NA')
+ def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
+ return sanitize_filename(str(value), restricted=restricted,
+ is_id=re.search(r'(^|[_.])id(\.|$)', key))
+
+ sanitizer = sanitize if callable(sanitize) else filename_sanitizer
+ sanitize = bool(sanitize)
+
def _dumpjson_default(obj):
if isinstance(obj, (set, LazyList)):
return list(obj)
@@ -1116,7 +1128,7 @@ class YoutubeDL(object):
return outer_mobj.group(0)
key = outer_mobj.group('key')
mobj = re.match(INTERNAL_FORMAT_RE, key)
- initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
+ initial_field = mobj.group('fields') if mobj else ''
value, replacement, default = None, None, na
while mobj:
mobj = mobj.groupdict()
@@ -1138,7 +1150,7 @@ class YoutubeDL(object):
str_fmt = f'{fmt[:-1]}s'
if fmt[-1] == 'l': # list
delim = '\n' if '#' in flags else ', '
- value, fmt = delim.join(variadic(value)), str_fmt
+ value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt
elif fmt[-1] == 'j': # json
value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
elif fmt[-1] == 'q': # quoted
@@ -1152,6 +1164,10 @@ class YoutubeDL(object):
# "+" = compatibility equivalence, "#" = NFD
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
value), str_fmt
+ elif fmt[-1] == 'D': # decimal suffix
+ value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
+ elif fmt[-1] == 'S': # filename sanitization
+ value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
elif fmt[-1] == 'c':
if value:
value = str(value)[0]
@@ -1168,7 +1184,7 @@ class YoutubeDL(object):
# So we convert it to repr first
value, fmt = repr(value), str_fmt
if fmt[-1] in 'csr':
- value = sanitize(initial_field, value)
+ value = sanitizer(initial_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value
@@ -1182,12 +1198,8 @@ class YoutubeDL(object):
def _prepare_filename(self, info_dict, tmpl_type='default'):
try:
- sanitize = lambda k, v: sanitize_filename(
- compat_str(v),
- restricted=self.params.get('restrictfilenames'),
- is_id=(k == 'id' or k.endswith('_id')))
outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
- filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
+ filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
force_ext = OUTTMPL_TYPES.get(tmpl_type)
if filename and force_ext is not None:
@@ -1335,31 +1347,33 @@ class YoutubeDL(object):
def __handle_extraction_exceptions(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
- try:
- return func(self, *args, **kwargs)
- except GeoRestrictedError as e:
- msg = e.msg
- if e.countries:
- msg += '\nThis video is available in %s.' % ', '.join(
- map(ISO3166Utils.short2full, e.countries))
- msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
- self.report_error(msg)
- except ExtractorError as e: # An error we somewhat expected
- self.report_error(compat_str(e), e.format_traceback())
- except ReExtractInfo as e:
- if e.expected:
- self.to_screen(f'{e}; Re-extracting data')
- else:
- self.to_stderr('\r')
- self.report_warning(f'{e}; Re-extracting data')
- return wrapper(self, *args, **kwargs)
- except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
- raise
- except Exception as e:
- if self.params.get('ignoreerrors'):
- self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- else:
+ while True:
+ try:
+ return func(self, *args, **kwargs)
+ except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
raise
+ except ReExtractInfo as e:
+ if e.expected:
+ self.to_screen(f'{e}; Re-extracting data')
+ else:
+ self.to_stderr('\r')
+ self.report_warning(f'{e}; Re-extracting data')
+ continue
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
+ self.report_error(msg)
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(str(e), e.format_traceback())
+ except Exception as e:
+ if self.params.get('ignoreerrors'):
+ self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
+ else:
+ raise
+ break
return wrapper
def _wait_for_video(self, ie_result):
@@ -1482,7 +1496,7 @@ class YoutubeDL(object):
self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
ie_result['additional_entries'] = [
self.extract_info(
- url, download, extra_info,
+ url, download, extra_info=extra_info,
force_generic_extractor=self.params.get('force_generic_extractor'))
for url in additional_urls
]
@@ -2461,10 +2475,7 @@ class YoutubeDL(object):
info_dict['id'], automatic_captions, 'automatic captions')
self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
if self.params.get('listformats') or interactive_format_selection:
- if not info_dict.get('formats') and not info_dict.get('url'):
- self.to_screen('%s has no formats' % info_dict['id'])
- else:
- self.list_formats(info_dict)
+ self.list_formats(info_dict)
if list_only:
# Without this printing, -F --print-json will not work
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
@@ -3135,9 +3146,8 @@ class YoutubeDL(object):
'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
}
- empty_values = (None, {}, [], set(), tuple())
reject = lambda k, v: k not in keep_keys and (
- k.startswith('_') or k in remove_keys or v in empty_values)
+ k.startswith('_') or k in remove_keys or v is None)
else:
reject = lambda k, v: k in remove_keys
@@ -3348,6 +3358,11 @@ class YoutubeDL(object):
return headers
def list_formats(self, info_dict):
+ if not info_dict.get('formats') and not info_dict.get('url'):
+ self.to_screen('%s has no formats' % info_dict['id'])
+ return
+ self.to_screen('[info] Available formats for %s:' % info_dict['id'])
+
formats = info_dict.get('formats', [info_dict])
new_format = self.params.get('listformats_table', True) is not False
if new_format:
@@ -3362,7 +3377,7 @@ class YoutubeDL(object):
delim,
format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
format_field(f, 'tbr', '\t%dk'),
- shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
+ shorten_protocol_name(f.get('protocol', '')),
delim,
format_field(f, 'vcodec', default='unknown').replace(
'none',
@@ -3398,8 +3413,6 @@ class YoutubeDL(object):
if f.get('preference') is None or f['preference'] >= -1000]
header_line = ['format code', 'extension', 'resolution', 'note']
- self.to_screen(
- '[info] Available formats for %s:' % info_dict['id'])
self.to_stdout(render_table(
header_line, table,
extra_gap=(0 if new_format else 1),
@@ -3527,11 +3540,11 @@ class YoutubeDL(object):
from .downloader.websocket import has_websockets
from .postprocessor.embedthumbnail import has_mutagen
- from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
+ from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
lib_str = join_nonempty(
compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
- KEYRING_AVAILABLE and 'keyring',
+ SECRETSTORAGE_AVAILABLE and 'secretstorage',
has_mutagen and 'mutagen',
SQLITE_AVAILABLE and 'sqlite',
has_websockets and 'websockets',
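The largest behavioural fix in YoutubeDL.py is the `__handle_extraction_exceptions` rewrite: `ReExtractInfo` used to be retried by calling the wrapper recursively, which could eventually raise RecursionError; the new code loops instead. A distilled sketch of the pattern (`ReExtract` is a stand-in class here, not the real exception):

```python
import functools

class ReExtract(Exception):
    """Stand-in for yt_dlp's ReExtractInfo."""

def handle_retries(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        while True:  # loop instead of `return wrapper(...)` - no recursion depth cost
            try:
                return func(*args, **kwargs)
            except ReExtract:
                continue  # go around the loop and re-run the extraction
    return wrapper
```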
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 198962aa5..4fa2e2d8c 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -16,10 +16,11 @@ from .options import (
)
from .compat import (
compat_getpass,
+ compat_os_name,
compat_shlex_quote,
workaround_optparse_bug9161,
)
-from .cookies import SUPPORTED_BROWSERS
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .utils import (
DateRange,
decodeOption,
@@ -92,7 +93,8 @@ def _real_main(argv=None):
if opts.batchfile is not None:
try:
if opts.batchfile == '-':
- write_string('Reading URLs from stdin:\n')
+ write_string('Reading URLs from stdin - EOF (%s) to end:\n' % (
+ 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'))
batchfd = sys.stdin
else:
batchfd = io.open(
@@ -134,10 +136,10 @@ def _real_main(argv=None):
# Conflicting, missing and erroneous options
if opts.format == 'best':
- warnings.append('.\n '.join(
+ warnings.append('.\n '.join((
'"-f best" selects the best pre-merged format which is often not the best option',
'To let yt-dlp download and merge the best available formats, simply do not pass any format selection',
- 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))
+ 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning')))
if opts.usenetrc and (opts.username is not None or opts.password is not None):
parser.error('using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
@@ -217,6 +219,8 @@ def _real_main(argv=None):
return parsed_retries
if opts.retries is not None:
opts.retries = parse_retries(opts.retries)
+ if opts.file_access_retries is not None:
+ opts.file_access_retries = parse_retries(opts.file_access_retries, 'file access ')
if opts.fragment_retries is not None:
opts.fragment_retries = parse_retries(opts.fragment_retries, 'fragment ')
if opts.extractor_retries is not None:
@@ -259,10 +263,20 @@ def _real_main(argv=None):
if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS:
parser.error('invalid thumbnail format specified')
if opts.cookiesfrombrowser is not None:
- opts.cookiesfrombrowser = [
- part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)]
- if opts.cookiesfrombrowser[0].lower() not in SUPPORTED_BROWSERS:
- parser.error('unsupported browser specified for cookies')
+ mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser)
+ if mobj is None:
+ parser.error(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
+ browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile')
+ browser_name = browser_name.lower()
+ if browser_name not in SUPPORTED_BROWSERS:
+ parser.error(f'unsupported browser specified for cookies: "{browser_name}". '
+ f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}')
+ if keyring is not None:
+ keyring = keyring.upper()
+ if keyring not in SUPPORTED_KEYRINGS:
+ parser.error(f'unsupported keyring specified for cookies: "{keyring}". '
+ f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
+ opts.cookiesfrombrowser = (browser_name, profile, keyring)
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
if geo_bypass_code is not None:
try:
@@ -515,7 +529,7 @@ def _real_main(argv=None):
if len(dur) == 2 and all(t is not None for t in dur):
remove_ranges.append(tuple(dur))
continue
- parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form ?start-end')
+ parser.error(f'invalid --remove-chapters time range {regex!r}. Must be of the form *start-end')
try:
remove_chapters_patterns.append(re.compile(regex))
except re.error as err:
@@ -668,6 +682,7 @@ def _real_main(argv=None):
'throttledratelimit': opts.throttledratelimit,
'overwrites': opts.overwrites,
'retries': opts.retries,
+ 'file_access_retries': opts.file_access_retries,
'fragment_retries': opts.fragment_retries,
'extractor_retries': opts.extractor_retries,
'skip_unavailable_fragments': opts.skip_unavailable_fragments,
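The `--cookies-from-browser` value is now `BROWSER[+KEYRING][:PROFILE]`. A standalone re-run of the exact regex added above shows how it splits:

```python
import re

SPEC_RE = r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?'
for spec in ('chrome', 'vivaldi:default', 'chromium+basictext:Profile 1'):
    print(re.match(SPEC_RE, spec).group('name', 'keyring', 'profile'))
# ('chrome', None, None)
# ('vivaldi', None, 'default')
# ('chromium', 'basictext', 'Profile 1')
```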
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index ec68a809d..74e133bc9 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1,3 +1,4 @@
+import contextlib
import ctypes
import json
import os
@@ -7,6 +8,7 @@ import subprocess
import sys
import tempfile
from datetime import datetime, timedelta, timezone
+from enum import Enum, auto
from hashlib import pbkdf2_hmac
from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
@@ -15,7 +17,6 @@ from .compat import (
compat_cookiejar_Cookie,
)
from .utils import (
- bug_reports_message,
expand_path,
Popen,
YoutubeDLCookieJar,
@@ -31,19 +32,16 @@ except ImportError:
try:
- import keyring
- KEYRING_AVAILABLE = True
- KEYRING_UNAVAILABLE_REASON = f'due to unknown reasons{bug_reports_message()}'
+ import secretstorage
+ SECRETSTORAGE_AVAILABLE = True
except ImportError:
- KEYRING_AVAILABLE = False
- KEYRING_UNAVAILABLE_REASON = (
- 'as the `keyring` module is not installed. '
- 'Please install by running `python3 -m pip install keyring`. '
- 'Depending on your platform, additional packages may be required '
- 'to access the keyring; see https://pypi.org/project/keyring')
+ SECRETSTORAGE_AVAILABLE = False
+ SECRETSTORAGE_UNAVAILABLE_REASON = (
+ 'as the `secretstorage` module is not installed. '
+ 'Please install by running `python3 -m pip install secretstorage`.')
except Exception as _err:
- KEYRING_AVAILABLE = False
- KEYRING_UNAVAILABLE_REASON = 'as the `keyring` module could not be initialized: %s' % _err
+ SECRETSTORAGE_AVAILABLE = False
+ SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}'
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
@@ -74,8 +72,8 @@ class YDLLogger:
def load_cookies(cookie_file, browser_specification, ydl):
cookie_jars = []
if browser_specification is not None:
- browser_name, profile = _parse_browser_specification(*browser_specification)
- cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl)))
+ browser_name, profile, keyring = _parse_browser_specification(*browser_specification)
+ cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring))
if cookie_file is not None:
cookie_file = expand_path(cookie_file)
@@ -87,13 +85,13 @@ def load_cookies(cookie_file, browser_specification, ydl):
return _merge_cookie_jars(cookie_jars)
-def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger()):
+def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None):
if browser_name == 'firefox':
return _extract_firefox_cookies(profile, logger)
elif browser_name == 'safari':
return _extract_safari_cookies(profile, logger)
elif browser_name in CHROMIUM_BASED_BROWSERS:
- return _extract_chrome_cookies(browser_name, profile, logger)
+ return _extract_chrome_cookies(browser_name, profile, keyring, logger)
else:
raise ValueError('unknown browser: {}'.format(browser_name))
@@ -207,7 +205,7 @@ def _get_chromium_based_browser_settings(browser_name):
}
-def _extract_chrome_cookies(browser_name, profile, logger):
+def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.info('Extracting cookies from {}'.format(browser_name))
if not SQLITE_AVAILABLE:
@@ -234,7 +232,7 @@ def _extract_chrome_cookies(browser_name, profile, logger):
raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
- decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger)
+ decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
cursor = None
@@ -247,6 +245,7 @@ def _extract_chrome_cookies(browser_name, profile, logger):
'expires_utc, {} FROM cookies'.format(secure_column))
jar = YoutubeDLCookieJar()
failed_cookies = 0
+ unencrypted_cookies = 0
for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
host_key = host_key.decode('utf-8')
name = name.decode('utf-8')
@@ -258,6 +257,8 @@ def _extract_chrome_cookies(browser_name, profile, logger):
if value is None:
failed_cookies += 1
continue
+ else:
+ unencrypted_cookies += 1
cookie = compat_cookiejar_Cookie(
version=0, name=name, value=value, port=None, port_specified=False,
@@ -270,6 +271,9 @@ def _extract_chrome_cookies(browser_name, profile, logger):
else:
failed_message = ''
logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message))
+ counts = decryptor.cookie_counts.copy()
+ counts['unencrypted'] = unencrypted_cookies
+ logger.debug('cookie version breakdown: {}'.format(counts))
return jar
finally:
if cursor is not None:
@@ -305,10 +309,14 @@ class ChromeCookieDecryptor:
def decrypt(self, encrypted_value):
raise NotImplementedError
+ @property
+ def cookie_counts(self):
+ raise NotImplementedError
+
-def get_cookie_decryptor(browser_root, browser_keyring_name, logger):
+def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
if sys.platform in ('linux', 'linux2'):
- return LinuxChromeCookieDecryptor(browser_keyring_name, logger)
+ return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
elif sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger)
elif sys.platform == 'win32':
@@ -319,13 +327,12 @@ def get_cookie_decryptor(browser_root, browser_keyring_name, logger):
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
- def __init__(self, browser_keyring_name, logger):
+ def __init__(self, browser_keyring_name, logger, *, keyring=None):
self._logger = logger
self._v10_key = self.derive_key(b'peanuts')
- if KEYRING_AVAILABLE:
- self._v11_key = self.derive_key(_get_linux_keyring_password(browser_keyring_name))
- else:
- self._v11_key = None
+ password = _get_linux_keyring_password(browser_keyring_name, keyring, logger)
+ self._v11_key = None if password is None else self.derive_key(password)
+ self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
@staticmethod
def derive_key(password):
@@ -333,20 +340,27 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)
+ @property
+ def cookie_counts(self):
+ return self._cookie_counts
+
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
if version == b'v10':
+ self._cookie_counts['v10'] += 1
return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
elif version == b'v11':
+ self._cookie_counts['v11'] += 1
if self._v11_key is None:
- self._logger.warning(f'cannot decrypt cookie {KEYRING_UNAVAILABLE_REASON}', only_once=True)
+ self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
return None
return _decrypt_aes_cbc(ciphertext, self._v11_key, self._logger)
else:
+ self._cookie_counts['other'] += 1
return None
@@ -355,6 +369,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
self._logger = logger
password = _get_mac_keyring_password(browser_keyring_name, logger)
self._v10_key = None if password is None else self.derive_key(password)
+ self._cookie_counts = {'v10': 0, 'other': 0}
@staticmethod
def derive_key(password):
@@ -362,11 +377,16 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)
+ @property
+ def cookie_counts(self):
+ return self._cookie_counts
+
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
if version == b'v10':
+ self._cookie_counts['v10'] += 1
if self._v10_key is None:
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
return None
@@ -374,6 +394,7 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
return _decrypt_aes_cbc(ciphertext, self._v10_key, self._logger)
else:
+ self._cookie_counts['other'] += 1
# other prefixes are considered 'old data' which were stored as plaintext
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm
return encrypted_value
@@ -383,12 +404,18 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_root, logger):
self._logger = logger
self._v10_key = _get_windows_v10_key(browser_root, logger)
+ self._cookie_counts = {'v10': 0, 'other': 0}
+
+ @property
+ def cookie_counts(self):
+ return self._cookie_counts
def decrypt(self, encrypted_value):
version = encrypted_value[:3]
ciphertext = encrypted_value[3:]
if version == b'v10':
+ self._cookie_counts['v10'] += 1
if self._v10_key is None:
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
return None
@@ -408,6 +435,7 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
else:
+ self._cookie_counts['other'] += 1
# any other prefix means the data is DPAPI encrypted
# https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8')
@@ -577,42 +605,221 @@ def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
return jar
-def _get_linux_keyring_password(browser_keyring_name):
- password = keyring.get_password('{} Keys'.format(browser_keyring_name),
- '{} Safe Storage'.format(browser_keyring_name))
- if password is None:
- # this sometimes occurs in KDE because chrome does not check hasEntry and instead
- # just tries to read the value (which kwallet returns "") whereas keyring checks hasEntry
- # to verify this:
- # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
- # while starting chrome.
- # this may be a bug as the intended behaviour is to generate a random password and store
- # it, but that doesn't matter here.
- password = ''
- return password.encode('utf-8')
+class _LinuxDesktopEnvironment(Enum):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
+ DesktopEnvironment
+ """
+ OTHER = auto()
+ CINNAMON = auto()
+ GNOME = auto()
+ KDE = auto()
+ PANTHEON = auto()
+ UNITY = auto()
+ XFCE = auto()
-def _get_mac_keyring_password(browser_keyring_name, logger):
- if KEYRING_AVAILABLE:
- logger.debug('using keyring to obtain password')
- password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name)
- return password.encode('utf-8')
+class _LinuxKeyring(Enum):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.h
+ SelectedLinuxBackend
+ """
+ KWALLET = auto()
+ GNOMEKEYRING = auto()
+ BASICTEXT = auto()
+
+
+SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
+
+
+def _get_linux_desktop_environment(env):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
+ GetDesktopEnvironment
+ """
+ xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
+ desktop_session = env.get('DESKTOP_SESSION', None)
+ if xdg_current_desktop is not None:
+ xdg_current_desktop = xdg_current_desktop.split(':')[0].strip()
+
+ if xdg_current_desktop == 'Unity':
+ if desktop_session is not None and 'gnome-fallback' in desktop_session:
+ return _LinuxDesktopEnvironment.GNOME
+ else:
+ return _LinuxDesktopEnvironment.UNITY
+ elif xdg_current_desktop == 'GNOME':
+ return _LinuxDesktopEnvironment.GNOME
+ elif xdg_current_desktop == 'X-Cinnamon':
+ return _LinuxDesktopEnvironment.CINNAMON
+ elif xdg_current_desktop == 'KDE':
+ return _LinuxDesktopEnvironment.KDE
+ elif xdg_current_desktop == 'Pantheon':
+ return _LinuxDesktopEnvironment.PANTHEON
+ elif xdg_current_desktop == 'XFCE':
+ return _LinuxDesktopEnvironment.XFCE
+ elif desktop_session is not None:
+ if desktop_session in ('mate', 'gnome'):
+ return _LinuxDesktopEnvironment.GNOME
+ elif 'kde' in desktop_session:
+ return _LinuxDesktopEnvironment.KDE
+ elif 'xfce' in desktop_session:
+ return _LinuxDesktopEnvironment.XFCE
+ else:
+ if 'GNOME_DESKTOP_SESSION_ID' in env:
+ return _LinuxDesktopEnvironment.GNOME
+ elif 'KDE_FULL_SESSION' in env:
+ return _LinuxDesktopEnvironment.KDE
+ else:
+ return _LinuxDesktopEnvironment.OTHER
+
+
+def _choose_linux_keyring(logger):
+ """
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/key_storage_util_linux.cc
+ SelectBackend
+ """
+ desktop_environment = _get_linux_desktop_environment(os.environ)
+ logger.debug('detected desktop environment: {}'.format(desktop_environment.name))
+ if desktop_environment == _LinuxDesktopEnvironment.KDE:
+ linux_keyring = _LinuxKeyring.KWALLET
+ elif desktop_environment == _LinuxDesktopEnvironment.OTHER:
+ linux_keyring = _LinuxKeyring.BASICTEXT
else:
- logger.debug('using find-generic-password to obtain password')
+ linux_keyring = _LinuxKeyring.GNOMEKEYRING
+ return linux_keyring
+
+
+def _get_kwallet_network_wallet(logger):
+ """ The name of the wallet used to store network passwords.
+
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/kwallet_dbus.cc
+ KWalletDBus::NetworkWallet
+ which does a dbus call to the following function:
+ https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
+ Wallet::NetworkWallet
+ """
+ default_wallet = 'kdewallet'
+ try:
+ proc = Popen([
+ 'dbus-send', '--session', '--print-reply=literal',
+ '--dest=org.kde.kwalletd5',
+ '/modules/kwalletd5',
+ 'org.kde.KWallet.networkWallet'
+ ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+
+ stdout, stderr = proc.communicate_or_kill()
+ if proc.returncode != 0:
+ logger.warning('failed to read NetworkWallet')
+ return default_wallet
+ else:
+ network_wallet = stdout.decode('utf-8').strip()
+ logger.debug('NetworkWallet = "{}"'.format(network_wallet))
+ return network_wallet
+ except BaseException as e:
+ logger.warning('exception while obtaining NetworkWallet: {}'.format(e))
+ return default_wallet
+
+
+def _get_kwallet_password(browser_keyring_name, logger):
+ logger.debug('using kwallet-query to obtain password from kwallet')
+
+ if shutil.which('kwallet-query') is None:
+ logger.error('kwallet-query command not found. KWallet and kwallet-query '
'must be installed to read from KWallet. kwallet-query should be '
+ 'included in the kwallet package for your distribution')
+ return b''
+
+ network_wallet = _get_kwallet_network_wallet(logger)
+
+ try:
+ proc = Popen([
+ 'kwallet-query',
+ '--read-password', '{} Safe Storage'.format(browser_keyring_name),
+ '--folder', '{} Keys'.format(browser_keyring_name),
+ network_wallet
+ ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+
+ stdout, stderr = proc.communicate_or_kill()
+ if proc.returncode != 0:
+ logger.error('kwallet-query failed with return code {}. Please consult '
+ 'the kwallet-query man page for details'.format(proc.returncode))
+ return b''
+ else:
+ if stdout.lower().startswith(b'failed to read'):
+ logger.debug('failed to read password from kwallet. Using empty string instead')
+ # this sometimes occurs in KDE because chrome does not check hasEntry and instead
+ # just tries to read the value (which kwallet returns "") whereas kwallet-query
+ # checks hasEntry. To verify this:
+ # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
+ # while starting chrome.
+ # this may be a bug as the intended behaviour is to generate a random password and store
+ # it, but that doesn't matter here.
+ return b''
+ else:
+ logger.debug('password found')
+ if stdout[-1:] == b'\n':
+ stdout = stdout[:-1]
+ return stdout
+ except BaseException as e:
+ logger.warning(f'exception running kwallet-query: {type(e).__name__}({e})')
+ return b''
+
+
+def _get_gnome_keyring_password(browser_keyring_name, logger):
+ if not SECRETSTORAGE_AVAILABLE:
+ logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON))
+ return b''
+ # the Gnome keyring does not seem to organise keys in the same way as KWallet,
+ # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
+ # and presumably searches for its key in the list. It appears that we must do the same.
+ # https://github.com/jaraco/keyring/issues/556
+ with contextlib.closing(secretstorage.dbus_init()) as con:
+ col = secretstorage.get_default_collection(con)
+ for item in col.get_all_items():
+ if item.get_label() == '{} Safe Storage'.format(browser_keyring_name):
+ return item.get_secret()
+ else:
+ logger.error('failed to read from keyring')
+ return b''
+
+
+def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
+ # note: chrome/chromium can be run with the following flags to determine which keyring backend
+ # it has chosen to use
+ # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
+ # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
+ # will not be sufficient in all cases.
+
+    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
+ logger.debug(f'Chosen keyring: {keyring.name}')
+
+ if keyring == _LinuxKeyring.KWALLET:
+ return _get_kwallet_password(browser_keyring_name, logger)
+ elif keyring == _LinuxKeyring.GNOMEKEYRING:
+ return _get_gnome_keyring_password(browser_keyring_name, logger)
+ elif keyring == _LinuxKeyring.BASICTEXT:
+ # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
+ return None
+ assert False, f'Unknown keyring {keyring}'
+
+
+def _get_mac_keyring_password(browser_keyring_name, logger):
+ logger.debug('using find-generic-password to obtain password from OSX keychain')
+ try:
proc = Popen(
['security', 'find-generic-password',
'-w', # write password to stdout
'-a', browser_keyring_name, # match 'account'
'-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service'
stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
- try:
- stdout, stderr = proc.communicate_or_kill()
- if stdout[-1:] == b'\n':
- stdout = stdout[:-1]
- return stdout
- except BaseException as e:
- logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
- return None
+
+ stdout, stderr = proc.communicate_or_kill()
+ if stdout[-1:] == b'\n':
+ stdout = stdout[:-1]
+ return stdout
+ except BaseException as e:
+ logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})')
+ return None
def _get_windows_v10_key(browser_root, logger):
@@ -736,10 +943,11 @@ def _is_path(value):
return os.path.sep in value
-def _parse_browser_specification(browser_name, profile=None):
- browser_name = browser_name.lower()
+def _parse_browser_specification(browser_name, profile=None, keyring=None):
if browser_name not in SUPPORTED_BROWSERS:
raise ValueError(f'unsupported browser: "{browser_name}"')
+ if keyring not in (None, *SUPPORTED_KEYRINGS):
+ raise ValueError(f'unsupported keyring: "{keyring}"')
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
- return browser_name, profile
+ return browser_name, profile, keyring
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index d0c9c223f..37321e34b 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -4,12 +4,14 @@ import os
import re
import time
import random
+import errno
from ..utils import (
decodeArgument,
encodeFilename,
error_to_compat_str,
format_bytes,
+ sanitize_open,
shell_quote,
timeconvert,
timetuple_from_msec,
@@ -39,6 +41,7 @@ class FileDownloader(object):
ratelimit: Download speed limit, in bytes/sec.
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
+ file_access_retries: Number of times to retry on file access error
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
@@ -207,6 +210,21 @@ class FileDownloader(object):
def ytdl_filename(self, filename):
return filename + '.ytdl'
+ def sanitize_open(self, filename, open_mode):
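+ # wrapper around utils.sanitize_open that retries on file access errors,
+ # up to the `file_access_retries` option (default 10)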
+ file_access_retries = self.params.get('file_access_retries', 10)
+ retry = 0
+ while True:
+ try:
+ return sanitize_open(filename, open_mode)
+ except (IOError, OSError) as err:
+ retry += 1
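+ # only EACCES is retried; any other error is re-raised immediately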
+ if retry > file_access_retries or err.errno not in (errno.EACCES,):
+ raise
+ self.to_screen(
+ '[download] Got file access error. Retrying (attempt %d of %s) ...'
+ % (retry, self.format_retries(file_access_retries)))
+ time.sleep(0.01)
+
def try_rename(self, old_filename, new_filename):
if old_filename == new_filename:
return
@@ -397,6 +415,7 @@ class FileDownloader(object):
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
}, info_dict)
+ self._finish_multiline_status()
return True, False
if subtitle is False:
diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py
index 4c23edd32..a845ee7d3 100644
--- a/yt_dlp/downloader/dash.py
+++ b/yt_dlp/downloader/dash.py
@@ -57,7 +57,7 @@ class DashSegmentsFD(FragmentFD):
def _resolve_fragments(self, fragments, ctx):
fragments = fragments(ctx) if callable(fragments) else fragments
- return [next(fragments)] if self.params.get('test') else fragments
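+ # iter() is needed since `fragments` may be a list rather than a generator,
+ # and plain next() requires an iterator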
+ return [next(iter(fragments))] if self.params.get('test') else fragments
def _get_fragments(self, fmt, ctx):
fragment_base_url = fmt.get('fragment_base_url')
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index da69423f7..17be3c46f 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -22,7 +22,6 @@ from ..utils import (
handle_youtubedl_headers,
check_executable,
Popen,
- sanitize_open,
)
@@ -144,11 +143,11 @@ class ExternalFD(FragmentFD):
return -1
decrypt_fragment = self.decrypter(info_dict)
- dest, _ = sanitize_open(tmpfilename, 'wb')
+ dest, _ = self.sanitize_open(tmpfilename, 'wb')
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
try:
- src, _ = sanitize_open(fragment_filename, 'rb')
+ src, _ = self.sanitize_open(fragment_filename, 'rb')
except IOError as err:
if skip_unavailable_fragments and frag_index > 1:
self.report_skip_fragment(frag_index, err)
@@ -266,6 +265,7 @@ class Aria2cFD(ExternalFD):
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+ cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args()
# aria2c strips out spaces from the beginning/end of filenames and paths.
@@ -290,7 +290,7 @@ class Aria2cFD(ExternalFD):
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
- stream, _ = sanitize_open(url_list_file, 'wb')
+ stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode('utf-8'))
stream.close()
cmd += ['-i', url_list_file]
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 79c6561c7..d4f112b0f 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -24,7 +24,6 @@ from ..utils import (
DownloadError,
error_to_compat_str,
encodeFilename,
- sanitize_open,
sanitized_Request,
)
@@ -96,7 +95,7 @@ class FragmentFD(FileDownloader):
def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx
- stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+ stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
try:
ytdl_data = json.loads(stream.read())
ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
@@ -108,7 +107,7 @@ class FragmentFD(FileDownloader):
stream.close()
def _write_ytdl_file(self, ctx):
- frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+ frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
try:
downloader = {
'current_fragment': {
@@ -140,7 +139,7 @@ class FragmentFD(FileDownloader):
return True, self._read_fragment(ctx)
def _read_fragment(self, ctx):
- down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+ down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
@@ -216,7 +215,7 @@ class FragmentFD(FileDownloader):
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
- dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+ dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode)
ctx.update({
'dl': dl,
@@ -434,6 +433,7 @@ class FragmentFD(FileDownloader):
def download_fragment(fragment, ctx):
frag_index = ctx['fragment_index'] = fragment['frag_index']
+ ctx['last_error'] = None
if not interrupt_trigger[0]:
return False, frag_index
headers = info_dict.get('http_headers', {}).copy()
@@ -456,6 +456,7 @@ class FragmentFD(FileDownloader):
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
# https://github.com/ytdl-org/youtube-dl/issues/10448).
count += 1
+ ctx['last_error'] = err
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
except DownloadError:
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 6290884a8..34a1eb59b 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -16,7 +16,6 @@ from ..utils import (
ContentTooShortError,
encodeFilename,
int_or_none,
- sanitize_open,
sanitized_Request,
ThrottledDownload,
write_xattr,
@@ -263,7 +262,7 @@ class HttpFD(FileDownloader):
# Open destination file just in time
if ctx.stream is None:
try:
- ctx.stream, ctx.tmpfilename = sanitize_open(
+ ctx.stream, ctx.tmpfilename = self.sanitize_open(
ctx.tmpfilename, ctx.open_mode)
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index e019ec6a8..2cb01ff83 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -19,14 +19,15 @@ from ..utils import (
parse_iso8601,
traverse_obj,
try_get,
+ parse_count,
smuggle_url,
srt_subtitles_timecode,
str_or_none,
- str_to_int,
strip_jsonp,
unified_timestamp,
unsmuggle_url,
urlencode_postdata,
+ url_or_none,
OnDemandPagedList
)
@@ -722,10 +723,10 @@ class BiliBiliPlayerIE(InfoExtractor):
class BiliIntlBaseIE(InfoExtractor):
- _API_URL = 'https://api.bili{}/intl/gateway{}'
+ _API_URL = 'https://api.bilibili.tv/intl/gateway'
- def _call_api(self, type, endpoint, id):
- return self._download_json(self._API_URL.format(type, endpoint), id)['data']
+ def _call_api(self, endpoint, *args, **kwargs):
+ return self._download_json(self._API_URL + endpoint, *args, **kwargs)['data']
def json2srt(self, json):
data = '\n\n'.join(
@@ -733,29 +734,40 @@ class BiliIntlBaseIE(InfoExtractor):
for i, line in enumerate(json['body']))
return data
- def _get_subtitles(self, type, ep_id):
- sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id)
+ def _get_subtitles(self, ep_id):
+ sub_json = self._call_api(f'/web/v2/subtitle?episode_id={ep_id}&platform=web', ep_id)
subtitles = {}
- for sub in sub_json.get('subtitles', []):
+ for sub in sub_json.get('subtitles') or []:
sub_url = sub.get('url')
if not sub_url:
continue
- sub_data = self._download_json(sub_url, ep_id, fatal=False)
+ sub_data = self._download_json(
+ sub_url, ep_id, errnote='Unable to download subtitles', fatal=False,
+ note='Downloading subtitles%s' % (f' for {sub["lang"]}' if sub.get('lang') else ''))
if not sub_data:
continue
- subtitles.setdefault(sub.get('key', 'en'), []).append({
+ subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
'ext': 'srt',
'data': self.json2srt(sub_data)
})
return subtitles
- def _get_formats(self, type, ep_id):
- video_json = self._call_api(type, f'/web/playurl?ep_id={ep_id}&platform=web', ep_id)
- if not video_json:
- self.raise_login_required(method='cookies')
+ def _get_formats(self, ep_id):
+ video_json = self._call_api(f'/web/playurl?ep_id={ep_id}&platform=web', ep_id,
+ note='Downloading video formats', errnote='Unable to download video formats')
+ if video_json.get('code'):
+ if video_json['code'] in (10004004, 10004005, 10023006):
+ self.raise_login_required(method='cookies')
+ elif video_json['code'] == 10004001:
+ self.raise_geo_restricted()
+ elif video_json.get('message') and str(video_json['code']) != video_json['message']:
+ raise ExtractorError(
+ f'Unable to download video formats: {self.IE_NAME} said: {video_json["message"]}', expected=True)
+ else:
+ raise ExtractorError('Unable to download video formats')
video_json = video_json['playurl']
formats = []
- for vid in video_json.get('video', []):
+ for vid in video_json.get('video') or []:
video_res = vid.get('video_resource') or {}
video_info = vid.get('stream_info') or {}
if not video_res.get('url'):
@@ -771,7 +783,7 @@ class BiliIntlBaseIE(InfoExtractor):
'vcodec': video_res.get('codecs'),
'filesize': video_res.get('size'),
})
- for aud in video_json.get('audio_resource', []):
+ for aud in video_json.get('audio_resource') or []:
if not aud.get('url'):
continue
formats.append({
@@ -786,85 +798,93 @@ class BiliIntlBaseIE(InfoExtractor):
self._sort_formats(formats)
return formats
- def _extract_ep_info(self, type, episode_data, ep_id):
+ def _extract_ep_info(self, episode_data, ep_id):
return {
'id': ep_id,
- 'title': episode_data.get('long_title') or episode_data['title'],
+ 'title': episode_data.get('title_display') or episode_data['title'],
'thumbnail': episode_data.get('cover'),
- 'episode_number': str_to_int(episode_data.get('title')),
- 'formats': self._get_formats(type, ep_id),
- 'subtitles': self._get_subtitles(type, ep_id),
+ 'episode_number': int_or_none(self._search_regex(
+ r'^E(\d+)(?:$| - )', episode_data.get('title_display'), 'episode number', default=None)),
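+ # e.g. 'E2 - The First Night' -> 2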
+ 'formats': self._get_formats(ep_id),
+ 'subtitles': self._get_subtitles(ep_id),
'extractor_key': BiliIntlIE.ie_key(),
}
class BiliIntlIE(BiliIntlBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.tv/en/play/34613/341736',
'info_dict': {
'id': '341736',
'ext': 'mp4',
- 'title': 'The First Night',
- 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
+ 'title': 'E2 - The First Night',
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
'episode_number': 2,
- },
- 'params': {
- 'format': 'bv',
- },
+ }
}, {
- 'url': 'https://www.biliintl.com/en/play/34613/341736',
+ 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
'info_dict': {
- 'id': '341736',
+ 'id': '11005006',
'ext': 'mp4',
- 'title': 'The First Night',
- 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
- 'episode_number': 2,
- },
- 'params': {
- 'format': 'bv',
- },
+ 'title': 'E3 - Who?',
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'episode_number': 3,
+ }
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613/341736',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- type, season_id, id = self._match_valid_url(url).groups()
- data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={season_id}', id)
- episode_data = next(
- episode for episode in data_json.get('episodes', [])
- if str(episode.get('ep_id')) == id)
- return self._extract_ep_info(type, episode_data, id)
+ season_id, video_id = self._match_valid_url(url).groups()
+ webpage = self._download_webpage(url, video_id)
+ # Bstation layout
+ initial_data = self._parse_json(self._search_regex(
+ r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
+ 'preload state', default='{}'), video_id, fatal=False) or {}
+ episode_data = traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
+
+ if not episode_data:
+ # Non-Bstation layout, read through episode list
+ season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
+ episode_data = next(
+ episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
+ if str(episode.get('episode_id')) == video_id)
+ return self._extract_ep_info(episode_data, video_id)
class BiliIntlSeriesIE(BiliIntlBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
_TESTS = [{
'url': 'https://www.bilibili.tv/en/play/34613',
'playlist_mincount': 15,
'info_dict': {
'id': '34613',
+ 'title': 'Fly Me to the Moon',
+ 'description': 'md5:a861ee1c4dc0acfad85f557cc42ac627',
+ 'categories': ['Romance', 'Comedy', 'Slice of life'],
+ 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'view_count': int,
},
'params': {
'skip_download': True,
- 'format': 'bv',
},
}, {
'url': 'https://www.biliintl.com/en/play/34613',
- 'playlist_mincount': 15,
- 'info_dict': {
- 'id': '34613',
- },
- 'params': {
- 'skip_download': True,
- 'format': 'bv',
- },
+ 'only_matching': True,
}]
- def _entries(self, id, type):
- data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={id}', id)
- for episode in data_json.get('episodes', []):
- episode_id = str(episode.get('ep_id'))
- yield self._extract_ep_info(type, episode, episode_id)
+ def _entries(self, series_id):
+ series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
+ for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]):
+ episode_id = str(episode.get('episode_id'))
+ yield self._extract_ep_info(episode, episode_id)
def _real_extract(self, url):
- type, id = self._match_valid_url(url).groups()
- return self.playlist_result(self._entries(id, type), playlist_id=id)
+ series_id = self._match_id(url)
+ series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
+ return self.playlist_result(
+ self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
+ categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
+ thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 392c77884..ac1272f7b 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -340,7 +340,8 @@ class CBCGemIE(InfoExtractor):
yield {
**base_format,
'format_id': join_nonempty('sec', height),
- 'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\<1>{bitrate}\2', base_url),
+ # Note: \g<1> is necessary instead of \1 since bitrate is a number
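+ # (with \1, a replacement like '\1128000' would be read as group 11 followed by '28000')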
+ 'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
'tbr': bitrate / 1000.0,
'height': height,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 9abbaf04f..3260399cb 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -616,7 +616,7 @@ class InfoExtractor(object):
kwargs = {
'video_id': e.video_id or self.get_temp_id(url),
'ie': self.IE_NAME,
- 'tb': e.traceback,
+ 'tb': e.traceback or sys.exc_info()[2],
'expected': e.expected,
'cause': e.cause
}
@@ -1574,7 +1574,7 @@ class InfoExtractor(object):
'vcodec': {'type': 'ordered', 'regex': True,
'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
'acodec': {'type': 'ordered', 'regex': True,
- 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
+ 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
@@ -2332,7 +2332,7 @@ class InfoExtractor(object):
if smil is False:
assert not fatal
- return []
+ return [], {}
namespace = self._parse_smil_namespace(smil)
@@ -3663,7 +3663,7 @@ class InfoExtractor(object):
else 'public' if all_known
else None)
- def _configuration_arg(self, key, default=NO_DEFAULT, casesense=False):
+ def _configuration_arg(self, key, default=NO_DEFAULT, *, ie_key=None, casesense=False):
'''
@returns A list of values for the extractor argument given by "key"
or "default" if no such key is present
@@ -3671,7 +3671,7 @@ class InfoExtractor(object):
@param casesense When false, the values are converted to lower case
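+ @param ie_key Use the extractor arguments of the extractor with this key instead of the current one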
'''
val = traverse_obj(
- self._downloader.params, ('extractor_args', self.ie_key().lower(), key))
+ self._downloader.params, ('extractor_args', (ie_key or self.ie_key()).lower(), key))
if val is None:
return [] if default is NO_DEFAULT else default
return list(val) if casesense else [x.lower() for x in val]
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 51e1f8f3c..e1f5e9dc8 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -348,7 +348,7 @@ class HGTVDeIE(DPlayBaseIE):
class DiscoveryPlusIE(DPlayBaseIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?!it/)(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': {
@@ -575,6 +575,18 @@ class DiscoveryPlusShowBaseIE(DPlayBaseIE):
return self.playlist_result(self._entries(show_name), playlist_id=show_name)
+class DiscoveryPlusItalyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/it/video' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.com/it/video/i-signori-della-neve/stagione-2-episodio-1-i-preparativi',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(f'https://discoveryplus.it/video/{video_id}', DPlayIE.ie_key(), video_id)
+
+
class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE):
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)'
_TESTS = [{
diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py
new file mode 100644
index 000000000..058425095
--- /dev/null
+++ b/yt_dlp/extractor/drooble.py
@@ -0,0 +1,116 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+
+class DroobleIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://drooble\.com/(?:
+ (?:(?P<user>[^/]+)/)?(?P<kind>song|videos|music/albums)/(?P<id>\d+)|
+ (?P<user_2>[^/]+)/(?P<kind_2>videos|music))
+ '''
+ _TESTS = [{
+ 'url': 'https://drooble.com/song/2858030',
+ 'md5': '5ffda90f61c7c318dc0c3df4179eb064',
+ 'info_dict': {
+ 'id': '2858030',
+ 'ext': 'mp3',
+ 'title': 'Skankocillin',
+ 'upload_date': '20200801',
+ 'timestamp': 1596241390,
+ 'uploader_id': '95894',
+ 'uploader': 'Bluebeat Shelter',
+ }
+ }, {
+ 'url': 'https://drooble.com/karl340758/videos/2859183',
+ 'info_dict': {
+ 'id': 'J6QCQY_I5Tk',
+ 'ext': 'mp4',
+ 'title': 'Skankocillin',
+ 'uploader_id': 'UCrSRoI5vVyeYihtWEYua7rg',
+ 'description': 'md5:ffc0bd8ba383db5341a86a6cd7d9bcca',
+ 'upload_date': '20200731',
+ 'uploader': 'Bluebeat Shelter',
+ }
+ }, {
+ 'url': 'https://drooble.com/karl340758/music/albums/2858031',
+ 'info_dict': {
+ 'id': '2858031',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ 'url': 'https://drooble.com/karl340758/music',
+ 'info_dict': {
+ 'id': 'karl340758',
+ },
+ 'playlist_mincount': 8,
+ }, {
+ 'url': 'https://drooble.com/karl340758/videos',
+ 'info_dict': {
+ 'id': 'karl340758',
+ },
+ 'playlist_mincount': 8,
+ }]
+
+ def _call_api(self, method, video_id, data=None):
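+ # the API responds with a [success, payload] pair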
+ response = self._download_json(
+ f'https://drooble.com/api/dt/{method}', video_id, data=json.dumps(data).encode())
+ if not response[0]:
+ raise ExtractorError('Unable to download JSON metadata')
+ return response[1]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ user = mobj.group('user') or mobj.group('user_2')
+ kind = mobj.group('kind') or mobj.group('kind_2')
+ display_id = mobj.group('id') or user
+
+ if mobj.group('kind_2') == 'videos':
+ data = {'from_user': display_id, 'album': -1, 'limit': 18, 'offset': 0, 'order': 'new2old', 'type': 'video'}
+ elif kind in ('music/albums', 'music'):
+ data = {'user': user, 'public_only': True, 'individual_limit': {'singles': 1, 'albums': 1, 'playlists': 1}}
+ else:
+ data = {'url_slug': display_id, 'children': 10, 'order': 'old2new'}
+
+ method = 'getMusicOverview' if kind in ('music/albums', 'music') else 'getElements'
+ json_data = self._call_api(method, display_id, data=data)
+ if kind in ('music/albums', 'music'):
+ json_data = json_data['singles']['list']
+
+ entities = []
+ for media in json_data:
+ url = media.get('external_media_url') or media.get('link')
+ if url and url.startswith('https://www.youtube.com'):
+ entities.append({
+ '_type': 'url',
+ 'url': url,
+ 'ie_key': 'Youtube'
+ })
+ continue
+ is_audio = (media.get('type') or '').lower() == 'audio'
+ entities.append({
+ 'url': url,
+ 'id': media['id'],
+ 'title': media['title'],
+ 'duration': int_or_none(media.get('duration')),
+ 'timestamp': int_or_none(media.get('timestamp')),
+ 'album': try_get(media, lambda x: x['album']['title']),
+ 'uploader': try_get(media, lambda x: x['creator']['display_name']),
+ 'uploader_id': try_get(media, lambda x: x['creator']['id']),
+ 'thumbnail': media.get('image_comment'),
+ 'like_count': int_or_none(media.get('likes')),
+ 'vcodec': 'none' if is_audio else None,
+ 'ext': 'mp3' if is_audio else None,
+ })
+
+ if len(entities) > 1:
+ return self.playlist_result(entities, display_id)
+
+ return entities[0]
diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py
new file mode 100644
index 000000000..a7442d8f0
--- /dev/null
+++ b/yt_dlp/extractor/dropout.py
@@ -0,0 +1,212 @@
+# coding: utf-8
+from .common import InfoExtractor
+from .vimeo import VHXEmbedIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_class,
+ get_element_by_id,
+ get_elements_by_class,
+ int_or_none,
+ join_nonempty,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class DropoutIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.dropout.tv/login'
+ _NETRC_MACHINE = 'dropout'
+
+ _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no',
+ 'note': 'Episode in a series',
+ 'md5': '5e000fdfd8d8fa46ff40456f1c2af04a',
+ 'info_dict': {
+ 'id': '738153',
+ 'display_id': 'yes-or-no',
+ 'ext': 'mp4',
+ 'title': 'Yes or No',
+ 'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?',
+ 'release_date': '20200508',
+ 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg',
+ 'series': 'Game Changer',
+ 'season_number': 2,
+ 'season': 'Season 2',
+ 'episode_number': 6,
+ 'episode': 'Yes or No',
+ 'duration': 1180,
+ 'uploader_id': 'user80538407',
+ 'uploader_url': 'https://vimeo.com/user80538407',
+ 'uploader': 'OTT Videos'
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
+ },
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1',
+ 'note': 'Episode in a series (missing release_date)',
+ 'md5': '712caf7c191f1c47c8f1879520c2fa5c',
+ 'info_dict': {
+ 'id': '320562',
+ 'display_id': 'episode-1',
+ 'ext': 'mp4',
+ 'title': 'The Beginning Begins',
+ 'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.',
+ 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg',
+ 'series': 'Dimension 20: Fantasy High',
+ 'season_number': 1,
+ 'season': 'Season 1',
+ 'episode_number': 1,
+ 'episode': 'The Beginning Begins',
+ 'duration': 6838,
+ 'uploader_id': 'user80538407',
+ 'uploader_url': 'https://vimeo.com/user80538407',
+ 'uploader': 'OTT Videos'
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
+ },
+ {
+ 'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special',
+ 'note': 'Episode not in a series',
+ 'md5': 'c30fa18999c5880d156339f13c953a26',
+ 'info_dict': {
+ 'id': '1915774',
+ 'display_id': 'misfits-magic-holiday-special',
+ 'ext': 'mp4',
+ 'title': 'Misfits & Magic Holiday Special',
+ 'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.',
+ 'release_date': '20211215',
+ 'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg',
+ 'duration': 11698,
+ 'uploader_id': 'user80538407',
+ 'uploader_url': 'https://vimeo.com/user80538407',
+ 'uploader': 'OTT Videos'
+ },
+ 'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest']
+ }
+ ]
+
+ def _get_authenticity_token(self, display_id):
+ signin_page = self._download_webpage(
+ self._LOGIN_URL, display_id, note='Getting authenticity token')
+ return self._html_search_regex(
+ r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']',
+ signin_page, 'authenticity_token')
+
+ def _login(self, display_id):
+ username, password = self._get_login_info()
+ if not (username and password):
+ self.raise_login_required(method='password')
+
+ response = self._download_webpage(
+ self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({
+ 'email': username,
+ 'password': password,
+ 'authenticity_token': self._get_authenticity_token(display_id),
+ 'utf8': True
+ }))
+
+ user_has_subscription = self._search_regex(
+ r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none')
+ if user_has_subscription.lower() == 'true':
+ return response
+ elif user_has_subscription.lower() == 'false':
+ raise ExtractorError('Account is not subscribed')
+ else:
+ raise ExtractorError('Incorrect username/password')
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ try:
+ self._login(display_id)
+ webpage = self._download_webpage(url, display_id, note='Downloading video webpage')
+ finally:
+ self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out')
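+ # the logout is in `finally` so the session is closed even if extraction
+ # fails (presumably to avoid leaving a stale login session; motivation assumed)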
+
+ embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url')
+ thumbnail = self._og_search_thumbnail(webpage)
+ watch_info = get_element_by_id('watch-info', webpage) or ''
+
+ title = clean_html(get_element_by_class('video-title', watch_info))
+ season_episode = get_element_by_class(
+ 'site-font-secondary-color', get_element_by_class('text', watch_info))
+ episode_number = int_or_none(self._search_regex(
+ r'Episode (\d+)', season_episode or '', 'episode', default=None))
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': VHXEmbedIE.ie_key(),
+ 'url': embed_url,
+ 'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'),
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._html_search_meta('description', webpage, fatal=False),
+ 'thumbnail': thumbnail.split('?')[0] if thumbnail else None, # Ignore crop/downscale
+ 'series': clean_html(get_element_by_class('series-title', watch_info)),
+ 'episode_number': episode_number,
+ 'episode': title if episode_number else None,
+ 'season_number': int_or_none(self._search_regex(
+ r'Season (\d+),', season_episode or '', 'season', default=None)),
+ 'release_date': unified_strdate(self._search_regex(
+ r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']',
+ watch_info, 'release date', default=None)),
+ }
+
+
+class DropoutSeasonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1',
+ 'note': 'Multi-season series with the season in the url',
+ 'playlist_count': 17,
+ 'info_dict': {
+ 'id': 'dimension-20-fantasy-high-season-1',
+ 'title': 'Dimension 20 Fantasy High - Season 1'
+ }
+ },
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-fantasy-high',
+ 'note': 'Multi-season series with the season not in the url',
+ 'playlist_count': 17,
+ 'info_dict': {
+ 'id': 'dimension-20-fantasy-high-season-1',
+ 'title': 'Dimension 20 Fantasy High - Season 1'
+ }
+ },
+ {
+ 'url': 'https://www.dropout.tv/dimension-20-shriek-week',
+ 'note': 'Single-season series',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'id': 'dimension-20-shriek-week-season-1',
+ 'title': 'Dimension 20 Shriek Week - Season 1'
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ season_id = self._match_id(url)
+ season_title = season_id.replace('-', ' ').title()
+ webpage = self._download_webpage(url, season_id)
+
+ entries = [
+ self.url_result(
+ url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']',
+ item, 'item_url'),
+ ie=DropoutIE.ie_key()
+ ) for item in get_elements_by_class('js-collection-item', webpage)
+ ]
+
+ seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '')
+ current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>',
+ seasons, 'current_season', default='').strip()
+
+ return {
+ '_type': 'playlist',
+ 'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')),
+ 'title': join_nonempty(season_title, current_season, delim=' - '),
+ 'entries': entries
+ }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 0b359a253..1b32efc47 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -357,6 +357,7 @@ from .dplay import (
AnimalPlanetIE,
DiscoveryPlusIndiaIE,
DiscoveryNetworksDeIE,
+ DiscoveryPlusItalyIE,
DiscoveryPlusItalyShowIE,
DiscoveryPlusIndiaShowIE,
)
@@ -385,6 +386,10 @@ from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
from .doodstream import DoodStreamIE
from .dropbox import DropboxIE
+from .dropout import (
+ DropoutSeasonIE,
+ DropoutIE
+)
from .dw import (
DWIE,
DWArticleIE,
@@ -507,6 +512,14 @@ from .gab import (
)
from .gaia import GaiaIE
from .gameinformer import GameInformerIE
+from .gamejolt import (
+ GameJoltIE,
+ GameJoltUserIE,
+ GameJoltGameIE,
+ GameJoltGameSoundtrackIE,
+ GameJoltCommunityIE,
+ GameJoltSearchIE,
+)
from .gamespot import GameSpotIE
from .gamestar import GameStarIE
from .gaskrank import GaskrankIE
@@ -608,6 +621,7 @@ from .instagram import (
InstagramIOSIE,
InstagramUserIE,
InstagramTagIE,
+ InstagramStoryIE,
)
from .internazionale import InternazionaleIE
from .internetvideoarchive import InternetVideoArchiveIE
@@ -1036,6 +1050,10 @@ from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
)
+from .opencast import (
+ OpencastIE,
+ OpencastPlaylistIE,
+)
from .openrec import (
OpenRecIE,
OpenRecCaptureIE,
@@ -1109,6 +1127,10 @@ from .pinterest import (
PinterestIE,
PinterestCollectionIE,
)
+from .pixivsketch import (
+ PixivSketchIE,
+ PixivSketchUserIE,
+)
from .pladform import PladformIE
from .planetmarathi import PlanetMarathiIE
from .platzi import (
@@ -1517,6 +1539,9 @@ from .threeqsdn import ThreeQSDNIE
from .tiktok import (
TikTokIE,
TikTokUserIE,
+ TikTokSoundIE,
+ TikTokEffectIE,
+ TikTokTagIE,
DouyinIE,
)
from .tinypic import TinyPicIE
@@ -1667,6 +1692,7 @@ from .dlive import (
DLiveVODIE,
DLiveStreamIE,
)
+from .drooble import DroobleIE
from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py
index f6733b124..978df31ff 100644
--- a/yt_dlp/extractor/fancode.py
+++ b/yt_dlp/extractor/fancode.py
@@ -41,7 +41,7 @@ class FancodeVodIE(InfoExtractor):
_ACCESS_TOKEN = None
_NETRC_MACHINE = 'fancode'
- _LOGIN_HINT = 'Use "--user refresh --password <refresh_token>" to login using a refresh token'
+ _LOGIN_HINT = 'Use "--username refresh --password <refresh_token>" to login using a refresh token'
headers = {
'content-type': 'application/json',
diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py
new file mode 100644
index 000000000..7f2f6f3e1
--- /dev/null
+++ b/yt_dlp/extractor/gamejolt.py
@@ -0,0 +1,540 @@
+# coding: utf-8
+import itertools
+import json
+import math
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ try_get
+)
+
+
+class GameJoltBaseIE(InfoExtractor):
+ _API_BASE = 'https://gamejolt.com/site-api/'
+
+ def _call_api(self, endpoint, *args, **kwargs):
+ kwargs.setdefault('headers', {}).update({'Accept': 'image/webp,*/*'})
+ return self._download_json(self._API_BASE + endpoint, *args, **kwargs)['payload']
+
+ def _parse_content_as_text(self, content):
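+ # recursively flatten the structured post content (a tree of paragraphs,
+ # text nodes and hard breaks) into plain text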
+ outer_contents, joined_contents = content.get('content') or [], []
+ for outer_content in outer_contents:
+ if outer_content.get('type') != 'paragraph':
+ joined_contents.append(self._parse_content_as_text(outer_content))
+ continue
+ inner_contents, inner_content_text = outer_content.get('content') or [], ''
+ for inner_content in inner_contents:
+ if inner_content.get('text'):
+ inner_content_text += inner_content['text']
+ elif inner_content.get('type') == 'hardBreak':
+ inner_content_text += '\n'
+ joined_contents.append(inner_content_text)
+
+ return '\n'.join(joined_contents)
+
+ def _get_comments(self, post_num_id, post_hash_id):
+ sort_by, scroll_id = self._configuration_arg('comment_sort', ['hot'], ie_key=GameJoltIE.ie_key())[0], -1
+ is_scrolled = sort_by in ('new', 'you')
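+ # 'new' and 'you' paginate via a scroll id (the timestamp of the last
+ # comment seen); other sort orders use plain page numbers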
+ for page in itertools.count(1):
+ comments_data = self._call_api(
+ 'comments/Fireside_Post/%s/%s?%s=%d' % (
+ post_num_id, sort_by,
+ 'scroll_id' if is_scrolled else 'page', scroll_id if is_scrolled else page),
+ post_hash_id, note='Downloading comments list page %d' % page)
+ if not comments_data.get('comments'):
+ break
+ for comment in traverse_obj(comments_data, (('comments', 'childComments'), ...), expected_type=dict, default=[]):
+ yield {
+ 'id': comment['id'],
+ 'text': self._parse_content_as_text(
+ self._parse_json(comment['comment_content'], post_hash_id)),
+ 'timestamp': int_or_none(comment.get('posted_on'), scale=1000),
+ 'like_count': comment.get('votes'),
+ 'author': traverse_obj(comment, ('user', ('display_name', 'name')), expected_type=str_or_none, get_all=False),
+ 'author_id': traverse_obj(comment, ('user', 'username'), expected_type=str_or_none),
+ 'author_thumbnail': traverse_obj(comment, ('user', 'image_avatar'), expected_type=str_or_none),
+ 'parent': comment.get('parent_id') or None,
+ }
+ scroll_id = int_or_none(comments_data['comments'][-1].get('posted_on'))
+
+ def _parse_post(self, post_data):
+ post_id = post_data['hash']
+ lead_content = self._parse_json(post_data.get('lead_content') or '{}', post_id, fatal=False) or {}
+ description, full_description = post_data.get('leadStr') or self._parse_content_as_text(
+ self._parse_json(post_data.get('lead_content'), post_id)), None
+ if post_data.get('has_article'):
+ article_content = self._parse_json(
+ post_data.get('article_content')
+ or self._call_api(f'web/posts/article/{post_data.get("id", post_id)}', post_id,
+ note='Downloading article metadata', errnote='Unable to download article metadata', fatal=False).get('article'),
+ post_id, fatal=False)
+ full_description = self._parse_content_as_text(article_content)
+
+ user_data = post_data.get('user') or {}
+ info_dict = {
+ 'extractor_key': GameJoltIE.ie_key(),
+ 'extractor': 'GameJolt',
+ 'webpage_url': str_or_none(post_data.get('url')) or f'https://gamejolt.com/p/{post_id}',
+ 'id': post_id,
+ 'title': description,
+ 'description': full_description or description,
+ 'display_id': post_data.get('slug'),
+ 'uploader': user_data.get('display_name') or user_data.get('name'),
+ 'uploader_id': user_data.get('username'),
+ 'uploader_url': 'https://gamejolt.com' + user_data['url'] if user_data.get('url') else None,
+ 'categories': [try_get(category, lambda x: '%s - %s' % (x['community']['name'], x['channel'].get('display_title') or x['channel']['title']))
+ for category in post_data.get('communities') or []],
+ 'tags': traverse_obj(
+ lead_content, ('content', ..., 'content', ..., 'marks', ..., 'attrs', 'tag'), expected_type=str_or_none),
+ 'like_count': int_or_none(post_data.get('like_count')),
+ 'comment_count': int_or_none(post_data.get('comment_count'), default=0),
+ 'timestamp': int_or_none(post_data.get('added_on'), scale=1000),
+ 'release_timestamp': int_or_none(post_data.get('published_on'), scale=1000),
+ '__post_extractor': self.extract_comments(post_data.get('id'), post_id)
+ }
+
+ # TODO: Handle multiple videos/embeds?
+ video_data = traverse_obj(post_data, ('videos', ...), expected_type=dict, get_all=False) or {}
+ formats, subtitles, thumbnails = [], {}, []
+ for media in video_data.get('media') or []:
+ media_url, mimetype, ext, media_id = media['img_url'], media.get('filetype', ''), determine_ext(media['img_url']), media.get('type')
+ if mimetype == 'application/vnd.apple.mpegurl' or ext == 'm3u8':
+ hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(media_url, post_id, 'mp4', m3u8_id=media_id)
+ formats.extend(hls_formats)
+ subtitles.update(hls_subs)
+ elif mimetype == 'application/dash+xml' or ext == 'mpd':
+ dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(media_url, post_id, mpd_id=media_id)
+ formats.extend(dash_formats)
+ subtitles.update(dash_subs)
+ elif 'image' in mimetype:
+ thumbnails.append({
+ 'id': media_id,
+ 'url': media_url,
+ 'width': media.get('width'),
+ 'height': media.get('height'),
+ 'filesize': media.get('filesize'),
+ })
+ else:
+ formats.append({
+ 'format_id': media_id,
+ 'url': media_url,
+ 'width': media.get('width'),
+ 'height': media.get('height'),
+ 'filesize': media.get('filesize'),
+ 'acodec': 'none' if 'video-card' in media_url else None,
+ })
+
+ if formats:
+ return {
+ **info_dict,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'view_count': int_or_none(video_data.get('view_count')),
+ }
+
+ gif_entries = []
+ for media in post_data.get('media', []):
+ if determine_ext(media['img_url']) != 'gif' or 'gif' not in media.get('filetype', ''):
+ continue
+ gif_entries.append({
+ 'id': media['hash'],
+ 'title': media['filename'].split('.')[0],
+ 'formats': [{
+ 'format_id': url_key,
+ 'url': media[url_key],
+ 'width': media.get('width') if url_key == 'img_url' else None,
+ 'height': media.get('height') if url_key == 'img_url' else None,
+ 'filesize': media.get('filesize') if url_key == 'img_url' else None,
+ 'acodec': 'none',
+ } for url_key in ('img_url', 'mediaserver_url', 'mediaserver_url_mp4', 'mediaserver_url_webm') if media.get(url_key)]
+ })
+ if gif_entries:
+ return {
+ '_type': 'playlist',
+ **info_dict,
+ 'entries': gif_entries,
+ }
+
+ embed_url = traverse_obj(post_data, ('embeds', ..., 'url'), expected_type=str_or_none, get_all=False)
+ if embed_url:
+ return self.url_result(embed_url)
+ return info_dict
+
+
+class GameJoltIE(GameJoltBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/p/(?:[\w-]*-)?(?P<id>\w{8})'
+ _TESTS = [{
+ # No audio
+ 'url': 'https://gamejolt.com/p/introducing-ramses-jackson-some-fnf-himbo-i-ve-been-animating-fo-c6achnzu',
+ 'md5': 'cd5f733258f6678b0ce500dd88166d86',
+ 'info_dict': {
+ 'id': 'c6achnzu',
+ 'ext': 'mp4',
+ 'display_id': 'introducing-ramses-jackson-some-fnf-himbo-i-ve-been-animating-fo-c6achnzu',
+ 'title': 'Introducing Ramses Jackson, some FNF himbo I’ve been animating for the past few days, hehe.\n#fnfmod #fridaynightfunkin',
+ 'description': 'Introducing Ramses Jackson, some FNF himbo I’ve been animating for the past few days, hehe.\n#fnfmod #fridaynightfunkin',
+ 'uploader': 'Jakeneutron',
+ 'uploader_id': 'Jakeneutron',
+ 'uploader_url': 'https://gamejolt.com/@Jakeneutron',
+ 'categories': ['Friday Night Funkin\' - Videos'],
+ 'tags': ['fnfmod', 'fridaynightfunkin'],
+ 'timestamp': 1633499590,
+ 'upload_date': '20211006',
+ 'release_timestamp': 1633499655,
+ 'release_date': '20211006',
+ 'thumbnail': 're:^https?://.+wgch9mhq.png$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ }
+ }, {
+ # YouTube embed
+ 'url': 'https://gamejolt.com/p/hey-hey-if-there-s-anyone-who-s-looking-to-get-into-learning-a-n6g4jzpq',
+ 'md5': '79a931ff500a5c783ef6c3bda3272e32',
+ 'info_dict': {
+ 'id': 'XsNA_mzC0q4',
+ 'title': 'Adobe Animate CC 2021 Tutorial || Part 1 - The Basics',
+ 'description': 'md5:9d1ab9e2625b3fe1f42b2a44c67fdd13',
+ 'uploader': 'Jakeneutron',
+ 'uploader_id': 'Jakeneutron',
+ 'uploader_url': 'http://www.youtube.com/user/Jakeneutron',
+ 'ext': 'mp4',
+ 'duration': 1749,
+ 'tags': ['Adobe Animate CC', 'Tutorial', 'Animation', 'The Basics', 'For Beginners'],
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'categories': ['Education'],
+ 'availability': 'public',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/XsNA_mzC0q4/maxresdefault.webp',
+ 'age_limit': 0,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UC6_L7fnczNalFZyBthUE9oA',
+ 'channel': 'Jakeneutron',
+ 'channel_id': 'UC6_L7fnczNalFZyBthUE9oA',
+ 'upload_date': '20211015',
+ 'view_count': int,
+ 'chapters': 'count:18',
+ }
+ }, {
+ # Article
+ 'url': 'https://gamejolt.com/p/i-fuckin-broke-chaos-d56h3eue',
+ 'md5': '786c1ccf98fde02c03a2768acb4258d0',
+ 'info_dict': {
+ 'id': 'd56h3eue',
+ 'ext': 'mp4',
+ 'display_id': 'i-fuckin-broke-chaos-d56h3eue',
+ 'title': 'I fuckin broke Chaos.',
+ 'description': 'I moved my tab durning the cutscene so now it\'s stuck like this.',
+ 'uploader': 'Jeff____________',
+ 'uploader_id': 'The_Nyesh_Man',
+ 'uploader_url': 'https://gamejolt.com/@The_Nyesh_Man',
+ 'categories': ['Friday Night Funkin\' - Videos'],
+ 'timestamp': 1639800264,
+ 'upload_date': '20211218',
+ 'release_timestamp': 1639800330,
+ 'release_date': '20211218',
+ 'thumbnail': 're:^https?://.+euksy8bd.png$',
+ 'like_count': int,
+ 'comment_count': int,
+ 'view_count': int,
+ }
+ }, {
+ # Single GIF
+ 'url': 'https://gamejolt.com/p/hello-everyone-i-m-developing-a-pixel-art-style-mod-for-fnf-and-i-vs4gdrd8',
+ 'info_dict': {
+ 'id': 'vs4gdrd8',
+ 'display_id': 'hello-everyone-i-m-developing-a-pixel-art-style-mod-for-fnf-and-i-vs4gdrd8',
+ 'title': 'md5:cc3d8b031d9bc7ec2ec5a9ffc707e1f9',
+ 'description': 'md5:cc3d8b031d9bc7ec2ec5a9ffc707e1f9',
+ 'uploader': 'Quesoguy',
+ 'uploader_id': 'CheeseguyDev',
+ 'uploader_url': 'https://gamejolt.com/@CheeseguyDev',
+ 'categories': ['Game Dev - General', 'Arts n\' Crafts - Creations', 'Pixel Art - showcase',
+ 'Friday Night Funkin\' - Mods', 'Newgrounds - Friday Night Funkin (13+)'],
+ 'timestamp': 1639517122,
+ 'release_timestamp': 1639519966,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'dszyjnwi',
+ 'ext': 'webm',
+ 'title': 'gif-presentacion-mejorado-dszyjnwi',
+ 'n_entries': 1,
+ }
+ }]
+ }, {
+ # Multiple GIFs
+ 'url': 'https://gamejolt.com/p/gif-yhsqkumq',
+ 'playlist_count': 35,
+ 'info_dict': {
+ 'id': 'yhsqkumq',
+ 'display_id': 'gif-yhsqkumq',
+ 'title': 'GIF',
+ 'description': 'GIF',
+ 'uploader': 'DaniilTvman',
+ 'uploader_id': 'DaniilTvman',
+ 'uploader_url': 'https://gamejolt.com/@DaniilTvman',
+ 'categories': ['Five Nights At The AGK Studio Comunity - NEWS game'],
+ 'timestamp': 1638721559,
+ 'release_timestamp': 1638722276,
+ 'like_count': int,
+ 'comment_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+ post_data = self._call_api(
+ f'web/posts/view/{post_id}', post_id)['post']
+ return self._parse_post(post_data)
+
+
+class GameJoltPostListBaseIE(GameJoltBaseIE):
+ def _entries(self, endpoint, list_id, note='Downloading post list', errnote='Unable to download post list', initial_items=[]):
+ page_num, scroll_id = 1, None
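+ # pages are fetched with a scroll id taken from the last item of the previous page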
+ items = initial_items or self._call_api(endpoint, list_id, note=note, errnote=errnote)['items']
+ while items:
+ for item in items:
+ yield self._parse_post(item['action_resource_model'])
+ scroll_id = items[-1]['scroll_id']
+ page_num += 1
+ items = self._call_api(
+ endpoint, list_id, note=f'{note} page {page_num}', errnote=errnote, data=json.dumps({
+ 'scrollDirection': 'from',
+ 'scrollId': scroll_id,
+ }).encode('utf-8')).get('items')
+
+
+class GameJoltUserIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/@(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/@BlazikenSuperStar',
+ 'playlist_mincount': 1,
+ 'info_dict': {
+ 'id': '6116784',
+ 'title': 'S. Blaze',
+ 'description': 'md5:5ba7fbbb549e8ea2545aafbfe22eb03a',
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }]
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ user_data = self._call_api(
+ f'web/profile/@{user_id}', user_id, note='Downloading user info', errnote='Unable to download user info')['user']
+ bio = self._parse_content_as_text(
+ self._parse_json(user_data.get('bio_content', '{}'), user_id, fatal=False) or {})
+ return self.playlist_result(
+ self._entries(f'web/posts/fetch/user/@{user_id}?tab=active', user_id, 'Downloading user posts', 'Unable to download user posts'),
+ str_or_none(user_data.get('id')), user_data.get('display_name') or user_data.get('name'), bio)
+
+
+class GameJoltGameIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/games/[\w-]+/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/games/Friday4Fun/655124',
+ 'playlist_mincount': 2,
+ 'info_dict': {
+ 'id': '655124',
+ 'title': 'Friday Night Funkin\': Friday 4 Fun',
+ 'description': 'md5:576a7dd87912a2dcf33c50d2bd3966d3'
+ },
+ 'params': {
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }]
+
+ def _real_extract(self, url):
+ game_id = self._match_id(url)
+ game_data = self._call_api(
+ f'web/discover/games/{game_id}', game_id, note='Downloading game info', errnote='Unable to download game info')['game']
+ description = self._parse_content_as_text(
+ self._parse_json(game_data.get('description_content', '{}'), game_id, fatal=False) or {})
+ return self.playlist_result(
+ self._entries(f'web/posts/fetch/game/{game_id}', game_id, 'Downloading game posts', 'Unable to download game posts'),
+ game_id, game_data.get('title'), description)
+
+
+class GameJoltGameSoundtrackIE(GameJoltBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/get/soundtrack(?:\?|\#!?)(?:.*?[&;])??game=(?P<id>(?:\d+)+)'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/get/soundtrack?foo=bar&game=657899',
+ 'info_dict': {
+ 'id': '657899',
+ 'title': 'Friday Night Funkin\': Vs Oswald',
+ 'n_entries': None,
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '184434',
+ 'ext': 'mp3',
+ 'title': 'Gettin\' Lucky (Menu Music)',
+ 'url': r're:^https://.+vs-oswald-menu-music\.mp3$',
+ 'release_timestamp': 1635190816,
+ 'release_date': '20211025',
+ 'n_entries': 3,
+ }
+ }, {
+ 'info_dict': {
+ 'id': '184435',
+ 'ext': 'mp3',
+ 'title': 'Rabbit\'s Luck (Extended Version)',
+ 'url': r're:^https://.+rabbit-s-luck--full-version-\.mp3$',
+ 'release_timestamp': 1635190841,
+ 'release_date': '20211025',
+ 'n_entries': 3,
+ }
+ }, {
+ 'info_dict': {
+ 'id': '185228',
+ 'ext': 'mp3',
+ 'title': 'Last Straw',
+ 'url': r're:^https://.+last-straw\.mp3$',
+ 'release_timestamp': 1635881104,
+ 'release_date': '20211102',
+ 'n_entries': 3,
+ }
+ }]
+ }]
+
+ def _real_extract(self, url):
+ game_id = self._match_id(url)
+ game_overview = self._call_api(
+ f'web/discover/games/overview/{game_id}', game_id, note='Downloading soundtrack info', errnote='Unable to download soundtrack info')
+ return self.playlist_result([{
+ 'id': str_or_none(song.get('id')),
+ 'title': str_or_none(song.get('title')),
+ 'url': str_or_none(song.get('url')),
+ 'release_timestamp': int_or_none(song.get('posted_on'), scale=1000),
+ } for song in game_overview.get('songs') or []], game_id, traverse_obj(
+ game_overview, ('microdata', 'name'), (('twitter', 'fb'), 'title'), expected_type=str_or_none, get_all=False))
+
+
+class GameJoltCommunityIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/c/(?P<id>(?P<community>[\w-]+)(?:/(?P<channel>[\w-]+))?)(?:(?:\?|\#!?)(?:.*?[&;])??sort=(?P<sort>\w+))?'
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/c/fnf/videos',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'fnf/videos',
+ 'title': 'Friday Night Funkin\' - Videos',
+ 'description': 'md5:6d8c06f27460f7d35c1554757ffe53c8'
+ },
+ 'params': {
+ 'playlistend': 50,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }, {
+ 'url': 'https://gamejolt.com/c/youtubers',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': 'youtubers/featured',
+ 'title': 'Youtubers - featured',
+ 'description': 'md5:53e5582c93dcc467ab597bfca4db17d4'
+ },
+ 'params': {
+ 'playlistend': 50,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }]
+
+ def _real_extract(self, url):
+ display_id, community_id, channel_id, sort_by = self._match_valid_url(url).group('id', 'community', 'channel', 'sort')
+ channel_id, sort_by = channel_id or 'featured', sort_by or 'new'
+
+ community_data = self._call_api(
+ f'web/communities/view/{community_id}', display_id,
+ note='Downloading community info', errnote='Unable to download community info')['community']
+ channel_data = traverse_obj(self._call_api(
+ f'web/communities/view-channel/{community_id}/{channel_id}', display_id,
+ note='Downloading channel info', errnote='Unable to download channel info', fatal=False), 'channel') or {}
+
+ title = f'{community_data.get("name") or community_id} - {channel_data.get("display_title") or channel_id}'
+ description = self._parse_content_as_text(
+ self._parse_json(community_data.get('description_content') or '{}', display_id, fatal=False) or {})
+ return self.playlist_result(
+ self._entries(
+ f'web/posts/fetch/community/{community_id}?channels[]={sort_by}&channels[]={channel_id}',
+ display_id, 'Downloading community posts', 'Unable to download community posts'),
+ f'{community_id}/{channel_id}', title, description)
+
+
+class GameJoltSearchIE(GameJoltPostListBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?gamejolt\.com/search(?:/(?P<filter>communities|users|games))?(?:\?|\#!?)(?:.*?[&;])??q=(?P<id>(?:[^&#]+)+)'
+ _URL_FORMATS = {
+ 'users': 'https://gamejolt.com/@{username}',
+ 'communities': 'https://gamejolt.com/c/{path}',
+ 'games': 'https://gamejolt.com/games/{slug}/{id}',
+ }
+ _TESTS = [{
+ 'url': 'https://gamejolt.com/search?foo=bar&q=%23fnf',
+ 'playlist_mincount': 50,
+ 'info_dict': {
+ 'id': '#fnf',
+ 'title': '#fnf',
+ },
+ 'params': {
+ 'playlistend': 50,
+ 'ignore_no_formats_error': True,
+ },
+ 'expected_warnings': ['skipping format', 'No video formats found', 'Requested format is not available'],
+ }, {
+ 'url': 'https://gamejolt.com/search/communities?q=cookie%20run',
+ 'playlist_mincount': 10,
+ 'info_dict': {
+ 'id': 'cookie run',
+ 'title': 'cookie run',
+ },
+ }, {
+ 'url': 'https://gamejolt.com/search/users?q=mlp',
+ 'playlist_mincount': 278,
+ 'info_dict': {
+ 'id': 'mlp',
+ 'title': 'mlp',
+ },
+ }, {
+ 'url': 'https://gamejolt.com/search/games?q=roblox',
+ 'playlist_mincount': 688,
+ 'info_dict': {
+ 'id': 'roblox',
+ 'title': 'roblox',
+ },
+ }]
+
+ def _search_entries(self, query, filter_mode, display_query):
+ initial_search_data = self._call_api(
+ f'web/search/{filter_mode}?q={query}', display_query,
+ note=f'Downloading {filter_mode} list', errnote=f'Unable to download {filter_mode} list')
+ entries_num = traverse_obj(initial_search_data, 'count', f'{filter_mode}Count')
+ if not entries_num:
+ return
+ for page in range(1, math.ceil(entries_num / initial_search_data['perPage']) + 1):
+ search_results = self._call_api(
+ f'web/search/{filter_mode}?q={query}&page={page}', display_query,
+ note=f'Downloading {filter_mode} list page {page}', errnote=f'Unable to download {filter_mode} list')
+ for result in search_results[filter_mode]:
+ yield self.url_result(self._URL_FORMATS[filter_mode].format(**result))
+
+ def _real_extract(self, url):
+ filter_mode, query = self._match_valid_url(url).group('filter', 'id')
+ display_query = compat_urllib_parse_unquote(query)
+ return self.playlist_result(
+ self._search_entries(query, filter_mode, display_query) if filter_mode else self._entries(
+ f'web/posts/fetch/search/{query}', display_query, initial_items=self._call_api(
+ f'web/search?q={query}', display_query,
+ note='Downloading initial post list', errnote='Unable to download initial post list')['posts']),
+ display_query, display_query)
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 1ec0ce986..5dafef283 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2345,6 +2345,18 @@ class GenericIE(InfoExtractor):
}
},
{
+ # KVS Player (for sites that serve kt_player.js via non-https urls)
+ 'url': 'http://www.camhub.world/embed/389508',
+ 'md5': 'fbe89af4cfb59c8fd9f34a202bb03e32',
+ 'info_dict': {
+ 'id': '389508',
+ 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source',
+ 'ext': 'mp4',
+ 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер',
+ 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg',
+ }
+ },
+ {
# Reddit-hosted video that will redirect and be processed by RedditIE
# Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
'url': 'https://v.redd.it/zv89llsvexdz',
@@ -3689,7 +3701,7 @@ class GenericIE(InfoExtractor):
self.report_detected('JW Player embed')
if not found:
# Look for generic KVS player
- found = re.search(r'<script [^>]*?src="https://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
+ found = re.search(r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>', webpage)
if found:
                self.report_detected('KVS Player')
if found.group('maj_ver') not in ['4', '5']:
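
Review note: the single-character relaxation above (`https` to `https?`) is what makes the new camhub.world test pass; a quick self-check of the pattern against both schemes:

import re

KT_PLAYER_RE = r'<script [^>]*?src="https?://.+?/kt_player\.js\?v=(?P<ver>(?P<maj_ver>\d+)(\.\d+)+)".*?>'

pages = (
    '<script type="text/javascript" src="https://example.com/kt_player.js?v=5.1.1"></script>',
    '<script type="text/javascript" src="http://www.camhub.world/kt_player.js?v=4.0.0"></script>',
)
for page in pages:
    print(re.search(KT_PLAYER_RE, page).group('maj_ver'))  # 5, then 4: both schemes now match
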
diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py
index 18a30fe67..56a6dc03d 100644
--- a/yt_dlp/extractor/gfycat.py
+++ b/yt_dlp/extractor/gfycat.py
@@ -24,9 +24,10 @@ class GfycatIE(InfoExtractor):
'duration': 10.4,
'view_count': int,
'like_count': int,
- 'dislike_count': int,
'categories': list,
'age_limit': 0,
+ 'uploader_id': 'anonymous',
+ 'description': '',
}
}, {
'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
@@ -40,9 +41,27 @@ class GfycatIE(InfoExtractor):
'duration': 3.52,
'view_count': int,
'like_count': int,
- 'dislike_count': int,
'categories': list,
'age_limit': 0,
+ 'uploader_id': 'anonymous',
+ 'description': '',
+ }
+ }, {
+ 'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
+ 'info_dict': {
+ 'id': 'alienatedsolidgreathornedowl',
+ 'ext': 'mp4',
+ 'upload_date': '20211226',
+ 'uploader_id': 'reactions',
+ 'timestamp': 1640536930,
+ 'like_count': int,
+ 'description': '',
+ 'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
+ 'categories': list,
+ 'age_limit': 0,
+ 'duration': 2.9583333333333335,
+ 'uploader': 'Reaction GIFs',
+ 'view_count': int,
}
}, {
'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
@@ -74,7 +93,7 @@ class GfycatIE(InfoExtractor):
title = gfy.get('title') or gfy['gfyName']
description = gfy.get('description')
timestamp = int_or_none(gfy.get('createDate'))
- uploader = gfy.get('userName')
+ uploader = gfy.get('userName') or gfy.get('username')
view_count = int_or_none(gfy.get('views'))
like_count = int_or_none(gfy.get('likes'))
dislike_count = int_or_none(gfy.get('dislikes'))
@@ -114,7 +133,8 @@ class GfycatIE(InfoExtractor):
'title': title,
'description': description,
'timestamp': timestamp,
- 'uploader': uploader,
+ 'uploader': gfy.get('userDisplayName') or uploader,
+ 'uploader_id': uploader,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 84c1daca6..ab14e5b0a 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -17,6 +17,7 @@ from ..utils import (
int_or_none,
lowercase_escape,
std_headers,
+ str_to_int,
traverse_obj,
url_or_none,
urlencode_postdata,
@@ -293,7 +294,10 @@ class InstagramIE(InstagramBaseIE):
video_id, url = self._match_valid_url(url).group('id', 'url')
webpage, urlh = self._download_webpage_handle(url, video_id)
if 'www.instagram.com/accounts/login' in urlh.geturl():
- self.raise_login_required('You need to log in to access this content')
+ self.report_warning('Main webpage is locked behind the login page. '
+ 'Retrying with embed webpage (Note that some metadata might be missing)')
+ webpage = self._download_webpage(
+ 'https://www.instagram.com/p/%s/embed/' % video_id, video_id, note='Downloading embed webpage')
shared_data = self._parse_json(
self._search_regex(
@@ -314,7 +318,10 @@ class InstagramIE(InstagramBaseIE):
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
webpage, 'additional data', default='{}'),
video_id, fatal=False)
- media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), expected_type=dict) or {}
+ media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), 'shortcode_media', expected_type=dict) or {}
+
+ if not media and 'www.instagram.com/accounts/login' in urlh.geturl():
+ self.raise_login_required('You need to log in to access this content')
uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex(
r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False)
@@ -348,13 +355,14 @@ class InstagramIE(InstagramBaseIE):
formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
self._sort_formats(formats)
+ comment_data = traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))
comments = [{
'author': traverse_obj(comment_dict, ('node', 'owner', 'username')),
'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')),
'id': traverse_obj(comment_dict, ('node', 'id')),
'text': traverse_obj(comment_dict, ('node', 'text')),
'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none),
- } for comment_dict in traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))]
+ } for comment_dict in comment_data] if comment_data else None
display_resources = (
media.get('display_resources')
@@ -375,7 +383,8 @@ class InstagramIE(InstagramBaseIE):
'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
'uploader_id': uploader_id,
'uploader': traverse_obj(media, ('owner', 'full_name')),
- 'like_count': self._get_count(media, 'likes', 'preview_like'),
+ 'like_count': self._get_count(media, 'likes', 'preview_like') or str_to_int(self._search_regex(
+ r'data-log-event="likeCountClick"[^>]*>[^\d]*([\d,\.]+)', webpage, 'like count', fatal=False)),
'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
'comments': comments,
'thumbnails': thumbnails,
@@ -533,3 +542,77 @@ class InstagramTagIE(InstagramPlaylistBaseIE):
'tag_name':
data['entry_data']['TagPage'][0]['graphql']['hashtag']['name']
}
+
+
+class InstagramStoryIE(InstagramBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?instagram\.com/stories/(?P<user>[^/]+)/(?P<id>\d+)'
+ IE_NAME = 'instagram:story'
+
+ _TESTS = [{
+ 'url': 'https://www.instagram.com/stories/highlights/18090946048123978/',
+ 'info_dict': {
+ 'id': '18090946048123978',
+ 'title': 'Rare',
+ },
+ 'playlist_mincount': 50
+ }]
+
+ def _real_extract(self, url):
+ username, story_id = self._match_valid_url(url).groups()
+
+ story_info_url = f'{username}/{story_id}/?__a=1' if username == 'highlights' else f'{username}/?__a=1'
+ story_info = self._download_json(f'https://www.instagram.com/stories/{story_info_url}', story_id, headers={
+ 'X-IG-App-ID': 936619743392459,
+ 'X-ASBD-ID': 198387,
+ 'X-IG-WWW-Claim': 0,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': url,
+ })
+ user_id = story_info['user']['id']
+ highlight_title = traverse_obj(story_info, ('highlight', 'title'))
+
+ story_info_url = user_id if username != 'highlights' else f'highlight:{story_id}'
+ videos = self._download_json(f'https://i.instagram.com/api/v1/feed/reels_media/?reel_ids={story_info_url}', story_id, headers={
+ 'X-IG-App-ID': 936619743392459,
+ 'X-ASBD-ID': 198387,
+ 'X-IG-WWW-Claim': 0,
+ })['reels']
+ entries = []
+
+ videos = traverse_obj(videos, (f'highlight:{story_id}', 'items'), (str(user_id), 'items'))
+ for video_info in videos:
+ formats = []
+ if isinstance(video_info, list):
+ video_info = video_info[0]
+ vcodec = video_info.get('video_codec')
+ dash_manifest_raw = video_info.get('video_dash_manifest')
+ videos_list = video_info.get('video_versions')
+ if not (dash_manifest_raw or videos_list):
+ continue
+ for fmt in videos_list or []:
+ formats.append({
+ 'url': fmt.get('url'),
+ 'width': fmt.get('width'),
+ 'height': fmt.get('height'),
+ 'vcodec': vcodec,
+ })
+ if dash_manifest_raw:
+ formats.extend(self._parse_mpd_formats(self._parse_xml(dash_manifest_raw, story_id), mpd_id='dash'))
+ self._sort_formats(formats)
+ thumbnails = [{
+ 'url': thumbnail.get('url'),
+ 'width': thumbnail.get('width'),
+ 'height': thumbnail.get('height')
+ } for thumbnail in traverse_obj(video_info, ('image_versions2', 'candidates')) or []]
+ entries.append({
+ 'id': video_info.get('id'),
+ 'title': f'Story by {username}',
+ 'timestamp': int_or_none(video_info.get('taken_at')),
+ 'uploader': traverse_obj(video_info, ('user', 'full_name')),
+ 'duration': float_or_none(video_info.get('video_duration')),
+ 'uploader_id': user_id,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries, playlist_id=story_id, playlist_title=highlight_title)
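
Review note: each story item above may carry both progressive `video_versions` and an inline DASH manifest; the merge pattern in isolation (`parse_dash` is a hypothetical stand-in for `_parse_mpd_formats`):

def build_story_formats(video_info, parse_dash):
    formats = [{
        'url': v.get('url'),
        'width': v.get('width'),
        'height': v.get('height'),
    } for v in video_info.get('video_versions') or []]
    dash = video_info.get('video_dash_manifest')
    if dash:
        # DASH variants are appended alongside the progressive URLs
        formats.extend(parse_dash(dash))
    return formats
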
diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py
index 0f87bf1d7..1405ce0c7 100644
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -184,28 +184,38 @@ class LBRYIE(LBRYBaseIE):
display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
- result_value = result['value']
- if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
+ if result['value'].get('stream_type') in self._SUPPORTED_STREAM_TYPES:
+ claim_id, is_live, headers = result['claim_id'], False, None
+ streaming_url = self._call_api_proxy(
+ 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
+ final_url = self._request_webpage(
+ streaming_url, display_id, note='Downloading streaming redirect url info').geturl()
+ elif result.get('value_type') == 'stream':
+ claim_id, is_live = result['signing_channel']['claim_id'], True
+ headers = {'referer': 'https://player.odysee.live/'}
+ live_data = self._download_json(
+ f'https://api.live.odysee.com/v1/odysee/live/{claim_id}', claim_id,
+ note='Downloading livestream JSON metadata')['data']
+ if not live_data['live']:
+ raise ExtractorError('This stream is not live', expected=True)
+ streaming_url = final_url = live_data['url']
+ else:
raise ExtractorError('Unsupported URL', expected=True)
- claim_id = result['claim_id']
- title = result_value['title']
- streaming_url = self._call_api_proxy(
- 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
+
info = self._parse_stream(result, url)
- urlh = self._request_webpage(
- streaming_url, display_id, note='Downloading streaming redirect url info')
- if determine_ext(urlh.geturl()) == 'm3u8':
+ if determine_ext(final_url) == 'm3u8':
info['formats'] = self._extract_m3u8_formats(
- urlh.geturl(), display_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
+ final_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls', live=is_live, headers=headers)
self._sort_formats(info['formats'])
else:
info['url'] = streaming_url
- info.update({
+ return {
+ **info,
'id': claim_id,
- 'title': title,
- })
- return info
+ 'title': result['value']['title'],
+ 'is_live': is_live,
+ 'http_headers': headers,
+ }
class LBRYChannelIE(LBRYBaseIE):
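
Review note: after either branch (VOD or Odysee livestream) resolves, HLS handling keys off the final URL's extension; roughly what `determine_ext` reduces to in this spot:

from urllib.parse import urlparse

def is_hls(final_url):
    # determine_ext() here boils down to the suffix of the URL's path component
    return urlparse(final_url).path.rpartition('.')[2] == 'm3u8'

print(is_hls('https://player.odysee.live/content/master.m3u8'))  # True
print(is_hls('https://cdn.lbryplayer.xyz/content/stream.mp4'))   # False
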
diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py
index 3639d142f..89380d039 100644
--- a/yt_dlp/extractor/njpwworld.py
+++ b/yt_dlp/extractor/njpwworld.py
@@ -77,13 +77,8 @@ class NJPWWorldIE(InfoExtractor):
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
player_path = '/intent?id=%s&type=url' % vid
player_url = compat_urlparse.urljoin(url, player_path)
- formats.append({
- 'url': player_url,
- 'format_id': kind,
- 'ext': 'mp4',
- 'protocol': 'm3u8',
- 'quality': 2 if kind == 'high' else 1,
- })
+ formats += self._extract_m3u8_formats(
+ player_url, video_id, 'mp4', 'm3u8_native', m3u8_id=kind, fatal=False, quality=int(kind == 'high'))
self._sort_formats(formats)
diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py
index 9d1122f0c..49f062d7a 100644
--- a/yt_dlp/extractor/npr.py
+++ b/yt_dlp/extractor/npr.py
@@ -91,7 +91,8 @@ class NprIE(InfoExtractor):
elif format_id == 'smil':
smil_formats = self._extract_smil_formats(
format_url, media_id, transform_source=lambda s: s.replace(
- 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
+ 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'),
+ fatal=False)
self._check_formats(smil_formats, media_id)
formats.extend(smil_formats)
else:
diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py
new file mode 100644
index 000000000..cf8d91717
--- /dev/null
+++ b/yt_dlp/extractor/opencast.py
@@ -0,0 +1,177 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ variadic,
+)
+
+
+class OpencastBaseIE(InfoExtractor):
+ _INSTANCES_RE = r'''(?:
+ opencast\.informatik\.kit\.edu|
+ electures\.uni-muenster\.de|
+ oc-presentation\.ltcc\.tuwien\.ac\.at|
+ medien\.ph-noe\.ac\.at|
+ oc-video\.ruhr-uni-bochum\.de|
+ oc-video1\.ruhr-uni-bochum\.de|
+ opencast\.informatik\.uni-goettingen\.de|
+ heicast\.uni-heidelberg\.de|
+ opencast\.hawk\.de:8080|
+ opencast\.hs-osnabrueck\.de|
+ video[0-9]+\.virtuos\.uni-osnabrueck\.de|
+ opencast\.uni-koeln\.de|
+ media\.opencast\.hochschule-rhein-waal\.de|
+ matterhorn\.dce\.harvard\.edu|
+ hs-harz\.opencast\.uni-halle\.de|
+ videocampus\.urz\.uni-leipzig\.de|
+ media\.uct\.ac\.za|
+ vid\.igb\.illinois\.edu|
+ cursosabertos\.c3sl\.ufpr\.br|
+ mcmedia\.missioncollege\.org|
+ clases\.odon\.edu\.uy
+ )'''
+ _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+
+ def _call_api(self, host, video_id, **kwargs):
+ return self._download_json(self._API_BASE % (host, video_id), video_id, **kwargs)
+
+ def _parse_mediapackage(self, video):
+ video_id = video.get('id')
+ if video_id is None:
+ raise ExtractorError('Video id was not found')
+
+ formats = []
+ for track in variadic(traverse_obj(video, ('media', 'track')) or []):
+ href = track.get('url')
+ if href is None:
+ continue
+ ext = determine_ext(href, None)
+
+ transport = track.get('transport')
+
+ if transport == 'DASH' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats_and_subtitles(href, video_id, mpd_id='dash', fatal=False))
+ elif transport == 'HLS' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats_and_subtitles(
+ href, video_id, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False))
+ elif transport == 'HDS' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(href, video_id, f4m_id='hds', fatal=False))
+ elif transport == 'SMOOTH':
+ formats.extend(self._extract_ism_formats(href, video_id, ism_id='smooth', fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(href, video_id, fatal=False))
+ else:
+ track_obj = {
+ 'url': href,
+ 'ext': ext,
+ 'format_note': track.get('transport'),
+ 'resolution': traverse_obj(track, ('video', 'resolution')),
+ 'fps': int_or_none(traverse_obj(track, ('video', 'framerate'))),
+ 'vbr': int_or_none(traverse_obj(track, ('video', 'bitrate')), scale=1000),
+ 'vcodec': traverse_obj(track, ('video', 'encoder', 'type')) if track.get('video') else 'none',
+ 'abr': int_or_none(traverse_obj(track, ('audio', 'bitrate')), scale=1000),
+ 'asr': int_or_none(traverse_obj(track, ('audio', 'samplingrate'))),
+ 'acodec': traverse_obj(track, ('audio', 'encoder', 'type')) if track.get('audio') else 'none',
+ }
+
+ if transport == 'RTMP':
+ m_obj = re.search(r'(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<ext>.+):(?P<playpath>.+)', href)
+ if not m_obj:
+ continue
+ track_obj.update({
+ 'app': m_obj.group('app'),
+ 'ext': m_obj.group('ext'),
+ 'play_path': m_obj.group('ext') + ':' + m_obj.group('playpath'),
+ 'rtmp_live': True,
+ 'preference': -2,
+ })
+ formats.append(track_obj)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': video.get('title'),
+ 'series': video.get('seriestitle'),
+ 'season_id': video.get('series'),
+ 'creator': traverse_obj(video, ('creators', 'creator')),
+ 'timestamp': parse_iso8601(video.get('start')),
+ 'thumbnail': traverse_obj(video, ('attachments', 'attachment', ..., 'url'), get_all=False),
+ }
+
+
+class OpencastIE(OpencastBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?P<host>%s)/paella/ui/watch.html\?.*?
+ id=(?P<id>%s)
+ ''' % (OpencastBaseIE._INSTANCES_RE, OpencastBaseIE._UUID_RE)
+
+ _API_BASE = 'https://%s/search/episode.json?id=%s'
+
+ _TESTS = [
+ {
+ 'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
+ 'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
+ 'info_dict': {
+ 'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
+ 'ext': 'mp4',
+ 'title': '11 - Kryptographie - 24.11.2015',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1606208400,
+ 'upload_date': '20201124',
+ },
+ }
+ ]
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).group('host', 'id')
+ return self._parse_mediapackage(
+ self._call_api(host, video_id)['search-results']['result']['mediapackage'])
+
+
+class OpencastPlaylistIE(OpencastBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://(?P<host>%s)/engage/ui/index.html\?.*?
+ epFrom=(?P<id>%s)
+ ''' % (OpencastBaseIE._INSTANCES_RE, OpencastBaseIE._UUID_RE)
+
+ _API_BASE = 'https://%s/search/episode.json?sid=%s'
+
+ _TESTS = [
+ {
+ 'url': 'https://oc-video1.ruhr-uni-bochum.de/engage/ui/index.html?epFrom=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+ 'info_dict': {
+ 'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
+ 'title': 'Kryptographie - WiSe 15/16',
+ },
+ 'playlist_mincount': 28,
+ },
+ {
+ 'url': 'https://oc-video.ruhr-uni-bochum.de/engage/ui/index.html?e=1&p=1&epFrom=b1a54262-3684-403f-9731-8e77c3766f9a',
+ 'info_dict': {
+ 'id': 'b1a54262-3684-403f-9731-8e77c3766f9a',
+ 'title': 'inSTUDIES-Social movements and prefigurative politics in a global perspective',
+ },
+ 'playlist_mincount': 6,
+ },
+ ]
+
+ def _real_extract(self, url):
+ host, video_id = self._match_valid_url(url).group('host', 'id')
+
+ entries = [
+ self._parse_mediapackage(episode['mediapackage'])
+ for episode in variadic(self._call_api(host, video_id)['search-results']['result'])
+ if episode.get('mediapackage')
+ ]
+
+ return self.playlist_result(entries, video_id, traverse_obj(entries, (0, 'series')))
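
Review note: `_parse_mediapackage` selects a format parser from either the declared `transport` or the URL extension; the branch order, reduced to a sketch:

def pick_handler(transport, ext):
    # Mirrors the branch order in _parse_mediapackage above
    if transport == 'DASH' or ext == 'mpd':
        return 'dash'
    if transport == 'HLS' or ext == 'm3u8':
        return 'hls'
    if transport == 'HDS' or ext == 'f4m':
        return 'hds'
    if transport == 'SMOOTH':
        return 'ism'
    if ext == 'smil':
        return 'smil'
    return 'progressive'  # falls through to the plain track dict (and RTMP special-casing)

print(pick_handler('HLS', None), pick_handler(None, 'mpd'))  # hls dash
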
diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py
new file mode 100644
index 000000000..f0ad0b24a
--- /dev/null
+++ b/yt_dlp/extractor/pixivsketch.py
@@ -0,0 +1,122 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class PixivSketchBaseIE(InfoExtractor):
+ def _call_api(self, video_id, path, referer, note='Downloading JSON metadata'):
+ response = self._download_json(f'https://sketch.pixiv.net/api/{path}', video_id, note=note, headers={
+ 'Referer': referer,
+ 'X-Requested-With': referer,
+ })
+ errors = traverse_obj(response, ('errors', ..., 'message'))
+ if errors:
+ raise ExtractorError(' '.join(f'{e}.' for e in errors))
+ return response.get('data') or {}
+
+
+class PixivSketchIE(PixivSketchBaseIE):
+ IE_NAME = 'pixiv:sketch'
+ _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<uploader_id>[a-zA-Z0-9_-]+)/lives/(?P<id>\d+)/?'
+ _TESTS = [{
+ 'url': 'https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507',
+ 'info_dict': {
+ 'id': '7370666691623196569',
+ 'title': 'まにあえクリスマス!',
+ 'uploader': 'ぬふちゃ',
+ 'uploader_id': 'nuhutya',
+ 'channel_id': '9844815',
+ 'age_limit': 0,
+ 'timestamp': 1640351536,
+ },
+ 'skip': True,
+ }, {
+ # these two (age_limit > 0) require you to log in on the website, but it's actually not required for download
+ 'url': 'https://sketch.pixiv.net/@namahyou/lives/4393103321546851377',
+ 'info_dict': {
+ 'id': '4907995960957946943',
+ 'title': 'クリスマスなんて知らん🖕',
+ 'uploader': 'すゃもり',
+ 'uploader_id': 'suya2mori2',
+ 'channel_id': '31169300',
+ 'age_limit': 15,
+ 'timestamp': 1640347640,
+ },
+ 'skip': True,
+ }, {
+ 'url': 'https://sketch.pixiv.net/@8aki/lives/3553803162487249670',
+ 'info_dict': {
+ 'id': '1593420639479156945',
+ 'title': 'おまけ本作業(リョナ有)',
+ 'uploader': 'おぶい / Obui',
+ 'uploader_id': 'oving',
+ 'channel_id': '17606',
+ 'age_limit': 18,
+ 'timestamp': 1640330263,
+ },
+ 'skip': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
+ data = self._call_api(video_id, f'lives/{video_id}.json', url)
+
+ if not traverse_obj(data, 'is_broadcasting'):
+ raise ExtractorError(f'This live stream is offline. Use https://sketch.pixiv.net/@{uploader_id} for the ongoing live stream.', expected=True)
+
+ m3u8_url = traverse_obj(data, ('owner', 'hls_movie', 'url'))
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': data.get('name'),
+ 'formats': formats,
+ 'uploader': traverse_obj(data, ('user', 'name'), ('owner', 'user', 'name')),
+ 'uploader_id': traverse_obj(data, ('user', 'unique_name'), ('owner', 'user', 'unique_name')),
+ 'channel_id': str(traverse_obj(data, ('user', 'pixiv_user_id'), ('owner', 'user', 'pixiv_user_id'))),
+ 'age_limit': 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0,
+ 'timestamp': unified_timestamp(data.get('created_at')),
+ 'is_live': True
+ }
+
+
+class PixivSketchUserIE(PixivSketchBaseIE):
+ IE_NAME = 'pixiv:sketch:user'
+ _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<id>[a-zA-Z0-9_-]+)/?'
+ _TESTS = [{
+ 'url': 'https://sketch.pixiv.net/@nuhutya',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sketch.pixiv.net/@namahyou',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sketch.pixiv.net/@8aki',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return super(PixivSketchUserIE, cls).suitable(url) and not PixivSketchIE.suitable(url)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ data = self._call_api(user_id, f'lives/users/@{user_id}.json', url)
+
+ if not traverse_obj(data, 'is_broadcasting'):
+ try:
+ self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure')
+ except ExtractorError as ex:
+ if ex.cause and ex.cause.code == 401:
+ self.raise_login_required(f'Please log in, or use a direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies')
+ raise ExtractorError('This user is offline', expected=True)
+
+ return self.url_result(f'https://sketch.pixiv.net/@{user_id}/lives/{data["id"]}')
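
Review note: `_call_api` above folds every API error message into one exception; the same aggregation standalone (`RuntimeError` standing in for `ExtractorError`):

def raise_api_errors(response):
    # Collect every 'message' under the 'errors' key, as _call_api does above
    errors = [e.get('message') for e in response.get('errors') or [] if e.get('message')]
    if errors:
        raise RuntimeError(' '.join(f'{m}.' for m in errors))
    return response.get('data') or {}
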
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index 6d894affd..4357c79df 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -258,8 +258,7 @@ class PornHubIE(PornHubBaseIE):
webpage)
def _extract_count(self, pattern, webpage, name):
- return str_to_int(self._search_regex(
- pattern, webpage, '%s count' % name, fatal=False))
+ return str_to_int(self._search_regex(pattern, webpage, '%s count' % name, default=None))
def _real_extract(self, url):
mobj = self._match_valid_url(url)
diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py
index 19b2f451c..ac42e58d9 100644
--- a/yt_dlp/extractor/rcti.py
+++ b/yt_dlp/extractor/rcti.py
@@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
-import itertools
import json
import random
import time
@@ -12,6 +11,7 @@ from ..utils import (
dict_get,
ExtractorError,
strip_or_none,
+ traverse_obj,
try_get
)
@@ -26,7 +26,7 @@ class RCTIPlusBaseIE(InfoExtractor):
json = self._download_json(
url, video_id, note=note, headers={'Authorization': self._AUTH_KEY})
if json.get('status', {}).get('code', 0) != 0:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, json["status"]["message_client"]), cause=json)
+ raise ExtractorError(f'{self.IE_NAME} said: {json["status"]["message_client"]}', cause=json)
return json.get('data'), json.get('meta')
@@ -223,18 +223,30 @@ class RCTIPlusIE(RCTIPlusBaseIE):
class RCTIPlusSeriesIE(RCTIPlusBaseIE):
- _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
+ _VALID_URL = r'https://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?'
_TESTS = [{
- 'url': 'https://www.rctiplus.com/programs/540/upin-ipin',
- 'playlist_mincount': 417,
+ 'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran',
+ 'playlist_mincount': 1019,
'info_dict': {
- 'id': '540',
- 'title': 'Upin & Ipin',
- 'description': 'md5:22cc912381f389664416844e1ec4f86b',
+ 'id': '829',
+ 'title': 'Putri Untuk Pangeran',
+ 'description': 'md5:aca7b54d05bd95a67d4f4613cc1d622d',
+ 'age_limit': 2,
+ 'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'],
+ 'display_id': 'putri-untuk-pangeran',
+ 'tag': 'count:18',
},
- }, {
- 'url': 'https://www.rctiplus.com/programs/540/upin-ipin/episodes?utm_source=Rplusdweb&utm_medium=share_copy&utm_campaign=programsupin-ipin',
- 'only_matching': True,
+ }, { # No episodes
+ 'url': 'https://www.rctiplus.com/programs/615/inews-pagi',
+ 'playlist_mincount': 388,
+ 'info_dict': {
+ 'id': '615',
+ 'title': 'iNews Pagi',
+ 'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04',
+ 'age_limit': 2,
+ 'tag': 'count:11',
+ 'display_id': 'inews-pagi',
+ }
}]
_AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings
'S-SU': 2,
@@ -269,47 +281,63 @@ class RCTIPlusSeriesIE(RCTIPlusBaseIE):
display_id, '%s page %s' % (note, page_num))[0] or []
for video_json in episode_list:
- link = video_json['share_link']
- url_res = self.url_result(link, 'RCTIPlus', video_json.get('product_id'), video_json.get('title'))
- url_res.update(metadata)
- yield url_res
+ yield {
+ '_type': 'url',
+ 'url': video_json['share_link'],
+ 'ie_key': RCTIPlusIE.ie_key(),
+ 'id': video_json.get('product_id'),
+ 'title': video_json.get('title'),
+ 'display_id': (video_json.get('title_code') or '').replace('_', '-'),
+ 'description': video_json.get('summary'),
+ 'timestamp': video_json.get('release_date'),
+ 'duration': video_json.get('duration'),
+ 'season_number': video_json.get('season'),
+ 'episode_number': video_json.get('episode'),
+ **metadata
+ }
+
+ def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}):
+ if not video_type or video_type == 'episodes':
+ try:
+ seasons_list = self._call_api(
+ f'https://api.rctiplus.com/api/v1/program/{series_id}/season',
+ display_id, 'Downloading seasons list JSON')[0]
+ except ExtractorError as e:
+ if 'not found' not in str(e):
+ raise
+ seasons_list = []
+ for season in seasons_list:
+ yield from self._entries(
+ f'https://api.rctiplus.com/api/v2/program/{series_id}/episode?season={season["season"]}',
+ display_id, f'Downloading season {season["season"]} episode entries', metadata)
+ if not video_type or video_type == 'extras':
+ yield from self._entries(
+ f'https://api.rctiplus.com/api/v2/program/{series_id}/extra?content_id=0',
+ display_id, 'Downloading extra entries', metadata)
+ if not video_type or video_type == 'clips':
+ yield from self._entries(
+ f'https://api.rctiplus.com/api/v2/program/{series_id}/clip?content_id=0',
+ display_id, 'Downloading clip entries', metadata)
def _real_extract(self, url):
- series_id, display_id = self._match_valid_url(url).groups()
+ series_id, display_id, video_type = self._match_valid_url(url).group('id', 'display_id', 'type')
+ if video_type:
+ self.report_warning(
+ f'Only {video_type} will be downloaded. '
+ f'To download everything from the series, remove "/{video_type}" from the URL')
series_meta, meta_paths = self._call_api(
- 'https://api.rctiplus.com/api/v1/program/%s/detail' % series_id, display_id, 'Downloading series metadata')
+ f'https://api.rctiplus.com/api/v1/program/{series_id}/detail', display_id, 'Downloading series metadata')
metadata = {
- 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']])
+ 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]),
+ 'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'),
+ expected_type=lambda x: strip_or_none(x) or None),
+ 'tag': traverse_obj(series_meta, ('tag', ..., 'name'),
+ expected_type=lambda x: strip_or_none(x) or None),
}
-
- cast = []
- for star in series_meta.get('starring', []):
- cast.append(strip_or_none(star.get('name')))
- for star in series_meta.get('creator', []):
- cast.append(strip_or_none(star.get('name')))
- for star in series_meta.get('writer', []):
- cast.append(strip_or_none(star.get('name')))
- metadata['cast'] = cast
-
- tags = []
- for tag in series_meta.get('tag', []):
- tags.append(strip_or_none(tag.get('name')))
- metadata['tag'] = tags
-
- entries = []
- seasons_list = self._call_api(
- 'https://api.rctiplus.com/api/v1/program/%s/season' % series_id, display_id, 'Downloading seasons list JSON')[0]
- for season in seasons_list:
- entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/episode?season=%s' % (series_id, season['season']),
- display_id, 'Downloading season %s episode entries' % season['season'], metadata))
-
- entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/clip?content_id=0' % series_id,
- display_id, 'Downloading clip entries', metadata))
- entries.append(self._entries('https://api.rctiplus.com/api/v2/program/%s/extra?content_id=0' % series_id,
- display_id, 'Downloading extra entries', metadata))
-
- return self.playlist_result(itertools.chain(*entries), series_id, series_meta.get('title'), series_meta.get('summary'), **metadata)
+ return self.playlist_result(
+ self._series_entries(series_id, display_id, video_type, metadata), series_id,
+ series_meta.get('title'), series_meta.get('summary'), display_id=display_id, **metadata)
class RCTIPlusTVIE(RCTIPlusBaseIE):
@@ -345,5 +373,6 @@ class RCTIPlusTVIE(RCTIPlusBaseIE):
tv_id = match.get('tvname') or match.get('eventname')
webpage = self._download_webpage(url, tv_id)
video_type, video_id = self._search_regex(
- r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url', webpage, 'video link', group=('type', 'id'))
+ r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url',
+ webpage, 'video link', group=('type', 'id'))
return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus')
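
Review note: the `traverse_obj` call above replaces three explicit loops; a tuple of keys at one level branches the traversal, `...` walks each list, and `'name'` picks the field. A pure-Python equivalent of that specific pattern (simplified; the real helper in yt_dlp/utils.py is far more general):

def collect_names(meta, roles=('starring', 'creator', 'writer')):
    names = []
    for role in roles:                         # branching over several top-level keys
        for person in meta.get(role) or []:    # '...' steps into every list element
            name = (person.get('name') or '').strip()
            if name:                           # expected_type drops empty strings
                names.append(name)
    return names

print(collect_names({'starring': [{'name': 'A '}], 'writer': [{'name': ''}]}))  # ['A']
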
diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py
index 18672b2e3..652fdd116 100644
--- a/yt_dlp/extractor/roosterteeth.py
+++ b/yt_dlp/extractor/roosterteeth.py
@@ -99,7 +99,7 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
},
- 'skip_download': 'm3u8',
+ 'params': {'skip_download': True},
}, {
'url': 'https://roosterteeth.com/watch/rwby-bonus-25',
'info_dict': {
@@ -112,7 +112,7 @@ class RoosterTeethIE(RoosterTeethBaseIE):
'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
'ext': 'mp4',
},
- 'skip_download': 'm3u8',
+ 'params': {'skip_download': True},
}, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
'only_matching': True,
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index f251e5599..8146b3ef5 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -130,7 +130,7 @@ class SoundcloudBaseIE(InfoExtractor):
elif username is not None:
self.report_warning(
'Login using username and password is not currently supported. '
- 'Use "--user oauth --password <oauth_token>" to login using an oauth token')
+ 'Use "--username oauth --password <oauth_token>" to login using an oauth token')
r'''
def genDevId():
diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index 7f777c40b..4ed0fb592 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -7,14 +7,13 @@ from ..utils import (
extract_attributes,
ExtractorError,
get_element_by_class,
- js_to_json,
)
class SteamIE(InfoExtractor):
_VALID_URL = r"""(?x)
- https?://store\.steampowered\.com/
- (agecheck/)?
+ https?://(?:store\.steampowered|steamcommunity)\.com/
+ (?:agecheck/)?
(?P<urltype>video|app)/ #If the page is only for videos or for a game
(?P<gameID>\d+)/?
(?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
@@ -27,21 +26,24 @@ class SteamIE(InfoExtractor):
'url': 'http://store.steampowered.com/video/105600/',
'playlist': [
{
- 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
+ 'md5': '695242613303ffa2a4c44c9374ddc067',
'info_dict': {
- 'id': '2040428',
+ 'id': '256785003',
'ext': 'mp4',
- 'title': 'Terraria 1.3 Trailer',
- 'playlist_index': 1,
+ 'title': 'Terraria video 256785003',
+ 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
+ 'n_entries': 2,
}
},
{
- 'md5': '911672b20064ca3263fa89650ba5a7aa',
+ 'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
'info_dict': {
- 'id': '2029566',
+ 'id': '2040428',
'ext': 'mp4',
- 'title': 'Terraria 1.2 Trailer',
+ 'title': 'Terraria video 2040428',
'playlist_index': 2,
+ 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
+ 'n_entries': 2,
}
}
],
@@ -53,96 +55,76 @@ class SteamIE(InfoExtractor):
'playlistend': 2,
}
}, {
- 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
+ 'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
'info_dict': {
- 'id': 'X8kpJBlzD2E',
+ 'id': '256757115',
+ 'title': 'Grand Theft Auto V video 256757115',
'ext': 'mp4',
- 'upload_date': '20140617',
- 'title': 'FRONTIERS - Trapping',
- 'description': 'md5:bf6f7f773def614054089e5769c12a6e',
- 'uploader': 'AAD Productions',
- 'uploader_id': 'AtomicAgeDogGames',
- }
+ 'thumbnail': r're:^https://cdn\.[^\.]+\.steamstatic\.com',
+ 'n_entries': 20,
+ },
}]
def _real_extract(self, url):
m = self._match_valid_url(url)
fileID = m.group('fileID')
if fileID:
- videourl = url
+ video_url = url
playlist_id = fileID
else:
gameID = m.group('gameID')
playlist_id = gameID
- videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+ video_url = self._VIDEO_PAGE_TEMPLATE % playlist_id
- self._set_cookie('steampowered.com', 'mature_content', '1')
+ self._set_cookie('steampowered.com', 'wants_mature_content', '1')
+ self._set_cookie('steampowered.com', 'birthtime', '944006401')
+ self._set_cookie('steampowered.com', 'lastagecheckage', '1-0-2000')
- webpage = self._download_webpage(videourl, playlist_id)
+ webpage = self._download_webpage(video_url, playlist_id)
- if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
- videourl = self._AGECHECK_TEMPLATE % playlist_id
+ if re.search('<div[^>]+>Please enter your birth date to continue:</div>', webpage) is not None:
+ video_url = self._AGECHECK_TEMPLATE % playlist_id
self.report_age_confirmation()
- webpage = self._download_webpage(videourl, playlist_id)
-
- flash_vars = self._parse_json(self._search_regex(
- r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
- 'flash vars'), playlist_id, js_to_json)
+ webpage = self._download_webpage(video_url, playlist_id)
- playlist_title = None
+ videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
entries = []
- if fileID:
- playlist_title = get_element_by_class('workshopItemTitle', webpage)
- for movie in flash_vars.values():
- if not movie:
- continue
- youtube_id = movie.get('YOUTUBE_VIDEO_ID')
- if not youtube_id:
- continue
+ playlist_title = get_element_by_class('apphub_AppName', webpage)
+ for movie, movie_id in videos:
+ if not movie:
+ continue
+ movie = extract_attributes(movie)
+ if not movie_id:
+ continue
+ entry = {
+ 'id': movie_id,
+ 'title': f'{playlist_title} video {movie_id}',
+ }
+ formats = []
+ if movie:
+ entry['thumbnail'] = movie.get('data-poster')
+ for quality in ('', '-hd'):
+ for ext in ('webm', 'mp4'):
+ video_url = movie.get('data-%s%s-source' % (ext, quality))
+ if video_url:
+ formats.append({
+ 'format_id': ext + quality,
+ 'url': video_url,
+ })
+ self._sort_formats(formats)
+ entry['formats'] = formats
+ entries.append(entry)
+ embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
+ for evideos in embedded_videos:
+ evideos = extract_attributes(evideos).get('src')
+ video_id = self._search_regex(r'youtube\.com/embed/([0-9A-Za-z_-]{11})', evideos, 'youtube_video_id', default=None)
+ if video_id:
entries.append({
- '_type': 'url',
- 'url': youtube_id,
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': video_id,
'ie_key': 'Youtube',
})
- else:
- playlist_title = get_element_by_class('apphub_AppName', webpage)
- for movie_id, movie in flash_vars.items():
- if not movie:
- continue
- video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
- title = movie.get('MOVIE_NAME')
- if not title or not video_id:
- continue
- entry = {
- 'id': video_id,
- 'title': title.replace('+', ' '),
- }
- formats = []
- flv_url = movie.get('FILENAME')
- if flv_url:
- formats.append({
- 'format_id': 'flv',
- 'url': flv_url,
- })
- highlight_element = self._search_regex(
- r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
- webpage, 'highlight element', fatal=False)
- if highlight_element:
- highlight_attribs = extract_attributes(highlight_element)
- if highlight_attribs:
- entry['thumbnail'] = highlight_attribs.get('data-poster')
- for quality in ('', '-hd'):
- for ext in ('webm', 'mp4'):
- video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
- if video_url:
- formats.append({
- 'format_id': ext + quality,
- 'url': video_url,
- })
- if not formats and not self.get_param('ignore_no_formats'):
- continue
- entry['formats'] = formats
- entries.append(entry)
if not entries:
raise ExtractorError('Could not find any videos')
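
Review note: the diff swaps the old `mature_content` cookie for the three cookies Steam's age gate actually checks; seeding them into a stock `CookieJar` looks roughly like this (cookie names and sentinel values taken from the diff):

from http.cookiejar import Cookie, CookieJar

def set_age_cookies(jar, domain='steampowered.com'):
    # Same three cookies the extractor now sets before fetching the page
    for name, value in (('wants_mature_content', '1'),
                        ('birthtime', '944006401'),
                        ('lastagecheckage', '1-0-2000')):
        jar.set_cookie(Cookie(
            0, name, value, None, False, domain, True, False, '/', True,
            False, None, False, None, None, {}))

jar = CookieJar()
set_age_cookies(jar)
print(sorted(c.name for c in jar))
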
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 2cd7ba02e..18f1c5630 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -22,8 +22,8 @@ from ..utils import (
class TikTokBaseIE(InfoExtractor):
- _APP_VERSION = '20.9.3'
- _MANIFEST_APP_VERSION = '291'
+ _APP_VERSION = '20.1.0'
+ _MANIFEST_APP_VERSION = '200'
_APP_NAME = 'trill'
_AID = 1180
_API_HOSTNAME = 'api-h2.tiktokv.com'
@@ -342,16 +342,66 @@ class TikTokIE(TikTokBaseIE):
'comment_count': int,
}
}, {
- # Promoted content/ad
- 'url': 'https://www.tiktok.com/@MS4wLjABAAAAAR29F6J2Ktu0Daw03BJyXPNoRQ-W7U5a0Mn3lVCq2rQhjOd_WNLclHUoFgwX8Eno/video/6932675057474981122',
- 'only_matching': True,
+ # Banned audio, only available on the app
+ 'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
+ 'info_dict': {
+ 'id': '6984138651336838402',
+ 'ext': 'mp4',
+ 'title': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
+ 'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥',
+ 'uploader': 'barudakhb_',
+ 'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
+ 'uploader_id': '6974687867511718913',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
+ 'track': 'Boka Dance',
+ 'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
+ 'timestamp': 1626121503,
+ 'duration': 18,
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20210712',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ }
+ }, {
+ # Sponsored video, only available with feed workaround
+ 'url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_/video/7042692929109986561',
+ 'info_dict': {
+ 'id': '7042692929109986561',
+ 'ext': 'mp4',
+ 'title': 'Slap and Run!',
+ 'description': 'Slap and Run!',
+ 'uploader': 'user440922249',
+ 'creator': 'Slap And Run',
+ 'uploader_id': '7036055384943690754',
+ 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
+ 'track': 'Promoted Music',
+ 'timestamp': 1639754738,
+ 'duration': 30,
+ 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
+ 'upload_date': '20211217',
+ 'view_count': int,
+ 'like_count': int,
+ 'repost_count': int,
+ 'comment_count': int,
+ },
+ 'expected_warnings': ['Video not available']
}]
def _extract_aweme_app(self, aweme_id):
- aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
- note='Downloading video details', errnote='Unable to download video details').get('aweme_detail')
- if not aweme_detail:
- raise ExtractorError('Video not available', video_id=aweme_id)
+ try:
+ aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id,
+ note='Downloading video details', errnote='Unable to download video details').get('aweme_detail')
+ if not aweme_detail:
+ raise ExtractorError('Video not available', video_id=aweme_id)
+ except ExtractorError as e:
+ self.report_warning(f'{e}; Retrying with feed workaround')
+ feed_list = self._call_api('feed', {'aweme_id': aweme_id}, aweme_id,
+ note='Downloading video feed', errnote='Unable to download video feed').get('aweme_list') or []
+ aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None)
+ if not aweme_detail:
+ raise ExtractorError('Unable to find video in feed', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail)
def _real_extract(self, url):
@@ -447,7 +497,7 @@ class TikTokUserIE(TikTokBaseIE):
for video in post_list.get('aweme_list', []):
yield {
**self._parse_aweme_video_app(video),
- 'ie_key': TikTokIE.ie_key(),
+ 'extractor_key': TikTokIE.ie_key(),
'extractor': 'TikTok',
'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}',
}
@@ -464,6 +514,114 @@ class TikTokUserIE(TikTokBaseIE):
return self.playlist_result(self._entries_api(webpage, user_id, user_name), user_id, user_name)
+class TikTokBaseListIE(TikTokBaseIE):
+ def _entries(self, list_id, display_id):
+ query = {
+ self._QUERY_NAME: list_id,
+ 'cursor': 0,
+ 'count': 20,
+ 'type': 5,
+ 'device_id': ''.join(random.choice(string.digits) for i in range(19))
+ }
+
+ max_retries = self.get_param('extractor_retries', 3)
+ for page in itertools.count(1):
+ for retries in itertools.count():
+ try:
+ post_list = self._call_api(self._API_ENDPOINT, query, display_id,
+ note='Downloading video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''),
+ errnote='Unable to download video list')
+ except ExtractorError as e:
+ if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries:
+ self.report_warning('%s. Retrying...' % str(e.cause or e.msg))
+ continue
+ raise
+ break
+ for video in post_list.get('aweme_list', []):
+ yield {
+ **self._parse_aweme_video_app(video),
+ 'extractor_key': TikTokIE.ie_key(),
+ 'extractor': 'TikTok',
+ 'webpage_url': f'https://tiktok.com/@_/video/{video["aweme_id"]}',
+ }
+ if not post_list.get('has_more'):
+ break
+ query['cursor'] = post_list['cursor']
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ return self.playlist_result(self._entries(list_id, list_id), list_id)
+
+
+class TikTokSoundIE(TikTokBaseListIE):
+ IE_NAME = 'tiktok:sound'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/music/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
+ _QUERY_NAME = 'music_id'
+ _API_ENDPOINT = 'music/aweme'
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/music/Build-a-Btch-6956990112127585029?lang=en',
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'id': '6956990112127585029'
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ # Actual entries are fewer than the listed video count
+ 'url': 'https://www.tiktok.com/music/jiefei-soap-remix-7036843036118469381',
+ 'playlist_mincount': 2182,
+ 'info_dict': {
+ 'id': '7036843036118469381'
+ },
+ 'expected_warnings': ['Retrying']
+ }]
+
+
+class TikTokEffectIE(TikTokBaseListIE):
+ IE_NAME = 'tiktok:effect'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/sticker/[\w\.-]+-(?P<id>[\d]+)[/?#&]?'
+ _QUERY_NAME = 'sticker_id'
+ _API_ENDPOINT = 'sticker/aweme'
+ _TESTS = [{
+ 'url': 'https://www.tiktok.com/sticker/MATERIAL-GWOOORL-1258156',
+ 'playlist_mincount': 100,
+ 'info_dict': {
+ 'id': '1258156',
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ # Different entries between mobile and web, depending on region
+ 'url': 'https://www.tiktok.com/sticker/Elf-Friend-479565',
+ 'only_matching': True
+ }]
+
+
+class TikTokTagIE(TikTokBaseListIE):
+ IE_NAME = 'tiktok:tag'
+ _VALID_URL = r'https?://(?:www\.)?tiktok\.com/tag/(?P<id>[^/?#&]+)'
+ _QUERY_NAME = 'ch_id'
+ _API_ENDPOINT = 'challenge/aweme'
+ _TESTS = [{
+ 'url': 'https://tiktok.com/tag/hello2018',
+ 'playlist_mincount': 39,
+ 'info_dict': {
+ 'id': '46294678',
+ 'title': 'hello2018',
+ },
+ 'expected_warnings': ['Retrying']
+ }, {
+ 'url': 'https://tiktok.com/tag/fypシ?is_copy_url=0&is_from_webapp=v1',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id, headers={
+ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)'
+ })
+ tag_id = self._html_search_regex(r'snssdk\d*://challenge/detail/(\d+)', webpage, 'tag ID')
+ return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id)
+
+
class DouyinIE(TikTokIE):
_VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
_TESTS = [{
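
Review note: the new list extractors above retry when the mobile API returns an empty body, which surfaces as a JSON decode error at position 0; the retry skeleton on its own (`call_api` is a hypothetical zero-argument fetcher):

import itertools
import json

def fetch_with_retries(call_api, max_retries=3):
    for attempt in itertools.count():
        try:
            return call_api()
        except json.JSONDecodeError as e:
            # An empty response decodes to an error at pos 0; retry only those
            if e.pos == 0 and attempt < max_retries:
                continue
            raise
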
diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py
index 11ebe76e1..37c7d5685 100644
--- a/yt_dlp/extractor/voicy.py
+++ b/yt_dlp/extractor/voicy.py
@@ -6,9 +6,10 @@ from ..compat import compat_str
from ..utils import (
ExtractorError,
smuggle_url,
+ str_or_none,
traverse_obj,
- unsmuggle_url,
unified_strdate,
+ unsmuggle_url,
)
import itertools
@@ -25,9 +26,9 @@ class VoicyBaseIE(InfoExtractor):
'id': voice_id,
'title': compat_str(value.get('PlaylistName')),
'uploader': value.get('SpeakerName'),
- 'uploader_id': compat_str(value.get('SpeakerId')),
+ 'uploader_id': str_or_none(value.get('SpeakerId')),
'channel': value.get('ChannelName'),
- 'channel_id': compat_str(value.get('ChannelId')),
+ 'channel_id': str_or_none(value.get('ChannelId')),
'upload_date': upload_date,
}
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1f5009399..852fbd78e 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -668,6 +668,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return text
@staticmethod
+ def _extract_thumbnails(data, *path_list):
+ """
+ Extract thumbnails from thumbnails dict
+ @param path_list: path list to level that contains 'thumbnails' key
+ """
+ thumbnails = []
+ for path in path_list or [()]:
+ for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]):
+ thumbnail_url = url_or_none(thumbnail.get('url'))
+ if not thumbnail_url:
+ continue
+ # Sometimes youtube gives a wrong thumbnail URL. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/233
+ # https://github.com/ytdl-org/youtube-dl/issues/28023
+ if 'maxresdefault' in thumbnail_url:
+ thumbnail_url = thumbnail_url.split('?')[0]
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'height': int_or_none(thumbnail.get('height')),
+ 'width': int_or_none(thumbnail.get('width')),
+ })
+ return thumbnails
+
+ @staticmethod
def extract_relative_time(relative_time_text):
"""
Extracts a relative time from string and converts to dt object
@@ -783,6 +807,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
+ thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
@@ -794,6 +820,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'view_count': view_count,
'uploader': uploader,
'channel_id': channel_id,
+ 'thumbnails': thumbnails,
'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower()
@@ -1750,16 +1777,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._player_cache = {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
- EXPIRATION_DURATION = 18_000
lock = threading.Lock()
is_live = True
- expiration_time = time.time() + EXPIRATION_DURATION
+ start_time = time.time()
formats = [f for f in formats if f.get('is_from_start')]
- def refetch_manifest(format_id):
- nonlocal formats, expiration_time, is_live
- if time.time() <= expiration_time:
+ def refetch_manifest(format_id, delay):
+ nonlocal formats, start_time, is_live
+ if time.time() <= start_time + delay:
return
_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
@@ -1769,19 +1795,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict, default=[])
_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
- expiration_time = time.time() + EXPIRATION_DURATION
+ start_time = time.time()
- def mpd_feed(format_id):
+ def mpd_feed(format_id, delay):
"""
@returns (manifest_url, manifest_stream_number, is_live) or None
"""
with lock:
- refetch_manifest(format_id)
+ refetch_manifest(format_id, delay)
f = next((f for f in formats if f['format_id'] == format_id), None)
if not f:
- self.report_warning(
- f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
+ if not is_live:
+ self.to_screen(f'{video_id}: Video is no longer live')
+ else:
+ self.report_warning(
+ f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
return None
return f['manifest_url'], f['manifest_stream_number'], is_live
@@ -1812,9 +1841,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
- mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
- if old_mpd_url == mpd_url and not refresh_sequence:
- return True, last_seq
+ last_error = ctx.pop('last_error', None)
+ expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+ mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
+ or (mpd_url, stream_number, False))
+ if not refresh_sequence:
+ if expire_fast and not is_live:
+ return False, last_seq
+ elif old_mpd_url == mpd_url:
+ return True, last_seq
try:
fmts, _ = self._extract_mpd_formats_and_subtitles(
mpd_url, None, note=False, errnote=False, fatal=False)
@@ -1848,8 +1883,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
last_segment_url = None
continue
else:
- should_retry, last_seq = _extract_sequence_from_mpd(True)
- if not should_retry:
+ should_continue, last_seq = _extract_sequence_from_mpd(True)
+ if not should_continue:
continue
if known_idx > last_seq:
@@ -1866,9 +1901,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
for idx in range(known_idx, last_seq):
# do not update sequence here or you'll get skipped some part of it
- should_retry, _ = _extract_sequence_from_mpd(False)
- if not should_retry:
- # retry when it gets weird state
+ should_continue, _ = _extract_sequence_from_mpd(False)
+ if not should_continue:
known_idx = idx - 1
raise ExtractorError('breaking out of outer loop')
last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
@@ -2903,25 +2937,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio
break
-
- thumbnails = []
- thumbnail_dicts = traverse_obj(
- (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
- expected_type=dict, default=[])
- for thumbnail in thumbnail_dicts:
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- # Sometimes youtube gives a wrong thumbnail URL. See:
- # https://github.com/yt-dlp/yt-dlp/issues/233
- # https://github.com/ytdl-org/youtube-dl/issues/28023
- if 'maxresdefault' in thumbnail_url:
- thumbnail_url = thumbnail_url.split('?')[0]
- thumbnails.append({
- 'url': thumbnail_url,
- 'height': int_or_none(thumbnail.get('height')),
- 'width': int_or_none(thumbnail.get('width')),
- })
+ thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
thumbnail_url = search_meta(['og:image', 'twitter:image'])
if thumbnail_url:
thumbnails.append({
@@ -3584,7 +3600,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
playlist_id = title = description = channel_url = channel_name = channel_id = None
- thumbnails_list = []
tags = []
selected_tab = self._extract_selected_tab(tabs)
@@ -3603,26 +3618,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
description = renderer.get('description', '')
playlist_id = channel_id
tags = renderer.get('keywords', '').split()
- thumbnails_list = (
- try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
- or try_get(
- self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
- lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
- list)
- or [])
- thumbnails = []
- for t in thumbnails_list:
- if not isinstance(t, dict):
- continue
- thumbnail_url = url_or_none(t.get('url'))
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int_or_none(t.get('width')),
- 'height': int_or_none(t.get('height')),
- })
+ thumbnails = (
+ self._extract_thumbnails(renderer, 'avatar')
+ or self._extract_thumbnails(
+ self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
+ ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))
+
if playlist_id is None:
playlist_id = item_id
if title is None:
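
Review note: `refetch_manifest` now takes the refresh interval as a parameter, so an HTTP 403 on a fragment shrinks the cadence from 18000 s to 5 s while healthy streams keep the long interval; the decision in isolation (values from the diff):

import time

def should_refetch(start_time, last_error_code):
    # HTTP 403 means the manifest URLs expired early; refresh almost immediately
    delay = 5 if last_error_code == 403 else 18000
    return time.time() > start_time + delay
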
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index 5a5eebd30..a3a705bdd 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -23,7 +23,7 @@ class Zee5IE(InfoExtractor):
zee5:|
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
(?:
- (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
+ (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){3}
|movies/[^#/?]+
)/(?P<display_id>[^#/?]+)/
)
@@ -37,48 +37,50 @@ class Zee5IE(InfoExtractor):
'display_id': 'krishna-the-birth',
'title': 'Krishna - The Birth',
'duration': 4368,
- 'average_rating': 4,
'description': compat_str,
'alt_title': 'Krishna - The Birth',
'uploader': 'Zee Entertainment Enterprises Ltd',
'release_date': '20060101',
'upload_date': '20060101',
'timestamp': 1136073600,
- 'thumbnail': 'https://akamaividz.zee5.com/resources/0-0-63098/list/270x152/0063098_list_80888170.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'episode_number': 0,
+ 'episode': 'Episode 0',
'tags': list
},
'params': {
'format': 'bv',
},
}, {
- 'url': 'https://zee5.com/tvshows/details/krishna-balram/0-6-1871/episode-1-the-test-of-bramha/0-1-233402',
+ 'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899/yoga-se-hoga-bandbudh-aur-budbak/0-1-239839',
'info_dict': {
- 'id': '0-1-233402',
+ 'id': '0-1-239839',
'ext': 'mp4',
- 'display_id': 'episode-1-the-test-of-bramha',
- 'title': 'Episode 1 - The Test Of Bramha',
- 'duration': 1336,
- 'average_rating': 4,
+ 'display_id': 'yoga-se-hoga-bandbudh-aur-budbak',
+ 'title': 'Yoga Se Hoga-Bandbudh aur Budbak',
+ 'duration': 659,
'description': compat_str,
- 'alt_title': 'Episode 1 - The Test Of Bramha',
+ 'alt_title': 'Yoga Se Hoga-Bandbudh aur Budbak',
'uploader': 'Zee Entertainment Enterprises Ltd',
- 'release_date': '20090101',
- 'upload_date': '20090101',
- 'timestamp': 1230768000,
- 'thumbnail': 'https://akamaividz.zee5.com/resources/0-1-233402/list/270x152/01233402_list.jpg',
- 'series': 'Krishna Balram',
+ 'release_date': '20150101',
+ 'upload_date': '20150101',
+ 'timestamp': 1420070400,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'series': 'Bandbudh Aur Budbak',
'season_number': 1,
'episode_number': 1,
+ 'episode': 'Episode 1',
+ 'season': 'Season 1',
'tags': list,
},
'params': {
'format': 'bv',
},
}, {
- 'url': 'https://www.zee5.com/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
+ 'url': 'https://www.zee5.com/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730?country=IN',
'only_matching': True
}, {
- 'url': 'https://www.zee5.com/global/hi/tvshows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
+ 'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
'only_matching': True
}]
_DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
@@ -153,7 +155,6 @@ class Zee5IE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
'duration': int_or_none(asset_data.get('duration')),
- 'average_rating': int_or_none(asset_data.get('rating')),
'description': str_or_none(asset_data.get('description')),
'alt_title': str_or_none(asset_data.get('original_title')),
'uploader': str_or_none(asset_data.get('content_owner')),
@@ -175,42 +176,42 @@ class Zee5SeriesIE(InfoExtractor):
(?:
zee5:series:|
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
- (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
+ (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){2}/
)
(?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
'''
_TESTS = [{
- 'url': 'https://www.zee5.com/kids/kids-shows/krishna-balram/0-6-1871',
- 'playlist_mincount': 43,
+ 'url': 'https://www.zee5.com/kids/kids-shows/bandbudh-aur-budbak/0-6-1899',
+ 'playlist_mincount': 156,
'info_dict': {
- 'id': '0-6-1871',
+ 'id': '0-6-1899',
},
}, {
- 'url': 'https://www.zee5.com/tvshows/details/bhabi-ji-ghar-par-hai/0-6-199',
+ 'url': 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199',
'playlist_mincount': 1500,
'info_dict': {
'id': '0-6-199',
},
}, {
- 'url': 'https://www.zee5.com/tvshows/details/agent-raghav-crime-branch/0-6-965',
+ 'url': 'https://www.zee5.com/tv-shows/details/agent-raghav-crime-branch/0-6-965',
'playlist_mincount': 24,
'info_dict': {
'id': '0-6-965',
},
}, {
- 'url': 'https://www.zee5.com/ta/tvshows/details/nagabhairavi/0-6-3201',
+ 'url': 'https://www.zee5.com/ta/tv-shows/details/nagabhairavi/0-6-3201',
'playlist_mincount': 3,
'info_dict': {
'id': '0-6-3201',
},
}, {
- 'url': 'https://www.zee5.com/global/hi/tvshows/details/khwaabon-ki-zamin-par/0-6-270',
+ 'url': 'https://www.zee5.com/global/hi/tv-shows/details/khwaabon-ki-zamin-par/0-6-270',
'playlist_mincount': 150,
'info_dict': {
'id': '0-6-270',
},
}, {
- 'url': 'https://www.zee5.com/tvshows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
+ 'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
'only_matching': True,
}]
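
For illustration, a minimal standalone sketch (not part of this commit) of the renamed URL slug: the pattern below is a simplified excerpt of the Zee5IE regex above, and the sample URL is hypothetical.

import re

# Simplified excerpt of the updated Zee5IE pattern: "tvshows" became "tv-shows"
_SIMPLIFIED_VALID_URL = re.compile(r'''(?x)
    https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
    (?:
        (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){3}
        |movies/[^#/?]+
    )/(?P<display_id>[^#/?]+)/(?P<id>[^#/?]+)''')

# Hypothetical sample URL using the new slug
url = 'https://www.zee5.com/tv-shows/details/bhabi-ji-ghar-par-hai/0-6-199/some-episode/0-1-12345'
m = _SIMPLIFIED_VALID_URL.match(url)
assert m and m.group('display_id') == 'some-episode' and m.group('id') == '0-1-12345'
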
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 168821a68..971c51515 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -20,7 +20,7 @@ from .utils import (
remove_end,
write_string,
)
-from .cookies import SUPPORTED_BROWSERS
+from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .version import __version__
from .downloader.external import list_external_downloaders
@@ -664,7 +664,7 @@ def parseOpts(overrideArguments=None):
downloader.add_option(
'-N', '--concurrent-fragments',
dest='concurrent_fragment_downloads', metavar='N', default=1, type=int,
- help='Number of fragments of a dash/hlsnative video that should be download concurrently (default is %default)')
+ help='Number of fragments of a dash/hlsnative video that should be downloaded concurrently (default is %default)')
downloader.add_option(
'-r', '--limit-rate', '--rate-limit',
dest='ratelimit', metavar='RATE',
@@ -678,6 +678,10 @@ def parseOpts(overrideArguments=None):
dest='retries', metavar='RETRIES', default=10,
help='Number of retries (default is %default), or "infinite"')
downloader.add_option(
+ '--file-access-retries',
+ dest='file_access_retries', metavar='RETRIES', default=10,
+ help='Number of times to retry on file access error (default is %default), or "infinite"')
+ downloader.add_option(
'--fragment-retries',
dest='fragment_retries', metavar='RETRIES', default=10,
help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
@@ -1015,7 +1019,7 @@ def parseOpts(overrideArguments=None):
}, help=(
'The paths where the files should be downloaded. '
'Specify the type of file and the path separated by a colon ":". '
- 'All the same types as --output are supported. '
+ 'All the same TYPES as --output are supported. '
'Additionally, you can also provide "home" (default) and "temp" paths. '
'All intermediary files are first downloaded to the temp path and '
'then the final files are moved over to the home path after download is finished. '
@@ -1166,14 +1170,15 @@ def parseOpts(overrideArguments=None):
help='Do not read/dump cookies from/to file (default)')
filesystem.add_option(
'--cookies-from-browser',
- dest='cookiesfrombrowser', metavar='BROWSER[:PROFILE]',
+ dest='cookiesfrombrowser', metavar='BROWSER[+KEYRING][:PROFILE]',
help=(
- 'Load cookies from a user profile of the given web browser. '
- 'Currently supported browsers are: {}. '
- 'You can specify the user profile name or directory using '
- '"BROWSER:PROFILE_NAME" or "BROWSER:PROFILE_PATH". '
- 'If no profile is given, the most recently accessed one is used'.format(
- ', '.join(sorted(SUPPORTED_BROWSERS)))))
+ 'The name of the browser and (optionally) the name/path of '
+ 'the profile to load cookies from, separated by a ":". '
+ f'Currently supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}. '
+ 'By default, the most recently accessed profile is used. '
+ 'The keyring used for decrypting Chromium cookies on Linux can be '
+ '(optionally) specified after the browser name separated by a "+". '
+ f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
filesystem.add_option(
'--no-cookies-from-browser',
action='store_const', const=None, dest='cookiesfrombrowser',
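
To make the new argument shape concrete, a minimal sketch (not yt-dlp's actual parsing code) of splitting a BROWSER[+KEYRING][:PROFILE] value; the function name and keyring value shown are illustrative.

# Minimal sketch (not yt-dlp's implementation) of splitting the
# BROWSER[+KEYRING][:PROFILE] value accepted by --cookies-from-browser.
def split_browser_spec(spec):
    browser, _, profile = spec.partition(':')     # profile follows the first ":"
    browser, _, keyring = browser.partition('+')  # keyring follows a "+"
    return browser, keyring or None, profile or None

assert split_browser_spec('chrome') == ('chrome', None, None)
assert split_browser_spec('chromium+gnomekeyring:Default') == ('chromium', 'gnomekeyring', 'Default')
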
diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py
index 918d3e788..e199a1cdd 100644
--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@@ -145,8 +145,43 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self.report_warning('unable to embed using mutagen; %s' % error_to_compat_str(err))
success = False
- # Method 2: Use ffmpeg+ffprobe
- if not success and not prefer_atomicparsley:
+ # Method 2: Use AtomicParsley
+ if not success:
+ success = True
+ atomicparsley = next((
+ x for x in ['AtomicParsley', 'atomicparsley']
+ if check_executable(x, ['-v'])), None)
+ if atomicparsley is None:
+ self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
+ success = False
+ else:
+ if not prefer_atomicparsley:
+ self.to_screen('mutagen was not found. Falling back to AtomicParsley')
+ cmd = [encodeFilename(atomicparsley, True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+ cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')]
+
+ self._report_run('atomicparsley', filename)
+ self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
+ p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate_or_kill()
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {msg}')
+                    # for formats that don't support thumbnails (like 3gp), AtomicParsley
+                    # won't create the temporary file
+ if b'No changes' in stdout:
+ self.report_warning('The file format doesn\'t support embedding a thumbnail')
+ success = False
+
+ # Method 3: Use ffmpeg+ffprobe
+        # Thumbnails attached using this method don't show up as cover in some cases
+ # See https://github.com/yt-dlp/yt-dlp/issues/2125, https://github.com/yt-dlp/yt-dlp/issues/411
+ if not success:
success = True
try:
options = ['-c', 'copy', '-map', '0', '-dn', '-map', '1']
@@ -161,38 +196,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
self._report_run('ffmpeg', filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
except PostProcessingError as err:
- self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
- success = False
-
- # Method 3: Use AtomicParsley
- if not success:
- success = True
- atomicparsley = next((
- x for x in ['AtomicParsley', 'atomicparsley']
- if check_executable(x, ['-v'])), None)
- if atomicparsley is None:
- raise EmbedThumbnailPPError('AtomicParsley was not found. Please install')
-
- cmd = [encodeFilename(atomicparsley, True),
- encodeFilename(filename, True),
- encodeArgument('--artwork'),
- encodeFilename(thumbnail_filename, True),
- encodeArgument('-o'),
- encodeFilename(temp_filename, True)]
- cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')]
-
- self._report_run('atomicparsley', filename)
- self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
- p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate_or_kill()
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- raise EmbedThumbnailPPError(msg)
- # for formats that don't support thumbnails (like 3gp) AtomicParsley
- # won't create to the temporary file
- if b'No changes' in stdout:
- self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False
+ raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}')
elif info['ext'] in ['ogg', 'opus', 'flac']:
if not has_mutagen:
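
The reordering above tries AtomicParsley before ffmpeg for mp4/m4a, since ffmpeg-attached cover art is not recognized by some players. A minimal sketch of the resulting fallback chain; the embedder callables are hypothetical stand-ins for the three branches.

# Sketch of the fallback chain after this change: try each embedder in
# order and stop at the first success (mutagen -> AtomicParsley -> ffmpeg).
def embed_thumbnail(filename, thumbnail, embedders):
    for name, embed in embedders:
        try:
            if embed(filename, thumbnail):
                return name
        except Exception:
            continue  # fall through to the next method
    raise RuntimeError('all embedding methods failed')

methods = [('mutagen', lambda f, t: False),        # pretend mutagen is unavailable
           ('atomicparsley', lambda f, t: True),   # pretend AtomicParsley succeeds
           ('ffmpeg', lambda f, t: True)]
assert embed_thumbnail('video.mp4', 'cover.jpg', methods) == 'atomicparsley'
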
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 594762974..96b48ded5 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -296,8 +296,8 @@ class FFmpegPostProcessor(PostProcessor):
keys = ['_%s%d' % (name, number), '_%s' % name]
if name == 'o':
args += ['-movflags', '+faststart']
- elif number == 1:
- keys.append('')
+ if number == 1:
+ keys.append('')
args += self._configuration_args(self.basename, keys)
if name == 'i':
args.append('-i')
@@ -1092,12 +1092,12 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
files_to_delete = []
has_thumbnail = False
- for idx, thumbnail_dict in enumerate(info['thumbnails']):
- if 'filepath' not in thumbnail_dict:
+ for idx, thumbnail_dict in enumerate(info.get('thumbnails') or []):
+ original_thumbnail = thumbnail_dict.get('filepath')
+ if not original_thumbnail:
continue
has_thumbnail = True
self.fixup_webp(info, idx)
- original_thumbnail = thumbnail_dict['filepath']
_, thumbnail_ext = os.path.splitext(original_thumbnail)
if thumbnail_ext:
thumbnail_ext = thumbnail_ext[1:].lower()
diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py
index 807cd305d..646659e75 100644
--- a/yt_dlp/postprocessor/metadataparser.py
+++ b/yt_dlp/postprocessor/metadataparser.py
@@ -99,7 +99,7 @@ class MetadataParserPP(PostProcessor):
class MetadataFromFieldPP(MetadataParserPP):
@classmethod
def to_action(cls, f):
- match = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
+ match = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
if match is None:
raise ValueError(f'it should be FROM:TO, not {f!r}')
return (
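
The added (?s) flag makes "." match newlines too, so a FROM:TO spec whose FROM part spans multiple lines now splits correctly. A self-contained check:

import re

# Without (?s), "." stops at the newline and the multi-line spec fails to
# match; with it, the first unescaped ":" still separates FROM from TO.
spec = 'line1\nline2:%(title)s'
assert re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', spec) is None
m = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', spec)
assert m.group('in') == 'line1\nline2' and m.group('out') == '%(title)s'
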
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index d34e5b545..0c3c6c401 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -210,6 +210,7 @@ DATE_FORMATS = (
'%Y/%m/%d %H:%M:%S',
'%Y%m%d%H%M',
'%Y%m%d%H%M%S',
+ '%Y%m%d',
'%Y-%m-%d %H:%M',
'%Y-%m-%d %H:%M:%S',
'%Y-%m-%d %H:%M:%S.%f',
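
A quick standalone check of the newly accepted bare-date format:

import datetime

# The new '%Y%m%d' entry lets DATE_FORMATS-based parsing accept bare dates
assert datetime.datetime.strptime('20211227', '%Y%m%d') == datetime.datetime(2021, 12, 27)
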
@@ -304,7 +305,7 @@ def write_json_file(obj, fn):
try:
with tf:
- json.dump(obj, tf)
+ json.dump(obj, tf, ensure_ascii=False)
if sys.platform == 'win32':
# Need to remove existing file on Windows, else os.rename raises
# WindowsError or FileExistsError.
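
With ensure_ascii=False, non-ASCII characters are written verbatim rather than as \uXXXX escapes, which keeps written JSON (such as .info.json) human-readable. A small demonstration:

import json

# ensure_ascii=False writes non-ASCII characters verbatim
assert json.dumps({'title': '日本語'}, ensure_ascii=False) == '{"title": "日本語"}'
assert json.dumps({'title': '日本語'}) == '{"title": "\\u65e5\\u672c\\u8a9e"}'
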
@@ -1862,7 +1863,6 @@ def _windows_write_string(s, out):
False if it has yet to be written out."""
# Adapted from http://stackoverflow.com/a/3259271/35070
- import ctypes
import ctypes.wintypes
WIN_OUTPUT_IDS = {
@@ -2110,18 +2110,19 @@ def unsmuggle_url(smug_url, default=None):
return url, data
+def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
+    """ Formats numbers with decimal suffixes like K, M, etc """
+ num, factor = float_or_none(num), float(factor)
+ if num is None:
+ return None
+ exponent = 0 if num == 0 else int(math.log(num, factor))
+ suffix = ['', *'KMGTPEZY'][exponent]
+ converted = num / (factor ** exponent)
+ return fmt % (converted, f'{suffix}i' if suffix and factor == 1024 else suffix)
+
+
def format_bytes(bytes):
- if bytes is None:
- return 'N/A'
- if type(bytes) is str:
- bytes = float(bytes)
- if bytes == 0.0:
- exponent = 0
- else:
- exponent = int(math.log(bytes, 1024.0))
- suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
- converted = float(bytes) / float(1024 ** exponent)
- return '%.2f%s' % (converted, suffix)
+ return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
def lookup_unit_table(unit_table, s):
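
To illustrate the new helper's behavior, a standalone copy of it (with float_or_none replaced by a plain float() for this sketch) and its expected outputs:

import math

def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    # Standalone copy of the helper above; float_or_none swapped for float()
    num, factor = float(num), float(factor)
    exponent = 0 if num == 0 else int(math.log(num, factor))
    suffix = ['', *'KMGTPEZY'][exponent]
    converted = num / (factor ** exponent)
    return fmt % (converted, f'{suffix}i' if suffix and factor == 1024 else suffix)

assert format_decimal_suffix(1500) == '1K'  # '%d' truncates 1.5 to 1
assert format_decimal_suffix(1048576, '%.2f%sB', factor=1024) == '1.00MiB'  # the format_bytes path
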
@@ -2210,7 +2211,7 @@ def parse_count(s):
if s is None:
return None
- s = s.strip()
+ s = re.sub(r'^[^\d]+\s', '', s).strip()
if re.match(r'^[\d,.]+$', s):
return str_to_int(s)
@@ -2222,9 +2223,17 @@ def parse_count(s):
'M': 1000 ** 2,
'kk': 1000 ** 2,
'KK': 1000 ** 2,
+ 'b': 1000 ** 3,
+ 'B': 1000 ** 3,
}
- return lookup_unit_table(_UNIT_TABLE, s)
+ ret = lookup_unit_table(_UNIT_TABLE, s)
+ if ret is not None:
+ return ret
+
+ mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
+ if mobj:
+ return str_to_int(mobj.group(1))
def parse_resolution(s):
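
The parse_count changes add two behaviors: a leading word before the number (e.g. "Views 1.2B") is now stripped, and "b"/"B" count as a billions suffix. A reduced sketch (lookup_unit_table and the full unit table omitted):

import re

def parse_count_sketch(s):
    # 1) strip a leading non-numeric word, as the new re.sub above does
    s = re.sub(r'^[^\d]+\s', '', s).strip()
    # 2) reduced unit table including the new billions suffix
    m = re.match(r'([\d.]+)\s*([KMBkmb]?)', s)
    number, unit = float(m.group(1)), m.group(2).upper()
    return int(number * {'': 1, 'K': 1000, 'M': 1000 ** 2, 'B': 1000 ** 3}[unit])

assert parse_count_sketch('Views 1.2B') == 1_200_000_000
assert parse_count_sketch('5K') == 5000
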
@@ -3192,30 +3201,29 @@ def parse_codecs(codecs_str):
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
if not vcodec:
- vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
+ vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
if codec in ('dvh1', 'dvhe'):
hdr = 'DV'
elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
hdr = 'HDR10'
elif full_codec.replace('0', '').startswith('vp9.2'):
hdr = 'HDR10'
- elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
if not acodec:
acodec = full_codec
else:
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
- if not vcodec and not acodec:
- if len(split_codecs) == 2:
- return {
- 'vcodec': split_codecs[0],
- 'acodec': split_codecs[1],
- }
- else:
+ if vcodec or acodec:
return {
'vcodec': vcodec or 'none',
'acodec': acodec or 'none',
'dynamic_range': hdr,
}
+ elif len(split_codecs) == 2:
+ return {
+ 'vcodec': split_codecs[0],
+ 'acodec': split_codecs[1],
+ }
return {}
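
Two behavioral changes above: "hvc1" codec strings are now truncated to their first four dot-separated parts like vp9/av1, and "flac" is recognized as an audio codec. A reduced sketch of just those branches:

def classify_codec_sketch(full_codec):
    # Reduced sketch of the updated parse_codecs branches shown above
    parts = full_codec.split('.')
    if parts[0] in ('vp9', 'av1', 'hvc1'):
        return {'vcodec': '.'.join(parts[:4]), 'acodec': 'none'}
    if parts[0] in ('flac', 'mp4a', 'opus'):
        return {'vcodec': 'none', 'acodec': full_codec}
    return {}

assert classify_codec_sketch('hvc1.1.6.L93.B0')['vcodec'] == 'hvc1.1.6.L93'
assert classify_codec_sketch('flac')['acodec'] == 'flac'
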
@@ -5024,7 +5032,7 @@ def traverse_dict(dictn, keys, casesense=True):
return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
-def variadic(x, allowed_types=(str, bytes)):
+def variadic(x, allowed_types=(str, bytes, dict)):
return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
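
Adding dict to allowed_types means a mapping is now wrapped as a single item instead of being returned as-is (and later iterated over its keys). A standalone copy for illustration:

import collections.abc

def variadic(x, allowed_types=(str, bytes, dict)):
    # Standalone copy of the updated helper above
    return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)

assert variadic('abc') == ('abc',)
assert variadic({'a': 1}) == ({'a': 1},)  # previously returned the dict itself
assert variadic([1, 2]) == [1, 2]
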
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 8c07d099e..7b5732595 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,5 +1,5 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2021.12.01'
+__version__ = '2021.12.27'
-RELEASE_GIT_HEAD = '91f071af6'
+RELEASE_GIT_HEAD = '6223f67a8'