author    Jesús <heckyel@hyperbola.info>  2021-10-31 11:36:52 -0500
committer Jesús <heckyel@hyperbola.info>  2021-10-31 11:36:52 -0500
commit    5bb25093eb718346ab8a723d2c04f0066fc3958a (patch)
tree      8a7fa5611895a933eaf1ef1623f7b9e1a1c36157
parent    c7afb25e19a91493db6069d1db9f7d1bc8491dc1 (diff)
parent    652fb0d446524af4b783276babd55f5fc6a3afeb (diff)
updated from upstream | 31/10/2021 at 11:36
-rw-r--r--  CONTRIBUTORS                          |   4
-rw-r--r--  Changelog.md                          |  78
-rw-r--r--  devscripts/make_lazy_extractors.py    |   2
-rw-r--r--  devscripts/make_supportedsites.py     |   3
-rw-r--r--  pyinst.py                             | 195
-rw-r--r--  setup.py                              |   4
-rw-r--r--  supportedsites.md                     |  46
-rw-r--r--  test/test_utils.py                    |   7
-rw-r--r--  yt_dlp/YoutubeDL.py                   | 381
-rw-r--r--  yt_dlp/__init__.py                    |  20
-rw-r--r--  yt_dlp/compat.py                      |   5
-rw-r--r--  yt_dlp/cookies.py                     |   2
-rw-r--r--  yt_dlp/downloader/fragment.py         |   3
-rw-r--r--  yt_dlp/extractor/__init__.py          |  21
-rw-r--r--  yt_dlp/extractor/adobepass.py         |   4
-rw-r--r--  yt_dlp/extractor/bilibili.py          |   8
-rw-r--r--  yt_dlp/extractor/cbc.py               | 134
-rw-r--r--  yt_dlp/extractor/common.py            |  34
-rw-r--r--  yt_dlp/extractor/coub.py              |   3
-rw-r--r--  yt_dlp/extractor/dplay.py             |   5
-rw-r--r--  yt_dlp/extractor/extractors.py        |  17
-rw-r--r--  yt_dlp/extractor/generic.py           |  25
-rw-r--r--  yt_dlp/extractor/instagram.py         |  46
-rw-r--r--  yt_dlp/extractor/itv.py               |  34
-rw-r--r--  yt_dlp/extractor/mediaset.py          |  82
-rw-r--r--  yt_dlp/extractor/microsoftstream.py   | 125
-rw-r--r--  yt_dlp/extractor/mlssoccer.py         | 118
-rw-r--r--  yt_dlp/extractor/mtv.py               |  12
-rw-r--r--  yt_dlp/extractor/naver.py             |   7
-rw-r--r--  yt_dlp/extractor/niconico.py          |   1
-rw-r--r--  yt_dlp/extractor/nrk.py               |   4
-rw-r--r--  yt_dlp/extractor/patreon.py           |   2
-rw-r--r--  yt_dlp/extractor/sky.py               |  28
-rw-r--r--  yt_dlp/extractor/soundcloud.py        |   3
-rw-r--r--  yt_dlp/extractor/tagesschau.py        | 279
-rw-r--r--  yt_dlp/extractor/threespeak.py        |  97
-rw-r--r--  yt_dlp/extractor/trovo.py             |   4
-rw-r--r--  yt_dlp/extractor/twitter.py           |   2
-rw-r--r--  yt_dlp/extractor/viewlift.py          | 192
-rw-r--r--  yt_dlp/extractor/vimeo.py             | 230
-rw-r--r--  yt_dlp/extractor/vlive.py             | 230
-rw-r--r--  yt_dlp/extractor/wakanim.py           |  25
-rw-r--r--  yt_dlp/extractor/youtube.py           |  39
-rw-r--r--  yt_dlp/minicurses.py                  |  83
-rw-r--r--  yt_dlp/options.py                     |  13
-rw-r--r--  yt_dlp/postprocessor/ffmpeg.py        |   1
-rw-r--r--  yt_dlp/postprocessor/sponsorblock.py  |  33
-rw-r--r--  yt_dlp/utils.py                       |  91
-rw-r--r--  yt_dlp/version.py                     |   2
49 files changed, 1812 insertions(+), 972 deletions(-)
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 048d98852..2bf96affe 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -125,3 +125,7 @@ jfogelman
timethrow
sarnoud
Bojidarist
+18928172992817182/gustaf
+nixklai
+smplayer-dev
+Zirro
diff --git a/Changelog.md b/Changelog.md
index 90f9bdafb..b46199168 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -14,6 +14,84 @@
-->
+### 2021.10.22
+
+* [build] Improvements
+ * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev)
+ * Release windows exe built with `py2exe`
+ * Enable lazy-extractors in releases.
+ * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
+ * Clean up error reporting in update
+ * Refactor `pyinst.py`, misc cleanup and improve docs
+* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804)
+* [downloader] **Fix slow progress hooks**
+ * This was causing HLS/DASH downloads to be extremely slow in some situations
+* [downloader/ffmpeg] Improve simultaneous download and merge
+* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key
+* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp
+* [utils] Allow duration strings in `--match-filter`
+* Add HDR information to formats
+* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro)
+* Calculate more fields for merged formats
+* Do not verify thumbnail URLs unless `--check-formats` is specified
+* Don't create console for subprocesses on Windows
+* Fix `--restrict-filename` when used with default template
+* Fix `check_formats` output being written to stdout when `-qv`
+* Fix bug in storyboards
+* Fix conflict b/w id and ext in format selection
+* Fix verbose head not showing custom configs
+* Load archive only after printing verbose head
+* Make `duration_string` and `resolution` available in --match-filter
+* Re-implement deprecated option `--id`
+* Reduce default `--socket-timeout`
+* Write verbose header to logger
+* [outtmpl] Fix bug in expanding environment variables
+* [cookies] Local State should be opened as utf-8
+* [extractor,utils] Detect more codecs/mimetypes
+* [extractor] Detect `EXT-X-KEY` Apple FairPlay
+* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi)
+* [http] Retry on socket timeout and show the last encountered error
+* [fragment] Print error message when skipping fragment
+* [aria2c] Fix `--skip-unavailable-fragment`
+* [SponsorBlock] Obey `extractor-retries` and `sleep-requests`
+* [Merger] Do not add `aac_adtstoasc` to non-hls audio
+* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari)
+* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi)
+* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h)
+* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
+* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
+* [crunchyroll] Add season to flat-playlist
+* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
+* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
+* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [hidive] Fix typo
+* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804)
+* [Hotstar] Raise appropriate error for DRM
+* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png)
+* [instagram] Show appropriate error when login is needed
+* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai)
+* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
+* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de)
+* [SkyNewsAU] Add extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
+* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47)
+* [tiktok] Fix typo and update tests
+* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804)
+* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182)
+* [vimeo] Fix embedded `player.vimeo`
+* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan)
+* [youtube] Add auto-translated subtitles
+* [youtube] Expose different formats with same itag
+* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz)
+* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
+* [cleanup] Remove broken youtube login code
+* [cleanup] Standardize timestamp formatting code
+* [cleanup] Generalize `getcomments` implementation for extractors
+* [cleanup] Simplify search extractors code
+* [cleanup] misc
+
+
### 2021.10.10
* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
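
The `--match-filter` entries above (duration strings, plus the new `duration_string` and `resolution` fields) are usable from the embedding API as well. A minimal sketch, assuming `match_filter_func` accepts the same filter syntax as the CLI flag; the URL is a placeholder:

    import yt_dlp
    from yt_dlp.utils import match_filter_func

    # Skip videos shorter than 10 minutes; '10:00' relies on the new
    # duration-string support ('600' is the older numeric equivalent).
    ydl_opts = {'match_filter': match_filter_func('duration > 10:00')}

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=placeholder'])
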
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 427045b98..0411df76b 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -9,7 +9,7 @@ import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
-lazy_extractors_filename = sys.argv[1]
+lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 17a34843f..4c11e25f2 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -29,6 +29,9 @@ def main():
continue
if ie_desc is not None:
ie_md += ': {0}'.format(ie.IE_DESC)
+ search_key = getattr(ie, 'SEARCH_KEY', None)
+ if search_key is not None:
+ ie_md += f'; "{ie.SEARCH_KEY}:" prefix'
if not ie.working():
ie_md += ' (Currently broken)'
yield ie_md
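
The `getattr` guard above, in isolation: search extractors advertise a `SEARCH_KEY` attribute, ordinary extractors do not, and a default of `None` avoids an `AttributeError`. The class names here are illustrative only:

    class FooSearchIE:           # hypothetical search extractor
        SEARCH_KEY = 'foosearch'

    class FooIE:                 # hypothetical ordinary extractor
        pass

    for ie in (FooSearchIE, FooIE):
        search_key = getattr(ie, 'SEARCH_KEY', None)
        if search_key is not None:
            print(f'{ie.__name__}: "{search_key}:" prefix')
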
diff --git a/pyinst.py b/pyinst.py
index 013441c0e..949e0b233 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -1,75 +1,84 @@
#!/usr/bin/env python3
# coding: utf-8
-
-from __future__ import unicode_literals
-import sys
+import os
import platform
-
+import sys
from PyInstaller.utils.hooks import collect_submodules
-from PyInstaller.utils.win32.versioninfo import (
- VarStruct, VarFileInfo, StringStruct, StringTable,
- StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion,
-)
-import PyInstaller.__main__
-
-arch = platform.architecture()[0][:2]
-assert arch in ('32', '64')
-_x86 = '_x86' if arch == '32' else ''
-
-# Compatability with older arguments
-opts = sys.argv[1:]
-if opts[0:1] in (['32'], ['64']):
- if arch != opts[0]:
- raise Exception(f'{opts[0]}bit executable cannot be built on a {arch}bit system')
- opts = opts[1:]
-opts = opts or ['--onefile']
-
-print(f'Building {arch}bit version with options {opts}')
-
-FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '')
-
-exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
-VERSION = locals()['__version__']
-
-VERSION_LIST = VERSION.split('.')
-VERSION_LIST = list(map(int, VERSION_LIST)) + [0] * (4 - len(VERSION_LIST))
-
-print('Version: %s%s' % (VERSION, _x86))
-print('Remember to update the version using devscipts\\update-version.py')
-
-VERSION_FILE = VSVersionInfo(
- ffi=FixedFileInfo(
- filevers=VERSION_LIST,
- prodvers=VERSION_LIST,
- mask=0x3F,
- flags=0x0,
- OS=0x4,
- fileType=0x1,
- subtype=0x0,
- date=(0, 0),
- ),
- kids=[
- StringFileInfo([
- StringTable(
- '040904B0', [
- StringStruct('Comments', 'yt-dlp%s Command Line Interface.' % _x86),
- StringStruct('CompanyName', 'https://github.com/yt-dlp'),
- StringStruct('FileDescription', FILE_DESCRIPTION),
- StringStruct('FileVersion', VERSION),
- StringStruct('InternalName', 'yt-dlp%s' % _x86),
- StringStruct(
- 'LegalCopyright',
- 'pukkandan.ytdlp@gmail.com | UNLICENSE',
- ),
- StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86),
- StringStruct('ProductName', 'yt-dlp%s' % _x86),
- StringStruct(
- 'ProductVersion',
- '%s%s on Python %s' % (VERSION, _x86, platform.python_version())),
- ])]),
- VarFileInfo([VarStruct('Translation', [0, 1200])])
+
+
+OS_NAME = platform.system()
+if OS_NAME == 'Windows':
+ from PyInstaller.utils.win32.versioninfo import (
+ VarStruct, VarFileInfo, StringStruct, StringTable,
+ StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion,
+ )
+elif OS_NAME == 'Darwin':
+ pass
+else:
+ raise Exception(f'{OS_NAME} is not supported')
+
+ARCH = platform.architecture()[0][:2]
+
+
+def main():
+ opts = parse_options()
+ version = read_version()
+
+ suffix = '_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else ''
+ final_file = 'dist/%syt-dlp%s%s' % (
+ 'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '')
+
+ print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}')
+ print('Remember to update the version using "devscripts/update-version.py"')
+ if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'):
+ print('WARNING: Building without lazy_extractors. Run '
+ '"devscripts/make_lazy_extractors.py" to build lazy extractors', file=sys.stderr)
+ print(f'Destination: {final_file}\n')
+
+ opts = [
+ f'--name=yt-dlp{suffix}',
+ '--icon=devscripts/logo.ico',
+ '--upx-exclude=vcruntime140.dll',
+ '--noconfirm',
+ *dependancy_options(),
+ *opts,
+ 'yt_dlp/__main__.py',
]
-)
+ print(f'Running PyInstaller with {opts}')
+
+ import PyInstaller.__main__
+
+ PyInstaller.__main__.run(opts)
+
+ set_version_info(final_file, version)
+
+
+def parse_options():
+ # Compatibility with older arguments
+ opts = sys.argv[1:]
+ if opts[0:1] in (['32'], ['64']):
+ if ARCH != opts[0]:
+ raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system')
+ opts = opts[1:]
+ return opts or ['--onefile']
+
+
+def read_version():
+ exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
+ return locals()['__version__']
+
+
+def version_to_list(version):
+ version_list = version.split('.')
+ return list(map(int, version_list)) + [0] * (4 - len(version_list))
+
+
+def dependancy_options():
+ dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
+ excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
+
+ yield from (f'--hidden-import={module}' for module in dependancies)
+ yield from (f'--exclude-module={module}' for module in excluded_modules)
def pycryptodome_module():
@@ -86,17 +95,41 @@ def pycryptodome_module():
return 'Cryptodome'
-dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
-excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
-
-PyInstaller.__main__.run([
- '--name=yt-dlp%s' % _x86,
- '--icon=devscripts/logo.ico',
- *[f'--exclude-module={module}' for module in excluded_modules],
- *[f'--hidden-import={module}' for module in dependancies],
- '--upx-exclude=vcruntime140.dll',
- '--noconfirm',
- *opts,
- 'yt_dlp/__main__.py',
-])
-SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE)
+def set_version_info(exe, version):
+ if OS_NAME == 'Windows':
+ windows_set_version(exe, version)
+
+
+def windows_set_version(exe, version):
+ version_list = version_to_list(version)
+ suffix = '_x86' if ARCH == '32' else ''
+ SetVersion(exe, VSVersionInfo(
+ ffi=FixedFileInfo(
+ filevers=version_list,
+ prodvers=version_list,
+ mask=0x3F,
+ flags=0x0,
+ OS=0x4,
+ fileType=0x1,
+ subtype=0x0,
+ date=(0, 0),
+ ),
+ kids=[
+ StringFileInfo([StringTable('040904B0', [
+ StringStruct('Comments', 'yt-dlp%s Command Line Interface.' % suffix),
+ StringStruct('CompanyName', 'https://github.com/yt-dlp'),
+ StringStruct('FileDescription', 'yt-dlp%s' % (' (32 Bit)' if ARCH == '32' else '')),
+ StringStruct('FileVersion', version),
+ StringStruct('InternalName', f'yt-dlp{suffix}'),
+ StringStruct('LegalCopyright', 'pukkandan.ytdlp@gmail.com | UNLICENSE'),
+ StringStruct('OriginalFilename', f'yt-dlp{suffix}.exe'),
+ StringStruct('ProductName', f'yt-dlp{suffix}'),
+ StringStruct(
+ 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'),
+ ])]), VarFileInfo([VarStruct('Translation', [0, 1200])])
+ ]
+ ))
+
+
+if __name__ == '__main__':
+ main()
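
A standalone illustration of `version_to_list` above: Windows version resources expect exactly four numeric fields, so the dotted version string is split and zero-padded on the right:

    def version_to_list(version):
        version_list = version.split('.')
        return list(map(int, version_list)) + [0] * (4 - len(version_list))

    assert version_to_list('2021.10.22') == [2021, 10, 22, 0]
    assert version_to_list('2021.10.22.1') == [2021, 10, 22, 1]
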
diff --git a/setup.py b/setup.py
index 675afab17..f5f9d7513 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ from distutils.spawn import spawn
exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
-DESCRIPTION = 'Command-line program to download videos from YouTube.com and many other other video platforms.'
+DESCRIPTION = 'A youtube-dl fork with additional features and patches'
LONG_DESCRIPTION = '\n\n'.join((
'Official repository: <https://github.com/yt-dlp/yt-dlp>',
@@ -29,7 +29,7 @@ REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']
if sys.argv[1:2] == ['py2exe']:
import py2exe
warnings.warn(
- 'Building with py2exe is not officially supported. '
+ 'py2exe builds do not support pycryptodomex and need VC++14 to run. '
'The recommended way is to use "pyinst.py" to build using pyinstaller')
params = {
'console': [{
diff --git a/supportedsites.md b/supportedsites.md
index 02be6b918..01c3f43a9 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -127,7 +127,7 @@
- **BilibiliAudioAlbum**
- **BilibiliChannel**
- **BiliBiliPlayer**
- - **BiliBiliSearch**: Bilibili video search, "bilisearch" keyword
+ - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
- **BiliIntl**
- **BiliIntlSeries**
- **BioBioChileTV**
@@ -226,7 +226,9 @@
- **Crackle**
- **CrooksAndLiars**
- **crunchyroll**
+ - **crunchyroll:beta**
- **crunchyroll:playlist**
+ - **crunchyroll:playlist:beta**
- **CSpan**: C-SPAN
- **CtsNews**: 華視新聞
- **CTV**
@@ -315,6 +317,7 @@
- **ESPNArticle**
- **EsriVideo**
- **Europa**
+ - **EUScreen**
- **EWETV**
- **ExpoTV**
- **Expressen**
@@ -394,6 +397,7 @@
- **Goshgay**
- **GoToStage**
- **GPUTechConf**
+ - **Gronkh**
- **Groupon**
- **hbo**
- **HearThisAt**
@@ -570,6 +574,7 @@
- **Mgoon**
- **MGTV**: 芒果TV
- **MiaoPai**
+ - **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom
- **mildom:user:vod**: Download all VODs from specific user in Mildom
- **mildom:vod**: Download a VOD in Mildom
@@ -686,8 +691,8 @@
- **niconico**: ニコニコ動画
- **NiconicoPlaylist**
- **NiconicoUser**
- - **nicovideo:search**: Nico video searches
- - **nicovideo:search:date**: Nico video searches, newest first
+ - **nicovideo:search**: Nico video searches; "nicosearch:" prefix
+ - **nicovideo:search:date**: Nico video searches, newest first; "nicosearchdate:" prefix
- **nicovideo:search_url**: Nico video search URLs
- **Nintendo**
- **Nitter**
@@ -734,6 +739,7 @@
- **Odnoklassniki**
- **OktoberfestTV**
- **OlympicsReplay**
+ - **on24**: ON24
- **OnDemandKorea**
- **onet.pl**
- **onet.tv**
@@ -930,7 +936,7 @@
- **SBS**: sbs.com.au
- **schooltv**
- **ScienceChannel**
- - **screen.yahoo:search**: Yahoo screen search
+ - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix
- **Screencast**
- **ScreencastOMatic**
- **ScrippsNetworks**
@@ -961,6 +967,7 @@
- **SkylineWebcams**
- **skynewsarabia:article**
- **skynewsarabia:video**
+ - **SkyNewsAU**
- **Slideshare**
- **SlidesLive**
- **Slutload**
@@ -970,7 +977,7 @@
- **SonyLIVSeries**
- **soundcloud**
- **soundcloud:playlist**
- - **soundcloud:search**: Soundcloud search
+ - **soundcloud:search**: Soundcloud search; "scsearch:" prefix
- **soundcloud:set**
- **soundcloud:trackstation**
- **soundcloud:user**
@@ -1029,7 +1036,6 @@
- **SztvHu**
- **t-online.de**
- **Tagesschau**
- - **tagesschau:player**
- **Tass**
- **TBS**
- **TDSLifeway**
@@ -1089,6 +1095,8 @@
- **TrailerAddict** (Currently broken)
- **Trilulilu**
- **Trovo**
+ - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix
+ - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix
- **TrovoVod**
- **TruNews**
- **TruTV**
@@ -1193,7 +1201,7 @@
- **Viddler**
- **Videa**
- **video.arnes.si**: Arnes Video
- - **video.google:search**: Google Video search
+ - **video.google:search**: Google Video search; "gvsearch:" prefix (Currently broken)
- **video.sky.it**
- **video.sky.it:live**
- **VideoDetective**
@@ -1335,19 +1343,19 @@
- **YouPorn**
- **YourPorn**
- **YourUpload**
- - **youtube**: YouTube.com
- - **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication)
- - **youtube:history**: Youtube watch history, ":ythis" for short (requires authentication)
- - **youtube:playlist**: YouTube.com playlists
- - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
- - **youtube:search**: YouTube.com searches, "ytsearch" keyword
- - **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword
- - **youtube:search_url**: YouTube.com search URLs
- - **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)
- - **youtube:tab**: YouTube.com tab
- - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
+ - **youtube**: YouTube
+ - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies)
+ - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies)
+ - **youtube:playlist**: YouTube playlists
+ - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
+ - **youtube:search**: YouTube searches; "ytsearch:" prefix
+ - **youtube:search:date**: YouTube searches, newest videos first; "ytsearchdate:" prefix
+ - **youtube:search_url**: YouTube search URLs with sorting and filter support
+ - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
+ - **youtube:tab**: YouTube Tabs
+ - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
- **YoutubeYtBe**: youtu.be
- - **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword
+ - **YoutubeYtUser**: YouTube user videos; "ytuser:" prefix
- **Zapiks**
- **Zattoo**
- **ZattooLive**
diff --git a/test/test_utils.py b/test/test_utils.py
index d84c3d3ee..810ed3de4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1163,12 +1163,15 @@ class TestUtil(unittest.TestCase):
def test_parse_resolution(self):
self.assertEqual(parse_resolution(None), {})
self.assertEqual(parse_resolution(''), {})
- self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080})
- self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080})
self.assertEqual(parse_resolution('720p'), {'height': 720})
self.assertEqual(parse_resolution('4k'), {'height': 2160})
self.assertEqual(parse_resolution('8K'), {'height': 4320})
+ self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080})
+ self.assertEqual(parse_resolution('ep1x2'), {})
+ self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080})
def test_parse_bitrate(self):
self.assertEqual(parse_bitrate(None), None)
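
A sketch of a `parse_resolution` consistent with the new tests above (not necessarily the exact `yt_dlp.utils` implementation): the lookaround guards are what make `ep1x2` fail while `pre_1920x1080_post` and surrounding whitespace still match:

    import re

    def parse_resolution(s):
        if s is None:
            return {}
        # WIDTHxHEIGHT with 'x', '×' or ',' as separator, not glued to letters/digits
        mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)\s*[x×,]\s*(\d+)(?![a-zA-Z0-9])', s)
        if mobj:
            return {'width': int(mobj.group(1)), 'height': int(mobj.group(2))}
        mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
        if mobj:
            return {'height': int(mobj.group(1))}
        mobj = re.search(r'\b([48])[kK]\b', s)
        if mobj:
            return {'height': int(mobj.group(1)) * 540}  # 4k -> 2160, 8K -> 4320
        return {}

    assert parse_resolution('ep1x2') == {}
    assert parse_resolution('pre_1920x1080_post') == {'width': 1920, 'height': 1080}
    assert parse_resolution('1920, 1080') == {'width': 1920, 'height': 1080}
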
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index d1ab540d2..b10e56fa1 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -28,6 +28,7 @@ import traceback
import random
import unicodedata
+from enum import Enum
from string import ascii_letters
from .compat import (
@@ -55,9 +56,7 @@ from .utils import (
DEFAULT_OUTTMPL,
determine_ext,
determine_protocol,
- DOT_DESKTOP_LINK_TEMPLATE,
- DOT_URL_LINK_TEMPLATE,
- DOT_WEBLOC_LINK_TEMPLATE,
+ DownloadCancelled,
DownloadError,
encode_compat_str,
encodeFilename,
@@ -76,11 +75,13 @@ from .utils import (
iri_to_uri,
ISO3166Utils,
LazyList,
+ LINK_TEMPLATES,
locked_file,
make_dir,
make_HTTPS_handler,
MaxDownloadsReached,
network_exceptions,
+ number_of_digits,
orderedSet,
OUTTMPL_TYPES,
PagedList,
@@ -107,7 +108,6 @@ from .utils import (
strftime_or_none,
subtitles_filename,
supports_terminal_sequences,
- TERMINAL_SEQUENCES,
ThrottledDownload,
to_high_limit_path,
traverse_obj,
@@ -123,6 +123,7 @@ from .utils import (
YoutubeDLRedirectHandler,
)
from .cache import Cache
+from .minicurses import format_text
from .extractor import (
gen_extractor_classes,
get_info_extractor,
@@ -221,7 +222,8 @@ class YoutubeDL(object):
allow_multiple_audio_streams: Allow multiple audio streams to be merged
into a single file
check_formats Whether to test if the formats are downloadable.
- Can be True (check all), False (check none)
+ Can be True (check all), False (check none),
+ 'selected' (check selected formats),
or None (check only if requested by extractor)
paths: Dictionary of output paths. The allowed keys are 'home'
'temp' and the keys of OUTTMPL_TYPES (in utils.py)
@@ -306,7 +308,7 @@ class YoutubeDL(object):
cookiefile: File name where cookies should be read from and dumped to
cookiesfrombrowser: A tuple containing the name of the browser and the profile
name/path from where cookies are loaded.
- Eg: ('chrome', ) or (vivaldi, 'default')
+ Eg: ('chrome', ) or ('vivaldi', 'default')
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
@@ -502,7 +504,7 @@ class YoutubeDL(object):
def __init__(self, params=None, auto_init=True):
"""Create a FileDownloader object with the given options.
@param auto_init Whether to load the default extractors and print header (if verbose).
- Set to 'no_verbose_header' to not ptint the header
+ Set to 'no_verbose_header' to not print the header
"""
if params is None:
params = {}
@@ -523,7 +525,10 @@ class YoutubeDL(object):
windows_enable_vt_mode()
# FIXME: This will break if we ever print color to stdout
- self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)
+ self._allow_colors = {
+ 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
+ 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
+ }
if sys.version_info < (3, 6):
self.report_warning(
@@ -531,10 +536,10 @@ class YoutubeDL(object):
if self.params.get('allow_unplayable_formats'):
self.report_warning(
- f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
+ f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
'This is a developer option intended for debugging. \n'
' If you experience any issues while using this option, '
- f'{self._color_text("DO NOT", "red")} open a bug report')
+ f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
@@ -550,9 +555,12 @@ class YoutubeDL(object):
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
- for msg in self.params.get('warnings', []):
+ for msg in self.params.get('_warnings', []):
self.report_warning(msg)
+ if 'list-formats' in self.params.get('compat_opts', []):
+ self.params['listformats_table'] = False
+
if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
# nooverwrites was unnecessarily changed to overwrites
# in 0c3d0f51778b153f65c21906031c2e091fcfb641
@@ -583,7 +591,9 @@ class YoutubeDL(object):
self._output_channel = os.fdopen(master, 'rb')
except OSError as ose:
if ose.errno == errno.ENOENT:
- self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ self.report_warning(
+ 'Could not find fribidi executable, ignoring --bidi-workaround. '
+ 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
else:
raise
@@ -630,7 +640,7 @@ class YoutubeDL(object):
"""Preload the archive, if any is specified"""
if fn is None:
return False
- self.write_debug('Loading archive file %r\n' % fn)
+ self.write_debug(f'Loading archive file {fn!r}')
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
@@ -657,7 +667,7 @@ class YoutubeDL(object):
)
self.report_warning(
'Long argument string detected. '
- 'Use -- to separate parameters and URLs, like this:\n%s\n' %
+ 'Use -- to separate parameters and URLs, like this:\n%s' %
args_to_str(correct_argv))
def add_info_extractor(self, ie):
@@ -823,10 +833,32 @@ class YoutubeDL(object):
self.to_stdout(
message, skip_eol, quiet=self.params.get('quiet', False))
- def _color_text(self, text, color):
- if self.params.get('no_color'):
- return text
- return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
+ class Styles(Enum):
+ HEADERS = 'yellow'
+ EMPHASIS = 'blue'
+ ID = 'green'
+ DELIM = 'blue'
+ ERROR = 'red'
+ WARNING = 'yellow'
+
+ def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
+ assert out in ('screen', 'err')
+ if test_encoding:
+ original_text = text
+ handle = self._screen_file if out == 'screen' else self._err_file
+ encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
+ text = text.encode(encoding, 'ignore').decode(encoding)
+ if fallback is not None and text != original_text:
+ text = fallback
+ if isinstance(f, self.Styles):
+ f = f._value_
+ return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
+
+ def _format_screen(self, *args, **kwargs):
+ return self.__format_text('screen', *args, **kwargs)
+
+ def _format_err(self, *args, **kwargs):
+ return self.__format_text('err', *args, **kwargs)
def report_warning(self, message, only_once=False):
'''
@@ -838,14 +870,14 @@ class YoutubeDL(object):
else:
if self.params.get('no_warnings'):
return
- self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
+ self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
def report_error(self, message, tb=None):
'''
Do the same as trouble, but prefixes the message with 'ERROR:', colored
in red if stderr is a tty file.
'''
- self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
+ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb)
def write_debug(self, message, only_once=False):
'''Log debug message or Print message to stderr'''
@@ -974,8 +1006,8 @@ class YoutubeDL(object):
# For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
- 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
- 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
+ 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+ 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
'autonumber': self.params.get('autonumber_size') or 5,
}
@@ -1288,7 +1320,7 @@ class YoutubeDL(object):
self.to_stderr('\r')
self.report_warning('The download speed is below throttle limit. Re-extracting data')
return wrapper(self, *args, **kwargs)
- except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
+ except (DownloadCancelled, LazyList.IndexError):
raise
except Exception as e:
if self.params.get('ignoreerrors'):
@@ -1549,7 +1581,7 @@ class YoutubeDL(object):
playlistitems = list(range(playliststart, playliststart + n_entries))
ie_result['requested_entries'] = playlistitems
- if self.params.get('allow_playlist_files', True):
+ if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
ie_copy = {
'playlist': playlist,
'playlist_id': ie_result.get('id'),
@@ -1557,6 +1589,7 @@ class YoutubeDL(object):
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_index': 0,
+ 'n_entries': n_entries,
}
ie_copy.update(dict(ie_result))
@@ -1686,6 +1719,28 @@ class YoutubeDL(object):
return op(actual_value, comparison_value)
return _filter
+ def _check_formats(self, formats):
+ for f in formats:
+ self.to_screen('[info] Testing format %s' % f['format_id'])
+ temp_file = tempfile.NamedTemporaryFile(
+ suffix='.tmp', delete=False,
+ dir=self.get_output_path('temp') or None)
+ temp_file.close()
+ try:
+ success, _ = self.dl(temp_file.name, f, test=True)
+ except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
+ success = False
+ finally:
+ if os.path.exists(temp_file.name):
+ try:
+ os.remove(temp_file.name)
+ except OSError:
+ self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+ if success:
+ yield f
+ else:
+ self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+
def _default_format_spec(self, info_dict, download=True):
def can_merge():
@@ -1725,7 +1780,7 @@ class YoutubeDL(object):
allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
'video': self.params.get('allow_multiple_video_streams', False)}
- check_formats = self.params.get('check_formats')
+ check_formats = self.params.get('check_formats') == 'selected'
def _parse_filter(tokens):
filter_parts = []
@@ -1882,6 +1937,7 @@ class YoutubeDL(object):
'height': the_only_video.get('height'),
'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
'fps': the_only_video.get('fps'),
+ 'dynamic_range': the_only_video.get('dynamic_range'),
'vcodec': the_only_video.get('vcodec'),
'vbr': the_only_video.get('vbr'),
'stretched_ratio': the_only_video.get('stretched_ratio'),
@@ -1900,26 +1956,7 @@ class YoutubeDL(object):
if not check_formats:
yield from formats
return
- for f in formats:
- self.to_screen('[info] Testing format %s' % f['format_id'])
- temp_file = tempfile.NamedTemporaryFile(
- suffix='.tmp', delete=False,
- dir=self.get_output_path('temp') or None)
- temp_file.close()
- try:
- success, _ = self.dl(temp_file.name, f, test=True)
- except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
- success = False
- finally:
- if os.path.exists(temp_file.name):
- try:
- os.remove(temp_file.name)
- except OSError:
- self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
- if success:
- yield f
- else:
- self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+ yield from self._check_formats(formats)
def _build_selector_function(selector):
if isinstance(selector, list): # ,
@@ -2076,42 +2113,45 @@ class YoutubeDL(object):
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
+ def _sort_thumbnails(self, thumbnails):
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '',
+ t.get('url')))
+
def _sanitize_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
if thumbnails is None:
thumbnail = info_dict.get('thumbnail')
if thumbnail:
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
- if thumbnails:
- thumbnails.sort(key=lambda t: (
- t.get('preference') if t.get('preference') is not None else -1,
- t.get('width') if t.get('width') is not None else -1,
- t.get('height') if t.get('height') is not None else -1,
- t.get('id') if t.get('id') is not None else '',
- t.get('url')))
-
- def thumbnail_tester():
- def test_thumbnail(t):
- self.to_screen(f'[info] Testing thumbnail {t["id"]}')
- try:
- self.urlopen(HEADRequest(t['url']))
- except network_exceptions as err:
- self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
- return False
- return True
- return test_thumbnail
-
- for i, t in enumerate(thumbnails):
- if t.get('id') is None:
- t['id'] = '%d' % i
- if t.get('width') and t.get('height'):
- t['resolution'] = '%dx%d' % (t['width'], t['height'])
- t['url'] = sanitize_url(t['url'])
-
- if self.params.get('check_formats'):
- info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
- else:
- info_dict['thumbnails'] = thumbnails
+ if not thumbnails:
+ return
+
+ def check_thumbnails(thumbnails):
+ for t in thumbnails:
+ self.to_screen(f'[info] Testing thumbnail {t["id"]}')
+ try:
+ self.urlopen(HEADRequest(t['url']))
+ except network_exceptions as err:
+ self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
+ continue
+ yield t
+
+ self._sort_thumbnails(thumbnails)
+ for i, t in enumerate(thumbnails):
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ t['url'] = sanitize_url(t['url'])
+
+ if self.params.get('check_formats') is True:
+ info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
+ else:
+ info_dict['thumbnails'] = thumbnails
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
@@ -2217,7 +2257,6 @@ class YoutubeDL(object):
info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], subtitles, automatic_captions)
- # We now pick which formats have to be downloaded
if info_dict.get('formats') is None:
# There's only one format available
formats = [info_dict]
@@ -2289,6 +2328,10 @@ class YoutubeDL(object):
format['resolution'] = self.format_resolution(format, default=None)
if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
format['dynamic_range'] = 'SDR'
+ if (info_dict.get('duration') and format.get('tbr')
+ and not format.get('filesize') and not format.get('filesize_approx')):
+ format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
+
# Add HTTP headers, so that external programs can use them from the
# json output
full_format_info = info_dict.copy()
@@ -2300,6 +2343,9 @@ class YoutubeDL(object):
# TODO Central sorting goes here
+ if self.params.get('check_formats') is True:
+ formats = LazyList(self._check_formats(formats[::-1])).reverse()
+
if not formats or formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict list them
# otherwise we end up with a circular reference, the first (and unique)
@@ -2380,7 +2426,7 @@ class YoutubeDL(object):
new_info['__original_infodict'] = info_dict
new_info.update(fmt)
self.process_info(new_info)
- # We update the info dict with the best quality format (backwards compatibility)
+ # We update the info dict with the selected best quality format (backwards compatibility)
if formats_to_download:
info_dict.update(formats_to_download[-1])
return info_dict
@@ -2617,53 +2663,41 @@ class YoutubeDL(object):
return
# Write internet shortcut files
- url_link = webloc_link = desktop_link = False
- if self.params.get('writelink', False):
- if sys.platform == "darwin": # macOS.
- webloc_link = True
- elif sys.platform.startswith("linux"):
- desktop_link = True
- else: # if sys.platform in ['win32', 'cygwin']:
- url_link = True
- if self.params.get('writeurllink', False):
- url_link = True
- if self.params.get('writewebloclink', False):
- webloc_link = True
- if self.params.get('writedesktoplink', False):
- desktop_link = True
-
- if url_link or webloc_link or desktop_link:
+ def _write_link_file(link_type):
if 'webpage_url' not in info_dict:
self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
- return
- ascii_url = iri_to_uri(info_dict['webpage_url'])
-
- def _write_link_file(extension, template, newline, embed_filename):
- linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
+ return False
+ linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
- self.to_screen('[info] Internet shortcut is already present')
- else:
- try:
- self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
- with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
- template_vars = {'url': ascii_url}
- if embed_filename:
- template_vars['filename'] = linkfn[:-(len(extension) + 1)]
- linkfile.write(template % template_vars)
- except (OSError, IOError):
- self.report_error('Cannot write internet shortcut ' + linkfn)
- return False
+ self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
+ return True
+ try:
+ self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
+ with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
+ newline='\r\n' if link_type == 'url' else '\n') as linkfile:
+ template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
+ if link_type == 'desktop':
+ template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
+ linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
+ except (OSError, IOError):
+ self.report_error(f'Cannot write internet shortcut {linkfn}')
+ return False
return True
- if url_link:
- if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
- return
- if webloc_link:
- if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
- return
- if desktop_link:
- if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
- return
+ write_links = {
+ 'url': self.params.get('writeurllink'),
+ 'webloc': self.params.get('writewebloclink'),
+ 'desktop': self.params.get('writedesktoplink'),
+ }
+ if self.params.get('writelink'):
+ link_type = ('webloc' if sys.platform == 'darwin'
+ else 'desktop' if sys.platform.startswith('linux')
+ else 'url')
+ write_links[link_type] = True
+
+ if any(should_write and not _write_link_file(link_type)
+ for link_type, should_write in write_links.items()):
+ return
try:
info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
@@ -2915,14 +2949,8 @@ class YoutubeDL(object):
url, force_generic_extractor=self.params.get('force_generic_extractor', False))
except UnavailableVideoError:
self.report_error('unable to download video')
- except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloads reached')
- raise
- except ExistingVideoReached:
- self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
- raise
- except RejectedVideoReached:
- self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
+ except DownloadCancelled as e:
+ self.to_screen(f'[info] {e.msg}')
raise
else:
if self.params.get('dump_single_json', False):
@@ -3162,38 +3190,46 @@ class YoutubeDL(object):
res += '~' + format_bytes(fdict['filesize_approx'])
return res
+ def _list_format_headers(self, *headers):
+ if self.params.get('listformats_table', True) is not False:
+ return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
+ return headers
+
def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])
- new_format = (
- 'list-formats' not in self.params.get('compat_opts', [])
- and self.params.get('listformats_table', True) is not False)
+ new_format = self.params.get('listformats_table', True) is not False
if new_format:
+ tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
+ vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
+ abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
+ delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
table = [
[
- format_field(f, 'format_id'),
+ self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
format_field(f, 'ext'),
self.format_resolution(f),
format_field(f, 'fps', '%d'),
format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
- '|',
+ delim,
format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
- format_field(f, 'tbr', '%4dk'),
+ format_field(f, 'tbr', f'%{tbr_digits}dk'),
shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
- '|',
+ delim,
format_field(f, 'vcodec', default='unknown').replace('none', ''),
- format_field(f, 'vbr', '%4dk'),
+ format_field(f, 'vbr', f'%{vbr_digits}dk'),
format_field(f, 'acodec', default='unknown').replace('none', ''),
- format_field(f, 'abr', '%3dk'),
+ format_field(f, 'abr', f'%{abr_digits}dk'),
format_field(f, 'asr', '%5dHz'),
', '.join(filter(None, (
- 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
+ self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '',
format_field(f, 'language', '[%s]'),
format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))),
))),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
- header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO',
- '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
+ header_line = self._list_format_headers(
+ 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO',
+ delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
else:
table = [
[
@@ -3208,7 +3244,10 @@ class YoutubeDL(object):
self.to_screen(
'[info] Available formats for %s:' % info_dict['id'])
self.to_stdout(render_table(
- header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
+ header_line, table,
+ extraGap=(0 if new_format else 1),
+ hideEmpty=new_format,
+ delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
def list_thumbnails(self, info_dict):
thumbnails = list(info_dict.get('thumbnails'))
@@ -3219,7 +3258,7 @@ class YoutubeDL(object):
self.to_screen(
'[info] Thumbnails for %s:' % info_dict['id'])
self.to_stdout(render_table(
- ['ID', 'width', 'height', 'URL'],
+ self._list_format_headers('ID', 'Width', 'Height', 'URL'),
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
def list_subtitles(self, video_id, subtitles, name='subtitles'):
@@ -3236,7 +3275,7 @@ class YoutubeDL(object):
return [lang, ', '.join(names), ', '.join(exts)]
self.to_stdout(render_table(
- ['Language', 'Name', 'Formats'],
+ self._list_format_headers('Language', 'Name', 'Formats'),
[_row(lang, formats) for lang, formats in subtitles.items()],
hideEmpty=True))
@@ -3249,31 +3288,40 @@ class YoutubeDL(object):
def print_debug_header(self):
if not self.params.get('verbose'):
return
- get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
- encoding_str = (
- '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
- locale.getpreferredencoding(),
- sys.getfilesystemencoding(),
- get_encoding(self._screen_file), get_encoding(self._err_file),
- self.get_encoding()))
+
+ def get_encoding(stream):
+ ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
+ if not supports_terminal_sequences(stream):
+ ret += ' (No ANSI)'
+ return ret
+
+ encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
+ locale.getpreferredencoding(),
+ sys.getfilesystemencoding(),
+ get_encoding(self._screen_file), get_encoding(self._err_file),
+ self.get_encoding())
logger = self.params.get('logger')
if logger:
write_debug = lambda msg: logger.debug(f'[debug] {msg}')
write_debug(encoding_str)
else:
- write_debug = lambda msg: self._write_string(f'[debug] {msg}')
- write_string(encoding_str, encoding=None)
-
- write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
- if _LAZY_LOADER:
- write_debug('Lazy loading extractors enabled\n')
+ write_string(f'[debug] {encoding_str}\n', encoding=None)
+ write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
+
+ source = detect_variant()
+ write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
+ if not _LAZY_LOADER:
+ if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ write_debug('Lazy loading extractors is forcibly disabled')
+ else:
+ write_debug('Lazy loading extractors is disabled')
if plugin_extractors or plugin_postprocessors:
- write_debug('Plugins: %s\n' % [
+ write_debug('Plugins: %s' % [
'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
if self.params.get('compat_opts'):
- write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
+ write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
try:
sp = Popen(
['git', 'rev-parse', '--short', 'HEAD'],
@@ -3282,7 +3330,7 @@ class YoutubeDL(object):
out, err = sp.communicate_or_kill()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- write_debug('Git HEAD: %s\n' % out)
+ write_debug('Git HEAD: %s' % out)
except Exception:
try:
sys.exc_clear()
@@ -3295,7 +3343,7 @@ class YoutubeDL(object):
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name
- write_debug('Python version %s (%s %s) - %s\n' % (
+ write_debug('Python version %s (%s %s) - %s' % (
platform.python_version(),
python_implementation(),
platform.architecture()[0],
@@ -3307,7 +3355,7 @@ class YoutubeDL(object):
exe_str = ', '.join(
f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
) or 'none'
- write_debug('exe versions: %s\n' % exe_str)
+ write_debug('exe versions: %s' % exe_str)
from .downloader.websocket import has_websockets
from .postprocessor.embedthumbnail import has_mutagen
@@ -3320,21 +3368,18 @@ class YoutubeDL(object):
SQLITE_AVAILABLE and 'sqlite',
KEYRING_AVAILABLE and 'keyring',
)))) or 'none'
- write_debug('Optional libraries: %s\n' % lib_str)
- write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % (
- supports_terminal_sequences(self._screen_file),
- supports_terminal_sequences(self._err_file)))
+ write_debug('Optional libraries: %s' % lib_str)
proxy_map = {}
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
- write_debug('Proxy map: ' + compat_str(proxy_map) + '\n')
+ write_debug(f'Proxy map: {proxy_map}')
- if self.params.get('call_home', False):
+ # Not implemented
+ if False and self.params.get('call_home'):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
- write_debug('Public IP address: %s\n' % ipaddr)
- return
+ write_debug('Public IP address: %s' % ipaddr)
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode('utf-8')
if version_tuple(latest_version) > version_tuple(__version__):
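
One of the smaller additions in `YoutubeDL.py` above, `filesize_approx`, is plain arithmetic on fields that are already present: `duration` is in seconds and `tbr` is the total bitrate in kbit/s, with the code treating 1k as 1024 bits:

    duration = 600       # seconds
    tbr = 4500           # total bitrate in kbit/s

    filesize_approx = duration * tbr * (1024 / 8)    # bytes, as in the diff
    print('%.1f MiB' % (filesize_approx / 1024**2))  # -> 329.6 MiB
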
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index d8db5754f..3a4b81efd 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -119,10 +119,10 @@ def _real_main(argv=None):
desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
if desc is False:
continue
- if hasattr(ie, 'SEARCH_KEY'):
+ if getattr(ie, 'SEARCH_KEY', None) is not None:
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
_COUNTS = ('', '5', '10', 'all')
- desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
+ desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")'
write_string(desc + '\n', out=sys.stdout)
sys.exit(0)
if opts.ap_list_mso:
@@ -256,6 +256,9 @@ def _real_main(argv=None):
compat_opts = opts.compat_opts
+ def report_conflict(arg1, arg2):
+ warnings.append(f'{arg2} is ignored since {arg1} was given')
+
def _unused_compat_opt(name):
if name not in compat_opts:
return False
@@ -287,10 +290,14 @@ def _real_main(argv=None):
if _video_multistreams_set is False and _audio_multistreams_set is False:
_unused_compat_opt('multistreams')
outtmpl_default = opts.outtmpl.get('default')
+ if opts.useid:
+ if outtmpl_default is None:
+ outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s'
+ else:
+ report_conflict('--output', '--id')
if 'filename' in compat_opts:
if outtmpl_default is None:
- outtmpl_default = '%(title)s-%(id)s.%(ext)s'
- opts.outtmpl.update({'default': outtmpl_default})
+ outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s'
else:
_unused_compat_opt('filename')
@@ -363,9 +370,6 @@ def _real_main(argv=None):
opts.addchapters = True
opts.remove_chapters = opts.remove_chapters or []
- def report_conflict(arg1, arg2):
- warnings.append('%s is ignored since %s was given' % (arg2, arg1))
-
if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False:
if opts.sponskrub:
if opts.remove_chapters:
@@ -738,7 +742,7 @@ def _real_main(argv=None):
'geo_bypass': opts.geo_bypass,
'geo_bypass_country': opts.geo_bypass_country,
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
- 'warnings': warnings,
+ '_warnings': warnings,
'compat_opts': compat_opts,
}
diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py
index b107b2114..8508f1465 100644
--- a/yt_dlp/compat.py
+++ b/yt_dlp/compat.py
@@ -19,6 +19,7 @@ import shlex
import shutil
import socket
import struct
+import subprocess
import sys
import tokenize
import urllib
@@ -162,7 +163,9 @@ except ImportError:
def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075
if compat_os_name != 'nt':
return
- os.system('')
+ startupinfo = subprocess.STARTUPINFO()
+ startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
+ subprocess.Popen('', shell=True, startupinfo=startupinfo)
# Deprecated
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 5f7fdf584..c9ae9b6db 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -620,7 +620,7 @@ def _get_windows_v10_key(browser_root, logger):
if path is None:
logger.error('could not find local state file')
return None
- with open(path, 'r') as f:
+ with open(path, 'r', encoding='utf8') as f:
data = json.load(f)
try:
base64_key = data['os_crypt']['encrypted_key']
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index c345f3148..a9d1471f8 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -370,7 +370,8 @@ class FragmentFD(FileDownloader):
if max_progress == 1:
return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
max_workers = self.params.get('concurrent_fragment_downloads', max_progress)
- self._prepare_multiline_status(max_progress)
+ if max_progress > 1:
+ self._prepare_multiline_status(max_progress)
def thread_func(idx, ctx, fragments, info_dict, tpe):
ctx['max_progress'] = max_progress
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index 198c4ae17..b35484246 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,14 +1,15 @@
-from __future__ import unicode_literals
+import os
from ..utils import load_plugins
-try:
- from .lazy_extractors import *
- from .lazy_extractors import _ALL_CLASSES
- _LAZY_LOADER = True
- _PLUGIN_CLASSES = {}
-except ImportError:
- _LAZY_LOADER = False
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+ try:
+ from .lazy_extractors import *
+ from .lazy_extractors import _ALL_CLASSES
+ _LAZY_LOADER = True
+ except ImportError:
+ pass
if not _LAZY_LOADER:
from .extractors import *
@@ -19,8 +20,8 @@ if not _LAZY_LOADER:
]
_ALL_CLASSES.append(GenericIE)
- _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
- _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
def gen_extractor_classes():
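
The new `YTDLP_NO_LAZY_EXTRACTORS` switch is checked at import time, so it must be set before the package is first imported. A minimal sketch:

    import os

    # Force the full extractor list even when lazy_extractors.py exists;
    # must be set before yt_dlp (or yt_dlp.extractor) is imported.
    os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = '1'

    import yt_dlp  # noqa: E402
    print(len(list(yt_dlp.extractor.gen_extractor_classes())), 'extractors loaded')
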
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index 9378c33cd..bebcafa6b 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -39,8 +39,8 @@ MSO_INFO = {
},
'RCN': {
'name': 'RCN',
- 'username_field': 'UserName',
- 'password_field': 'UserPassword',
+ 'username_field': 'username',
+ 'password_field': 'password',
},
'Rogers': {
'name': 'Rogers',
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index d6c77e418..483f93d67 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -376,8 +376,10 @@ class BiliBiliIE(InfoExtractor):
replies = traverse_obj(
self._download_json(
f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
- video_id, note=f'Extracting comments from page {idx}'),
- ('data', 'replies')) or []
+ video_id, note=f'Extracting comments from page {idx}', fatal=False),
+ ('data', 'replies'))
+ if not replies:
+ return
for children in map(self._get_all_children, replies):
yield from children
@@ -566,7 +568,7 @@ class BilibiliCategoryIE(InfoExtractor):
class BiliBiliSearchIE(SearchInfoExtractor):
- IE_DESC = 'Bilibili video search, "bilisearch" keyword'
+ IE_DESC = 'Bilibili video search'
_MAX_RESULTS = 100000
_SEARCH_KEY = 'bilisearch'
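
The bilibili change illustrates a general pattern: with `fatal=False` a failed page download yields `None` instead of raising, and an empty or missing `replies` list ends the pagination quietly. In miniature, with a hypothetical `fetch_page` helper:

    import itertools

    def iter_comments(fetch_page):
        for idx in itertools.count(1):
            page = fetch_page(idx)  # hypothetical: returns a dict, or None on failure
            replies = ((page or {}).get('data') or {}).get('replies')
            if not replies:
                return  # stop on a failed or empty page instead of raising
            yield from replies
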
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 5e4526c53..4fcf2a9c1 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -2,6 +2,9 @@
from __future__ import unicode_literals
import re
+import json
+import base64
+import time
from .common import InfoExtractor
from ..compat import (
@@ -244,37 +247,96 @@ class CBCGemIE(InfoExtractor):
'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}]
- _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/assets/'
+
+ _GEO_COUNTRIES = ['CA']
+ _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+ _NETRC_MACHINE = 'cbcgem'
+ _claims_token = None
+
+ def _new_claims_token(self, email, password):
+ data = json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode()
+ headers = {'content-type': 'application/json'}
+ query = {'apikey': self._TOKEN_API_KEY}
+ resp = self._download_json('https://api.loginradius.com/identity/v2/auth/login',
+ None, data=data, headers=headers, query=query)
+ access_token = resp['access_token']
+
+ query = {
+ 'access_token': access_token,
+ 'apikey': self._TOKEN_API_KEY,
+ 'jwtapp': 'jwt',
+ }
+ resp = self._download_json('https://cloud-api.loginradius.com/sso/jwt/api/token',
+ None, headers=headers, query=query)
+ sig = resp['signature']
+
+ data = json.dumps({'jwt': sig}).encode()
+ headers = {'content-type': 'application/json', 'ott-device-type': 'web'}
+ resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token',
+ None, data=data, headers=headers)
+ cbc_access_token = resp['accessToken']
+
+ headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token}
+ resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
+ None, headers=headers)
+ return resp['claimsToken']
+
+ def _get_claims_token_expiry(self):
+        # The claims token is a JWT; decode its payload and extract the
+        # 'exp' field, a Unix timestamp for when the token expires
+ b64_data = self._claims_token.split('.')[1]
+ data = base64.urlsafe_b64decode(b64_data + "==")
+ return json.loads(data)['exp']
+
+ def claims_token_expired(self):
+ exp = self._get_claims_token_expiry()
+ if exp - time.time() < 10:
+ # It will expire in less than 10 seconds, or has already expired
+ return True
+ return False
+
+ def claims_token_valid(self):
+ return self._claims_token is not None and not self.claims_token_expired()
+
+ def _get_claims_token(self, email, password):
+ if not self.claims_token_valid():
+ self._claims_token = self._new_claims_token(email, password)
+ self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token)
+ return self._claims_token
+
+ def _real_initialize(self):
+ if self.claims_token_valid():
+ return
+ self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token')
def _real_extract(self, url):
video_id = self._match_id(url)
- video_info = self._download_json(self._API_BASE + video_id, video_id)
-
- last_error = None
- attempt = -1
- retries = self.get_param('extractor_retries', 15)
- while attempt < retries:
- attempt += 1
- if last_error:
- self.report_warning('%s. Retrying ...' % last_error)
- m3u8_info = self._download_json(
- video_info['playSession']['url'], video_id,
- note='Downloading JSON metadata%s' % f' (attempt {attempt})')
- m3u8_url = m3u8_info.get('url')
- if m3u8_url:
- break
- elif m3u8_info.get('errorCode') == 1:
- self.raise_geo_restricted(countries=['CA'])
- else:
- last_error = f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}'
- # 35 means media unavailable, but retries work
- if m3u8_info.get('errorCode') != 35 or attempt >= retries:
- raise ExtractorError(last_error)
+ video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id)
+
+ email, password = self._get_login_info()
+ if email and password:
+ claims_token = self._get_claims_token(email, password)
+ headers = {'x-claims-token': claims_token}
+ else:
+ headers = {}
+ m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
+ m3u8_url = m3u8_info.get('url')
+
+ if m3u8_info.get('errorCode') == 1:
+ self.raise_geo_restricted(countries=['CA'])
+ elif m3u8_info.get('errorCode') == 35:
+ self.raise_login_required(method='password')
+ elif m3u8_info.get('errorCode') != 0:
+ raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
self._remove_duplicate_formats(formats)
- for i, format in enumerate(formats):
+ for format in formats:
if format.get('vcodec') == 'none':
if format.get('ext') is None:
format['ext'] = 'm4a'
@@ -377,7 +439,7 @@ class CBCGemPlaylistIE(InfoExtractor):
class CBCGemLiveIE(InfoExtractor):
IE_NAME = 'gem.cbc.ca:live'
- _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>[0-9]{12})'
+ _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
_TEST = {
'url': 'https://gem.cbc.ca/live/920604739687',
'info_dict': {
@@ -396,21 +458,21 @@ class CBCGemLiveIE(InfoExtractor):
# It's unclear where the chars at the end come from, but they appear to be
# constant. Might need updating in the future.
- _API = 'https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT'
+    # There are two feed URLs; some livestreams are in one and some
+    # in the other. The JSON schema is the same for both.
+ _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
def _real_extract(self, url):
video_id = self._match_id(url)
- live_info = self._download_json(self._API, video_id)['entries']
- video_info = None
- for stream in live_info:
- if stream.get('guid') == video_id:
- video_info = stream
-
- if video_info is None:
- raise ExtractorError(
- 'Couldn\'t find video metadata, maybe this livestream is now offline',
- expected=True)
+ for api_url in self._API_URLS:
+ video_info = next((
+ stream for stream in self._download_json(api_url, video_id)['entries']
+ if stream.get('guid') == video_id), None)
+ if video_info:
+ break
+ else:
+ raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
return {
'_type': 'url_transparent',
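
The claims-token caching added above hinges on reading the expiry out of the JWT itself: the payload is the second dot-separated section, base64url-encoded with its '=' padding stripped. The patch pads with a literal "==" (which the lenient decoder tolerates); a self-contained sketch using exact padding instead, with the same 10-second margin:

    import base64
    import json
    import time

    def jwt_expires_soon(token, margin=10):
        payload_b64 = token.split('.')[1]
        # Restore the stripped padding so the length is a multiple of 4
        payload = base64.urlsafe_b64decode(payload_b64 + '=' * (-len(payload_b64) % 4))
        return json.loads(payload)['exp'] - time.time() < margin
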
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index e00d8c42b..aa98c0cc9 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -74,6 +74,7 @@ from ..utils import (
strip_or_none,
traverse_obj,
unescapeHTML,
+ UnsupportedError,
unified_strdate,
unified_timestamp,
update_Request,
@@ -448,7 +449,9 @@ class InfoExtractor(object):
}
def __init__(self, downloader=None):
- """Constructor. Receives an optional downloader."""
+ """Constructor. Receives an optional downloader (a YoutubeDL instance).
+ If a downloader is not passed during initialization,
+ it must be set using "set_downloader()" before "extract()" is called"""
self._ready = False
self._x_forwarded_for_ip = None
self._printed_messages = set()
@@ -602,10 +605,19 @@ class InfoExtractor(object):
if self.__maybe_fake_ip_and_retry(e.countries):
continue
raise
+ except UnsupportedError:
+ raise
except ExtractorError as e:
- video_id = e.video_id or self.get_temp_id(url)
- raise ExtractorError(
- e.msg, video_id=video_id, ie=self.IE_NAME, tb=e.traceback, expected=e.expected, cause=e.cause)
+ kwargs = {
+ 'video_id': e.video_id or self.get_temp_id(url),
+ 'ie': self.IE_NAME,
+ 'tb': e.traceback,
+ 'expected': e.expected,
+ 'cause': e.cause
+ }
+ if hasattr(e, 'countries'):
+ kwargs['countries'] = e.countries
+ raise type(e)(e.msg, **kwargs)
except compat_http_client.IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e:
@@ -664,7 +676,7 @@ class InfoExtractor(object):
See _download_webpage docstring for arguments specification.
"""
if not self._downloader._first_webpage_request:
- sleep_interval = float_or_none(self.get_param('sleep_interval_requests')) or 0
+ sleep_interval = self.get_param('sleep_interval_requests') or 0
if sleep_interval > 0:
self.to_screen('Sleeping %s seconds ...' % sleep_interval)
time.sleep(sleep_interval)
@@ -1137,7 +1149,7 @@ class InfoExtractor(object):
if mobj:
break
- _name = self._downloader._color_text(name, 'blue')
+ _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
if mobj:
if group is None:
@@ -1537,8 +1549,8 @@ class InfoExtractor(object):
'ie_pref': {'priority': True, 'type': 'extractor'},
'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
- 'lang': {'convert': 'ignore', 'field': 'language_preference'},
- 'quality': {'convert': 'float_none', 'default': -1},
+ 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
+ 'quality': {'convert': 'float', 'default': -1},
'filesize': {'convert': 'bytes'},
'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
'id': {'convert': 'string', 'field': 'format_id'},
@@ -1549,7 +1561,7 @@ class InfoExtractor(object):
'vbr': {'convert': 'float_none'},
'abr': {'convert': 'float_none'},
'asr': {'convert': 'float_none'},
- 'source': {'convert': 'ignore', 'field': 'source_preference'},
+ 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True},
@@ -3618,9 +3630,11 @@ class SearchInfoExtractor(InfoExtractor):
"""
Base class for paged search queries extractors.
They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
- Instances should define _SEARCH_KEY and _MAX_RESULTS.
+    Instances should define _SEARCH_KEY and optionally _MAX_RESULTS.
"""
+ _MAX_RESULTS = float('inf')
+
@classmethod
def _make_valid_url(cls):
return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
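
With _MAX_RESULTS now defaulting to infinity on the base class, subclasses only need a _SEARCH_KEY. The generated pattern accepts an empty prefix (conventionally a single result), a positive count, or "all"; a quick check using "scsearch" as the key:

    import re

    pattern = r'scsearch(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)'
    for q in ('scsearch:jazz', 'scsearch15:jazz', 'scsearchall:jazz'):
        prefix = re.match(pattern, q).group('prefix')
        print(prefix or '(empty: 1 result)')
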
diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py
index eba6b73ba..e90aa1954 100644
--- a/yt_dlp/extractor/coub.py
+++ b/yt_dlp/extractor/coub.py
@@ -57,7 +57,7 @@ class CoubIE(InfoExtractor):
file_versions = coub['file_versions']
- QUALITIES = ('low', 'med', 'high')
+ QUALITIES = ('low', 'med', 'high', 'higher')
MOBILE = 'mobile'
IPHONE = 'iphone'
@@ -86,6 +86,7 @@ class CoubIE(InfoExtractor):
'format_id': '%s-%s-%s' % (HTML5, kind, quality),
'filesize': int_or_none(item.get('size')),
'vcodec': 'none' if kind == 'audio' else None,
+ 'acodec': 'none' if kind == 'video' else None,
'quality': quality_key(quality),
'source_preference': preference_key(HTML5),
})
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index e0e446b87..d62480810 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -325,7 +325,7 @@ class HGTVDeIE(DPlayIE):
class DiscoveryPlusIE(DPlayIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': {
@@ -343,6 +343,9 @@ class DiscoveryPlusIE(DPlayIE):
'episode_number': 1,
},
'skip': 'Available for Premium users',
+ }, {
+ 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers',
+ 'only_matching': True,
}]
_PRODUCT = 'dplus_us'
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index f4f817fcb..9d963ee46 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -744,7 +744,10 @@ from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaite import MediaiteIE
from .mediaklikk import MediaKlikkIE
-from .mediaset import MediasetIE
+from .mediaset import (
+ MediasetIE,
+ MediasetShowIE,
+)
from .mediasite import (
MediasiteIE,
MediasiteCatalogIE,
@@ -760,6 +763,7 @@ from .metacritic import MetacriticIE
from .mgoon import MgoonIE
from .mgtv import MGTVIE
from .miaopai import MiaoPaiIE
+from .microsoftstream import MicrosoftStreamIE
from .microsoftvirtualacademy import (
MicrosoftVirtualAcademyIE,
MicrosoftVirtualAcademyCourseIE,
@@ -792,6 +796,7 @@ from .mlb import (
MLBIE,
MLBVideoIE,
)
+from .mlssoccer import MLSSoccerIE
from .mnet import MnetIE
from .moevideo import MoeVideoIE
from .mofosex import (
@@ -1288,6 +1293,7 @@ from .skynewsarabia import (
from .skynewsau import SkyNewsAUIE
from .sky import (
SkyNewsIE,
+ SkyNewsStoryIE,
SkySportsIE,
SkySportsNewsIE,
)
@@ -1387,10 +1393,7 @@ from .svt import (
from .swrmediathek import SWRMediathekIE
from .syfy import SyfyIE
from .sztvhu import SztvHuIE
-from .tagesschau import (
- TagesschauPlayerIE,
- TagesschauIE,
-)
+from .tagesschau import TagesschauIE
from .tass import TassIE
from .tbs import TBSIE
from .tdslifeway import TDSLifewayIE
@@ -1444,6 +1447,10 @@ from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE
+from .threespeak import (
+ ThreeSpeakIE,
+ ThreeSpeakUserIE,
+)
from .threeqsdn import ThreeQSDNIE
from .tiktok import (
TikTokIE,
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 5918c8c56..0d279016b 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1188,6 +1188,21 @@ class GenericIE(InfoExtractor):
},
'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
},
+ # jwplayer with only the json URL
+ {
+ 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454',
+ 'info_dict': {
+ 'id': 'TljWkvWH',
+ 'ext': 'mp4',
+ 'upload_date': '20180306',
+ 'title': 'md5:91eb1862f6526415214f62c00b453936',
+ 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa',
+ 'timestamp': 1520367225,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
# Complex jwplayer
{
'url': 'http://www.indiedb.com/games/king-machine/videos',
@@ -3503,6 +3518,13 @@ class GenericIE(InfoExtractor):
jwplayer_data = self._find_jwplayer_data(
webpage, video_id, transform_source=js_to_json)
if jwplayer_data:
+ if isinstance(jwplayer_data.get('playlist'), str):
+ return {
+ **info_dict,
+ '_type': 'url',
+ 'ie_key': JWPlatformIE.ie_key(),
+ 'url': jwplayer_data['playlist'],
+ }
try:
info = self._parse_jwplayer_data(
jwplayer_data, video_id, require_title=False, base_url=url)
@@ -3561,8 +3583,7 @@ class GenericIE(InfoExtractor):
return info_dict
# Looking for http://schema.org/VideoObject
- json_ld = self._search_json_ld(
- webpage, video_id, default={}, expected_type='VideoObject')
+ json_ld = self._search_json_ld(webpage, video_id, default={})
if json_ld.get('url'):
return merge_dicts(json_ld, info_dict)
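
The new branch in the generic extractor covers pages where jwplayer is initialized with just a feed URL, so jwplayer_data['playlist'] is a string rather than a list of entries. A sketch of the two shapes and the dispatch (URLs illustrative):

    def resolve_jwplayer(jwplayer_data):
        playlist = jwplayer_data['playlist']
        if isinstance(playlist, str):
            # Bare feed URL: hand off to the JWPlatform extractor
            return ('url', playlist)
        return ('inline', playlist)  # list of entry dicts to parse in place

    print(resolve_jwplayer({'playlist': 'https://cdn.jwplayer.com/v2/media/TljWkvWH'}))
    print(resolve_jwplayer({'playlist': [{'sources': [{'file': 'https://example.com/v.mp4'}]}]}))
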
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 3801c7af9..ccfcddd5b 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -4,6 +4,7 @@ import itertools
import hashlib
import json
import re
+import time
from .common import InfoExtractor
from ..compat import (
@@ -20,11 +21,13 @@ from ..utils import (
try_get,
url_or_none,
variadic,
+ urlencode_postdata,
)
class InstagramIE(InfoExtractor):
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+ _NETRC_MACHINE = 'instagram'
_TESTS = [{
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
'md5': '0d2da106a9d2631273e192b372806516',
@@ -140,6 +143,47 @@ class InstagramIE(InfoExtractor):
if mobj:
return mobj.group('link')
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_webpage = self._download_webpage(
+ 'https://www.instagram.com/accounts/login/', None,
+ note='Downloading login webpage', errnote='Failed to download login webpage')
+
+ shared_data = self._parse_json(
+ self._search_regex(
+ r'window\._sharedData\s*=\s*({.+?});',
+ login_webpage, 'shared data', default='{}'),
+ None)
+
+ login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
+ 'Accept': '*/*',
+ 'X-IG-App-ID': '936619743392459',
+ 'X-ASBD-ID': '198387',
+ 'X-IG-WWW-Claim': '0',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-CSRFToken': shared_data['config']['csrf_token'],
+ 'X-Instagram-AJAX': shared_data['rollout_hash'],
+ 'Referer': 'https://www.instagram.com/',
+ }, data=urlencode_postdata({
+ 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
+ 'username': username,
+ 'queryParams': '{}',
+ 'optIntoOneTap': 'false',
+ 'stopDeletionNonce': '',
+ 'trustedDeviceRecords': '{}',
+ }))
+
+ if not login.get('authenticated'):
+ if login.get('message'):
+ raise ExtractorError(f'Unable to login: {login["message"]}')
+ raise ExtractorError('Unable to login')
+
+ def _real_initialize(self):
+ self._login()
+
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id')
@@ -147,7 +191,7 @@ class InstagramIE(InfoExtractor):
webpage, urlh = self._download_webpage_handle(url, video_id)
if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
- self.raise_login_required('You need to log in to access this content', method='cookies')
+ self.raise_login_required('You need to log in to access this content')
(media, video_url, description, thumbnail, timestamp, uploader,
uploader_id, like_count, comment_count, comments, height,
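
The login flow added above needs two values scraped from the login page (csrf_token and rollout_hash) plus an enc_password field in Instagram's browser format: a version tag, a current Unix timestamp, and the plain password. A sketch of just that field, as constructed in the patch:

    import time

    def build_enc_password(password):
        # Version 0 of the browser format sends the password in plain
        # text; the embedded timestamp must be roughly current.
        return f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}'
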
diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
index d69782b78..6e6a3673c 100644
--- a/yt_dlp/extractor/itv.py
+++ b/yt_dlp/extractor/itv.py
@@ -220,16 +220,23 @@ class ITVIE(InfoExtractor):
class ITVBTCCIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
+ _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action',
'info_dict': {
'id': 'btcc-2019-brands-hatch-gp-race-action',
'title': 'BTCC 2019: Brands Hatch GP race action',
},
'playlist_count': 12,
- }
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s'
+ }, {
+ 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
+ 'info_dict': {
+ 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike',
+ 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32'
+ },
+ 'playlist_count': 4
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -240,15 +247,15 @@ class ITVBTCCIE(InfoExtractor):
'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
lambda x: x['props']['pageProps']['article']['body']['content']) or []
- # Discard empty objects
- video_ids = []
+ entries = []
for video in json_map:
- if video['data'].get('id'):
- video_ids.append(video['data']['id'])
-
- entries = [
- self.url_result(
- smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
+ if not any(video['data'].get(attr) == 'Brightcove' for attr in ('name', 'type')):
+ continue
+ video_id = video['data']['id']
+ account_id = video['data']['accountId']
+ player_id = video['data']['playerId']
+ entries.append(self.url_result(
+ smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), {
# ITV does not like some GB IP ranges, so here are some
# IP blocks it accepts
'geo_ip_blocks': [
@@ -256,8 +263,7 @@ class ITVBTCCIE(InfoExtractor):
],
'referrer': url,
}),
- ie=BrightcoveNewIE.ie_key(), video_id=video_id)
- for video_id in video_ids]
+ ie=BrightcoveNewIE.ie_key(), video_id=video_id))
title = self._og_search_title(webpage, fatal=False)
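
The Brightcove template above is now parameterised on the account and player IDs found in each __NEXT_DATA__ entry, instead of hardcoding the BTCC player. Filling it in (the first two IDs are the old hardcoded pair; the video ID is hypothetical):

    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
    print(BRIGHTCOVE_URL_TEMPLATE % ('1582188683001', 'HkiHLnNRx', '1234567890123'))
    # http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=1234567890123
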
diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py
index 26e7abc49..119b39997 100644
--- a/yt_dlp/extractor/mediaset.py
+++ b/yt_dlp/extractor/mediaset.py
@@ -1,13 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
+import functools
import re
from .theplatform import ThePlatformBaseIE
from ..utils import (
ExtractorError,
int_or_none,
+ OnDemandPagedList,
parse_qs,
+ try_get,
+ urljoin,
update_url_query,
)
@@ -212,3 +216,81 @@ class MediasetIE(ThePlatformBaseIE):
'subtitles': subtitles,
})
return info
+
+
+class MediasetShowIE(MediasetIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (?:
+ (?:fiction|programmi-tv|serie-tv)/(?:.+?/)?
+ (?:[a-z]+)_SE(?P<id>\d{12})
+ (?:,ST(?P<st>\d{12}))?
+ (?:,sb(?P<sb>\d{9}))?$
+ )
+ )
+ '''
+ _TESTS = [{
+ # TV Show webpage (with a single playlist)
+ 'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556',
+ 'info_dict': {
+ 'id': '000000001556',
+ 'title': 'Fire Force',
+ },
+ 'playlist_count': 1,
+ }, {
+ # TV Show webpage (with multiple playlists)
+ 'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
+ 'info_dict': {
+ 'id': '000000002763',
+ 'title': 'Le Iene',
+ },
+ 'playlist_count': 7,
+ }, {
+ # TV Show specific playlist (single page)
+ 'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556,ST000000002738,sb100013107',
+ 'info_dict': {
+ 'id': '100013107',
+ 'title': 'Episodi',
+ },
+ 'playlist_count': 4,
+ }, {
+ # TV Show specific playlist (with multiple pages)
+ 'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
+ 'info_dict': {
+ 'id': '100013375',
+ 'title': 'I servizi',
+ },
+ 'playlist_count': 53,
+ }]
+
+ _BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
+ _PAGE_SIZE = 25
+
+ def _fetch_page(self, sb, page):
+ lower_limit = page * self._PAGE_SIZE + 1
+ upper_limit = lower_limit + self._PAGE_SIZE - 1
+ content = self._download_json(
+ self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
+ for entry in content.get('entries') or []:
+ yield self.url_result(
+ 'mediaset:' + entry['guid'],
+ playlist_title=entry['mediasetprogram$subBrandDescription'])
+
+ def _real_extract(self, url):
+ playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
+ if not sb:
+ page = self._download_webpage(url, playlist_id)
+ entries = [self.url_result(urljoin('https://www.mediasetplay.mediaset.it', url))
+ for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
+ title = (self._html_search_regex(r'(?s)<h1[^>]*>(.+?)</h1>', page, 'title', default=None)
+ or self._og_search_title(page))
+ return self.playlist_result(entries, st or playlist_id, title)
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, sb),
+ self._PAGE_SIZE)
+ title = try_get(entries, lambda x: x[0]['playlist_title'])
+
+ return self.playlist_result(entries, sb, title)
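
_fetch_page above converts OnDemandPagedList's zero-based page index into the feed's one-based inclusive range parameters. A quick check of the arithmetic with the patch's page size of 25:

    PAGE_SIZE = 25

    def feed_range(page):
        lower = page * PAGE_SIZE + 1
        return lower, lower + PAGE_SIZE - 1

    assert feed_range(0) == (1, 25)
    assert feed_range(1) == (26, 50)
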
diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py
new file mode 100644
index 000000000..4d5a9df1f
--- /dev/null
+++ b/yt_dlp/extractor/microsoftstream.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from base64 import b64decode
+
+from .common import InfoExtractor
+from ..utils import (
+ merge_dicts,
+ parse_iso8601,
+ parse_duration,
+ parse_resolution,
+ try_get,
+ url_basename,
+)
+
+
+class MicrosoftStreamIE(InfoExtractor):
+ IE_NAME = 'microsoftstream'
+ IE_DESC = 'Microsoft Stream'
+ _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+
+ _TESTS = [{
+ 'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca',
+ 'only_matching': True,
+ }]
+
+ def _get_all_subtitles(self, api_url, video_id, headers):
+ subtitles = {}
+ automatic_captions = {}
+ text_tracks = self._download_json(
+ f'{api_url}/videos/{video_id}/texttracks', video_id,
+ note='Downloading subtitles JSON', fatal=False, headers=headers,
+ query={'api-version': '1.4-private'}).get('value') or []
+ for track in text_tracks:
+ if not track.get('language') or not track.get('url'):
+ continue
+ sub_dict = automatic_captions if track.get('autoGenerated') else subtitles
+ sub_dict.setdefault(track['language'], []).append({
+ 'ext': 'vtt',
+ 'url': track.get('url')
+ })
+ return {
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions
+ }
+
+ def extract_all_subtitles(self, *args, **kwargs):
+ if (self.get_param('writesubtitles', False)
+ or self.get_param('writeautomaticsub', False)
+ or self.get_param('listsubtitles')):
+ return self._get_all_subtitles(*args, **kwargs)
+ return {}
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ if '<title>Microsoft Stream</title>' not in webpage:
+ self.raise_login_required(method='cookies')
+
+ access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token')
+ api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url')
+
+ headers = {'Authorization': f'Bearer {access_token}'}
+
+ video_data = self._download_json(
+ f'{api_url}/videos/{video_id}', video_id,
+ headers=headers, query={
+ '$expand': 'creator,tokens,status,liveEvent,extensions',
+ 'api-version': '1.4-private'
+ })
+ video_id = video_data.get('id') or video_id
+ language = video_data.get('language')
+
+ thumbnails = []
+ for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'):
+ thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str)
+ if not thumbnail_url:
+ continue
+ thumb = {
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ }
+ thumb_name = url_basename(thumbnail_url)
+ thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
+ thumb.update(parse_resolution(thumb_name))
+ thumbnails.append(thumb)
+
+ formats = []
+ for playlist in video_data['playbackUrls']:
+ if playlist['mimeType'] == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ playlist['playbackUrl'], video_id,
+ ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False, headers=headers))
+ elif playlist['mimeType'] == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ playlist['playbackUrl'], video_id, mpd_id='dash',
+ fatal=False, headers=headers))
+ elif playlist['mimeType'] == 'application/vnd.ms-sstr+xml':
+ formats.extend(self._extract_ism_formats(
+ playlist['playbackUrl'], video_id, ism_id='mss',
+ fatal=False, headers=headers))
+ formats = [merge_dicts(f, {'language': language}) for f in formats]
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_data['name'],
+ 'description': video_data.get('description'),
+ 'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
+ 'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
+ lambda x: x['creator']['id']), str),
+ 'thumbnails': thumbnails,
+ **self.extract_all_subtitles(api_url, video_id, headers),
+ 'timestamp': parse_iso8601(video_data.get('created')),
+ 'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
+ 'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
+ 'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
+ 'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
+ 'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
+ 'formats': formats,
+ }
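
The thumbnail loop in the new extractor recovers the resolution from the thumbnail URL's basename, which is base64-encoded with its '=' padding stripped; the expression '=' * (-len(name) % 4) restores it. A round-trip with a made-up basename:

    import base64

    name = base64.b64encode(b'thumb_640x360.jpg').decode().rstrip('=')
    padded = name + '=' * (-len(name) % 4)
    print(base64.b64decode(padded))  # b'thumb_640x360.jpg' -> parse_resolution finds 640x360
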
diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py
new file mode 100644
index 000000000..2d65787e2
--- /dev/null
+++ b/yt_dlp/extractor/mlssoccer.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MLSSoccerIE(InfoExtractor):
+ _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)'
+ _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
+
+ _TESTS = [{
+ 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986',
+ 'info_dict': {
+ 'id': '6276033198001',
+ 'ext': 'mp4',
+ 'title': 'The Octagon | Can Alphonso Davies lead Canada to first World Cup since 1986?',
+ 'description': 'md5:f0a883ee33592a0221798f451a98be8f',
+ 'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/static/5530036772001/1bbc44f6-c63c-4981-82fa-46b0c1f891e0/5c1ca44a-a033-4e98-b531-ff24c4947608/160x90/match/image.jpg',
+ 'duration': 350.165,
+ 'timestamp': 1633627291,
+ 'uploader_id': '5530036772001',
+ 'tags': ['club/canada'],
+ 'is_live': False,
+ 'duration_string': '5:50',
+ 'upload_date': '20211007',
+ 'filesize_approx': 255193528.83200002
+ },
+ 'params': {'skip_download': True}
+ }, {
+ 'url': 'https://www.whitecapsfc.com/video/highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021#highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.torontofc.ca/video/highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733#highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.sportingkc.com/video/post-match-press-conference-john-pulskamp-oct-27-2021#post-match-press-conference-john-pulskamp-oct-27-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.soundersfc.com/video/highlights-seattle-sounders-fc-vs-sporting-kansas-city-october-23-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.sjearthquakes.com/video/#highlights-austin-fc-vs-san-jose-earthquakes-june-19-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.rsl.com/video/2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21#2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.timbers.com/video/highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose#highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.philadelphiaunion.com/video/highlights-torvphi',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.orlandocitysc.com/video/highlight-columbus-crew-vs-orlando-city-sc',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.newyorkredbulls.com/video/all-access-matchday-double-derby-week#all-access-matchday-double-derby-week',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.nycfc.com/video/highlights-nycfc-1-0-chicago-fire-fc#highlights-nycfc-1-0-chicago-fire-fc',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.revolutionsoccer.net/video/two-minute-highlights-revs-1-rapids-0-october-27-2021#two-minute-highlights-revs-1-rapids-0-october-27-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.nashvillesc.com/video/goal-c-j-sapong-nashville-sc-92nd-minute',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.cfmontreal.com/video/faits-saillants-tor-v-mtl#faits-saillants-orl-v-mtl-x5645',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.intermiamicf.com/video/all-access-victory-vs-nashville-sc-by-ukg#all-access-victory-vs-nashville-sc-by-ukg',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.lagalaxy.com/video/#moment-of-the-month-presented-by-san-manuel-casino-rayan-raveloson-scores-his-se',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.lafc.com/video/breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season#breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.houstondynamofc.com/video/postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660#postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.dcunited.com/video/tony-alfaro-my-family-pushed-me-to-believe-everything-was-possible',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.fcdallas.com/video/highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021#highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.columbuscrew.com/video/match-rewind-columbus-crew-vs-new-york-red-bulls-october-23-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.coloradorapids.com/video/postgame-reaction-robin-fraser-october-27#postgame-reaction-robin-fraser-october-27',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.fccincinnati.com/video/#keeping-cincy-chill-presented-by-coors-lite',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.chicagofirefc.com/video/all-access-fire-score-dramatic-road-win-in-cincy#all-access-fire-score-dramatic-road-win-in-cincy',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.austinfc.com/video/highlights-colorado-rapids-vs-austin-fc-september-29-2021#highlights-colorado-rapids-vs-austin-fc-september-29-2021',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.atlutd.com/video/goal-josef-martinez-scores-in-the-73rd-minute#goal-josef-martinez-scores-in-the-73rd-minute',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ data_json = self._parse_json(self._html_search_regex(r'data-options\=\"([^\"]+)\"', webpage, 'json'), id)['videoList'][0]
+ return {
+ 'id': id,
+ '_type': 'url',
+ 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (data_json['accountId'], data_json['videoId']),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
index e0608845d..141dd7deb 100644
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -306,6 +306,14 @@ class MTVServicesInfoExtractor(InfoExtractor):
mgid = self._extract_triforce_mgid(webpage)
if not mgid:
+ mgid = self._search_regex(
+ r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None)
+
+ if not mgid:
+ mgid = self._search_regex(
+ r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
+
+ if not mgid:
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self._extract_child_with_type(data, 'MainContainer')
@@ -313,10 +321,6 @@ class MTVServicesInfoExtractor(InfoExtractor):
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
- if not mgid:
- mgid = self._search_regex(
- r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
return mgid
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py
index acf53c1ff..a6821ba86 100644
--- a/yt_dlp/extractor/naver.py
+++ b/yt_dlp/extractor/naver.py
@@ -40,6 +40,7 @@ class NaverBaseIE(InfoExtractor):
formats.append({
'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
'url': stream_url,
+ 'ext': 'mp4',
'width': int_or_none(encoding_option.get('width')),
'height': int_or_none(encoding_option.get('height')),
'vbr': int_or_none(bitrate.get('video')),
@@ -174,7 +175,7 @@ class NaverLiveIE(InfoExtractor):
'url': 'https://tv.naver.com/l/52010',
'info_dict': {
'id': '52010',
- 'ext': 'm3u8',
+ 'ext': 'mp4',
'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"',
'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3',
'channel_id': 'NTV-ytnnews24-0',
@@ -184,7 +185,7 @@ class NaverLiveIE(InfoExtractor):
'url': 'https://tv.naver.com/l/51549',
'info_dict': {
'id': '51549',
- 'ext': 'm3u8',
+ 'ext': 'mp4',
'title': '연합뉴스TV - 코로나19 뉴스특보',
'description': 'md5:c655e82091bc21e413f549c0eaccc481',
'channel_id': 'NTV-yonhapnewstv-0',
@@ -233,7 +234,7 @@ class NaverLiveIE(InfoExtractor):
continue
formats.extend(self._extract_m3u8_formats(
- quality.get('url'), video_id, 'm3u8',
+ quality.get('url'), video_id, 'mp4',
m3u8_id=quality.get('qualityId'), live=True
))
self._sort_formats(formats)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 76f087057..4bcea33d5 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -704,7 +704,6 @@ class NicovideoSearchURLIE(InfoExtractor):
class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
IE_DESC = 'Nico video searches'
- _MAX_RESULTS = float('inf')
IE_NAME = NicovideoSearchIE_NAME
_SEARCH_KEY = 'nicosearch'
_TESTS = []
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index b556bc6aa..49d58a685 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -147,7 +147,7 @@ class NRKIE(NRKBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url).split('/')[-1]
- path_templ = 'playback/%s/' + video_id
+ path_templ = 'playback/%s/program/' + video_id
def call_playback_api(item, query=None):
return self._call_api(path_templ % item, video_id, item, query=query)
@@ -188,7 +188,7 @@ class NRKIE(NRKBaseIE):
title = titles['title']
alt_title = titles.get('subtitle')
- description = preplay.get('description')
+ description = try_get(preplay, lambda x: x['description'].replace('\r', '\n'))
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
thumbnails = []
diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index a189c0237..c7d316efc 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -161,7 +161,7 @@ class PatreonIE(InfoExtractor):
if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
embed_html = try_get(attributes, lambda x: x['embed']['html'])
v_url = url_or_none(compat_urllib_parse_unquote(
- self._search_regex(r'src=(https%3A%2F%2Fplayer\.vimeo\.com.+)%3F', embed_html, 'vimeo url', fatal=False)))
+ self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
if v_url:
info.update({
'_type': 'url_transparent',
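
The loosened regex above matches the Vimeo player URL whether the embed HTML stores it percent-encoded or plain, and keeps the app_id parameter that the old pattern cut off. Decoding the encoded form (values illustrative):

    from urllib.parse import unquote

    embed = 'https%3A%2F%2Fplayer.vimeo.com%2Fvideo%2F123456%3Fapp_id%3D122963'
    print(unquote(embed))  # https://player.vimeo.com/video/123456?app_id=122963
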
diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py
index ff2c977a0..ad1e62d88 100644
--- a/yt_dlp/extractor/sky.py
+++ b/yt_dlp/extractor/sky.py
@@ -105,6 +105,34 @@ class SkyNewsIE(SkyBaseIE):
}
+class SkyNewsStoryIE(SkyBaseIE):
+ IE_NAME = 'sky:news:story'
+ _VALID_URL = r'https?://news\.sky\.com/story/[0-9a-z-]+-(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://news.sky.com/story/budget-2021-chancellor-rishi-sunak-vows-address-will-deliver-strong-economy-fit-for-a-new-age-of-optimism-12445425',
+ 'info_dict': {
+ 'id': 'ref:0714acb9-123d-42c8-91b8-5c1bc6c73f20',
+ 'title': 'md5:e408dd7aad63f31a1817bbe40c7d276f',
+ 'description': 'md5:a881e12f49212f92be2befe4a09d288a',
+ 'ext': 'mp4',
+ 'upload_date': '20211027',
+ 'timestamp': 1635317494,
+ 'uploader_id': '6058004172001',
+ }
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+ webpage = self._download_webpage(url, article_id)
+
+ entries = [self._process_ooyala_element(webpage, sdc_el, url)
+ for sdc_el in re.findall(self._SDC_EL_REGEX, webpage)]
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage),
+ self._html_search_meta(['og:description', 'description'], webpage))
+
+
class SkySportsNewsIE(SkyBaseIE):
IE_NAME = 'sky:sports:news'
_VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)'
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index e89383ff1..824528474 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -856,7 +856,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
IE_NAME = 'soundcloud:search'
IE_DESC = 'Soundcloud search'
- _MAX_RESULTS = float('inf')
+ _SEARCH_KEY = 'scsearch'
_TESTS = [{
'url': 'scsearch15:post-avant jazzcore',
'info_dict': {
@@ -865,7 +865,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
'playlist_count': 15,
}]
- _SEARCH_KEY = 'scsearch'
_MAX_RESULTS_PER_PAGE = 200
_DEFAULT_RESULTS_PER_PAGE = 50
diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py
index 25c200455..6e03d0a7d 100644
--- a/yt_dlp/extractor/tagesschau.py
+++ b/yt_dlp/extractor/tagesschau.py
@@ -5,177 +5,63 @@ import re
from .common import InfoExtractor
from ..utils import (
- determine_ext,
js_to_json,
- parse_iso8601,
- parse_filesize,
+ extract_attributes,
+ try_get,
+ int_or_none,
)
-class TagesschauPlayerIE(InfoExtractor):
- IE_NAME = 'tagesschau:player'
- _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html'
-
- _TESTS = [{
- 'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html',
- 'md5': '8d09548d5c15debad38bee3a4d15ca21',
- 'info_dict': {
- 'id': '179517',
- 'ext': 'mp4',
- 'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen Kurs der AfD',
- 'thumbnail': r're:^https?:.*\.jpg$',
- 'formats': 'mincount:6',
- },
- }, {
- 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
- 'md5': '76e6eec6ebd40740671cf0a2c88617e5',
- 'info_dict': {
- 'id': '29417',
- 'ext': 'mp3',
- 'title': 'Trabi - Bye, bye Rennpappe',
- 'thumbnail': r're:^https?:.*\.jpg$',
- 'formats': 'mincount:2',
- },
- }, {
- 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html',
- 'only_matching': True,
- }]
-
- _FORMATS = {
- 'xs': {'quality': 0},
- 's': {'width': 320, 'height': 180, 'quality': 1},
- 'm': {'width': 512, 'height': 288, 'quality': 2},
- 'l': {'width': 960, 'height': 540, 'quality': 3},
- 'xl': {'width': 1280, 'height': 720, 'quality': 4},
- 'xxl': {'quality': 5},
- }
-
- def _extract_via_api(self, kind, video_id):
- info = self._download_json(
- 'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id),
- video_id)
- title = info['headline']
- formats = []
- for media in info['mediadata']:
- for format_id, format_url in media.items():
- if determine_ext(format_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls'))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'vcodec': 'none' if kind == 'audio' else None,
- })
- self._sort_formats(formats)
- timestamp = parse_iso8601(info.get('date'))
- return {
- 'id': video_id,
- 'title': title,
- 'timestamp': timestamp,
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- mobj = self._match_valid_url(url)
- video_id = mobj.group('id')
-
- # kind = mobj.group('kind').lower()
- # if kind == 'video':
- # return self._extract_via_api(kind, video_id)
-
- # JSON api does not provide some audio formats (e.g. ogg) thus
- # extracting audio via webpage
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._og_search_title(webpage).strip()
- formats = []
-
- for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage):
- media = self._parse_json(js_to_json(media_json), video_id, fatal=False)
- if not media:
- continue
- src = media.get('src')
- if not src:
- return
- quality = media.get('quality')
- kind = media.get('type', '').split('/')[0]
- ext = determine_ext(src)
- f = {
- 'url': src,
- 'format_id': '%s_%s' % (quality, ext) if quality else ext,
- 'ext': ext,
- 'vcodec': 'none' if kind == 'audio' else None,
- }
- f.update(self._FORMATS.get(quality, {}))
- formats.append(f)
-
- self._sort_formats(formats)
-
- thumbnail = self._og_search_thumbnail(webpage)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
-
-
class TagesschauIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'
_TESTS = [{
'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
- 'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6',
+ 'md5': '7a7287612fa881a1ae1d087df45c2fd6',
'info_dict': {
- 'id': 'video-102143',
+ 'id': 'video-102143-1',
'ext': 'mp4',
'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
- 'description': '18.07.2015 20:10 Uhr',
- 'thumbnail': r're:^https?:.*\.jpg$',
},
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
'md5': '3c54c1f6243d279b706bde660ceec633',
'info_dict': {
- 'id': 'ts-5727',
+ 'id': 'ts-5727-1',
'ext': 'mp4',
- 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr',
- 'description': 'md5:695c01bfd98b7e313c501386327aea59',
- 'thumbnail': r're:^https?:.*\.jpg$',
+ 'title': 'Ganze Sendung',
},
}, {
# exclusive audio
'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
- 'md5': '76e6eec6ebd40740671cf0a2c88617e5',
+ 'md5': '4cf22023c285f35e99c24d290ba58cc9',
'info_dict': {
- 'id': 'audio-29417',
+ 'id': 'audio-29417-1',
'ext': 'mp3',
- 'title': 'Trabi - Bye, bye Rennpappe',
- 'description': 'md5:8687dda862cbbe2cfb2df09b56341317',
- 'thumbnail': r're:^https?:.*\.jpg$',
+ 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
},
}, {
- # audio in article
'url': 'http://www.tagesschau.de/inland/bnd-303.html',
- 'md5': 'e0916c623e85fc1d2b26b78f299d3958',
+ 'md5': '12cfb212d9325b5ba0d52b625f1aa61c',
'info_dict': {
- 'id': 'bnd-303',
- 'ext': 'mp3',
- 'title': 'Viele Baustellen für neuen BND-Chef',
- 'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4',
- 'thumbnail': r're:^https?:.*\.jpg$',
+ 'id': 'bnd-303-1',
+ 'ext': 'mp4',
+ 'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa',
},
}, {
'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
'info_dict': {
'id': 'afd-parteitag-135',
- 'title': 'Möchtegern-Underdog mit Machtanspruch',
+ 'title': 'AfD',
+ },
+ 'playlist_count': 20,
+ }, {
+ 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
+ 'info_dict': {
+ 'id': 'audio-29417-1',
+ 'ext': 'mp3',
+ 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt',
},
- 'playlist_count': 2,
}, {
'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
'only_matching': True,
@@ -206,62 +92,6 @@ class TagesschauIE(InfoExtractor):
'only_matching': True,
}]
- @classmethod
- def suitable(cls, url):
- return False if TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
-
- def _extract_formats(self, download_text, media_kind):
- links = re.finditer(
- r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
- download_text)
- formats = []
- for l in links:
- link_url = l.group('url')
- if not link_url:
- continue
- format_id = self._search_regex(
- r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
- default=determine_ext(link_url))
- format = {
- 'format_id': format_id,
- 'url': l.group('url'),
- 'format_name': l.group('name'),
- }
- title = l.group('title')
- if title:
- if media_kind.lower() == 'video':
- m = re.match(
- r'''(?x)
- Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&\#10;
- (?P<width>[0-9]+)x(?P<height>[0-9]+)px&\#10;
- (?P<vbr>[0-9]+)kbps&\#10;
- Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&\#10;
- Gr&ouml;&szlig;e:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
- title)
- if m:
- format.update({
- 'format_note': m.group('audio_desc'),
- 'vcodec': m.group('vcodec'),
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- 'abr': int(m.group('abr')),
- 'vbr': int(m.group('vbr')),
- 'filesize_approx': parse_filesize(m.group('filesize_approx')),
- })
- else:
- m = re.match(
- r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
- title)
- if m:
- format.update({
- 'format_note': '%s, %s' % (m.group('format'), m.group('note')),
- 'vcodec': 'none',
- 'abr': int(m.group('abr')),
- })
- formats.append(format)
- self._sort_formats(formats)
- return formats
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('path')
@@ -271,34 +101,46 @@ class TagesschauIE(InfoExtractor):
title = self._html_search_regex(
r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
- webpage, 'title', default=None) or self._og_search_title(webpage)
-
- DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
-
- webpage_type = self._og_search_property('type', webpage, default=None)
- if webpage_type == 'website': # Article
- entries = []
- for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
- r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
- webpage), 1):
+ webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)
+
+ entries = []
+ videos = re.findall(r'<div[^>]+>', webpage)
+ num = 0
+ for video in videos:
+ video = extract_attributes(video).get('data-config')
+ if not video:
+ continue
+ video = self._parse_json(video, video_id, transform_source=js_to_json, fatal=False)
+ video_formats = try_get(video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray'])
+ if not video_formats:
+ continue
+ num += 1
+ for video_format in video_formats:
+ media_url = video_format.get('_stream') or ''
+ formats = []
+ if media_url.endswith('master.m3u8'):
+ formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
+ elif media_url.endswith('.hi.mp3') and media_url.startswith('https://download'):
+ formats = [{
+ 'url': media_url,
+ 'vcodec': 'none',
+ }]
+ if not formats:
+ continue
entries.append({
'id': '%s-%d' % (display_id, num),
- 'title': '%s' % entry_title,
- 'formats': self._extract_formats(download_text, media_kind),
+ 'title': try_get(video, lambda x: x['mc']['_title']),
+ 'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
+ 'formats': formats
})
- if len(entries) > 1:
- return self.playlist_result(entries, display_id, title)
- formats = entries[0]['formats']
- else: # Assume single video
- download_text = self._search_regex(
- DOWNLOAD_REGEX, webpage, 'download links', group='links')
- media_kind = self._search_regex(
- DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind')
- formats = self._extract_formats(download_text, media_kind)
- thumbnail = self._og_search_thumbnail(webpage)
- description = self._html_search_regex(
- r'(?s)<p class="teasertext">(.*?)</p>',
- webpage, 'description', default=None)
+ if len(entries) > 1:
+ return self.playlist_result(entries, display_id, title)
+ formats = entries[0]['formats']
+ video_info = self._search_json_ld(webpage, video_id)
+ description = video_info.get('description')
+ thumbnail = self._og_search_thumbnail(webpage) or video_info.get('thumbnail')
+ timestamp = video_info.get('timestamp')
+ title = title or video_info.get('description')
self._sort_formats(formats)
@@ -307,5 +149,6 @@ class TagesschauIE(InfoExtractor):
'title': title,
'thumbnail': thumbnail,
'formats': formats,
+ 'timestamp': timestamp,
'description': description,
}
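
The rewritten extractor above walks every div carrying a data-config attribute, parses it as relaxed JSON (via js_to_json), and reads formats out of mc._mediaArray[0]._mediaStreamArray. A stripped-down sketch of the attribute walk, with invented markup and strict JSON for simplicity:

    import json
    import re

    html = """<div data-config='{"mc": {"_title": "demo", "_duration": "90"}}'></div>"""
    for raw in re.findall(r"data-config='([^']+)'", html):
        config = json.loads(raw)
        print(config['mc']['_title'], config['mc']['_duration'])
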
diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py
new file mode 100644
index 000000000..60e84529d
--- /dev/null
+++ b/yt_dlp/extractor/threespeak.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ try_get,
+ unified_strdate,
+)
+
+
+class ThreeSpeakIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
+
+ _TESTS = [{
+ 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
+ 'info_dict': {
+ 'id': 'wjgoxyfy',
+ 'ext': 'mp4',
+ 'title': 'Can People who took the Vax think Critically',
+ 'uploader': 'dannyshine',
+ 'description': 'md5:181aa7ccb304afafa089b5af3bca7a10',
+ 'tags': ['sex', 'covid', 'antinatalism', 'comedy', 'vaccines'],
+ 'thumbnail': 'https://img.3speakcontent.co/wjgoxyfy/thumbnails/default.png',
+ 'upload_date': '20211021',
+ 'duration': 2703.867833,
+ 'filesize': 1620054781,
+ },
+ 'params': {'skip_download': True}
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ json_str = self._html_search_regex(r'JSON\.parse\(\'([^\']+)\'\)', webpage, 'json')
+        # The JSON string itself is escaped, hence the double parsing
+ data_json = self._parse_json(self._parse_json(f'"{json_str}"', id), id)
+ video_json = self._parse_json(data_json['json_metadata'], id)
+ formats, subtitles = [], {}
+ og_m3u8 = self._html_search_regex(r'<meta\s?property=\"ogvideo\"\s?content=\"([^\"]+)\">', webpage, 'og m3u8', fatal=False)
+ if og_m3u8:
+ https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, id, fatal=False, m3u8_id='https')
+ formats.extend(https_frmts)
+ subtitles = self._merge_subtitles(subtitles, https_subs)
+ ipfs_m3u8 = try_get(video_json, lambda x: x['video']['info']['ipfs'])
+ if ipfs_m3u8:
+ ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(f'https://ipfs.3speak.tv/ipfs/{ipfs_m3u8}',
+ id, fatal=False, m3u8_id='ipfs')
+ formats.extend(ipfs_frmts)
+ subtitles = self._merge_subtitles(subtitles, ipfs_subs)
+ mp4_file = try_get(video_json, lambda x: x['video']['info']['file'])
+ if mp4_file:
+ formats.append({
+ 'url': f'https://threespeakvideo.b-cdn.net/{id}/{mp4_file}',
+ 'ext': 'mp4',
+ 'format_id': 'https-mp4',
+ 'duration': try_get(video_json, lambda x: x['video']['info']['duration']),
+ 'filesize': try_get(video_json, lambda x: x['video']['info']['filesize']),
+ 'quality': 11,
+ 'format_note': 'Original file',
+ })
+ self._sort_formats(formats)
+ return {
+ 'id': id,
+ 'title': data_json.get('title') or data_json.get('root_title'),
+ 'uploader': data_json.get('author'),
+ 'description': try_get(video_json, lambda x: x['video']['content']['description']),
+ 'tags': try_get(video_json, lambda x: x['video']['content']['tags']),
+ 'thumbnail': try_get(video_json, lambda x: x['image'][0]),
+ 'upload_date': unified_strdate(data_json.get('created')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ThreeSpeakUserIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://3speak.tv/user/theycallmedan',
+ 'info_dict': {
+ 'id': 'theycallmedan',
+ },
+ 'playlist_mincount': 115,
+ }]
+
+ def _real_extract(self, url):
+ id = self._match_id(url)
+ webpage = self._download_webpage(url, id)
+ entries = [
+ self.url_result(
+ 'https://3speak.tv/watch?v=%s' % video,
+ ie=ThreeSpeakIE.ie_key())
+ for video in re.findall(r'data-payout\s?\=\s?\"([^\"]+)\"', webpage) if video
+ ]
+ return self.playlist_result(entries, id)
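
The double parse in ThreeSpeakIE comes from the page embedding JSON.parse('...'), so the regex captures an escaped JSON literal: wrapping it in quotes and parsing once undoes the escaping, and parsing again yields the object. A self-contained demonstration:

    import json

    escaped = r'{\"title\": \"demo\"}'      # as captured from JSON.parse('...')
    unescaped = json.loads(f'"{escaped}"')  # first pass: unescape the string
    assert json.loads(unescaped) == {'title': 'demo'}
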
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
index ec55f41f2..a0f0cc31c 100644
--- a/yt_dlp/extractor/trovo.py
+++ b/yt_dlp/extractor/trovo.py
@@ -223,7 +223,7 @@ class TrovoChannelBaseIE(InfoExtractor):
class TrovoChannelVodIE(TrovoChannelBaseIE):
_VALID_URL = r'trovovod:(?P<id>[^\s]+)'
- IE_DESC = 'All VODs of a trovo.live channel, "trovovod" keyword'
+ IE_DESC = 'All VODs of a trovo.live channel; "trovovod:" prefix'
_TESTS = [{
'url': 'trovovod:OneTappedYou',
@@ -244,7 +244,7 @@ class TrovoChannelVodIE(TrovoChannelBaseIE):
class TrovoChannelClipIE(TrovoChannelBaseIE):
_VALID_URL = r'trovoclip:(?P<id>[^\s]+)'
- IE_DESC = 'All Clips of a trovo.live channel, "trovoclip" keyword'
+ IE_DESC = 'All Clips of a trovo.live channel; "trovoclip:" prefix'
_TESTS = [{
'url': 'trovoclip:OneTappedYou',
diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py
index 485b781ca..0749263d9 100644
--- a/yt_dlp/extractor/twitter.py
+++ b/yt_dlp/extractor/twitter.py
@@ -485,7 +485,7 @@ class TwitterIE(TwitterBaseIE):
fmts, subs = self._extract_variant_formats(variant, twid)
subtitles = self._merge_subtitles(subtitles, subs)
formats.extend(fmts)
- self._sort_formats(formats)
+        self._sort_formats(formats, ('res', 'br', 'size', 'proto'))  # The codec of http formats is unknown
thumbnails = []
media_url = media.get('media_url_https') or media.get('media_url')
diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py
index c3b2e863d..5b558d890 100644
--- a/yt_dlp/extractor/viewlift.py
+++ b/yt_dlp/extractor/viewlift.py
@@ -9,6 +9,7 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
+ traverse_obj,
)
@@ -32,26 +33,36 @@ class ViewLiftBaseIE(InfoExtractor):
}
_TOKENS = {}
- def _call_api(self, site, path, video_id, query):
- token = self._TOKENS.get(site)
- if not token:
- token_query = {'site': site}
- email, password = self._get_login_info(netrc_machine=site)
- if email:
- resp = self._download_json(
- self._API_BASE + 'identity/signin', video_id,
- 'Logging in', query=token_query, data=json.dumps({
- 'email': email,
- 'password': password,
- }).encode())
- else:
- resp = self._download_json(
- self._API_BASE + 'identity/anonymous-token', video_id,
- 'Downloading authorization token', query=token_query)
- self._TOKENS[site] = token = resp['authorizationToken']
- return self._download_json(
- self._API_BASE + path, video_id,
- headers={'Authorization': token}, query=query)
+ def _fetch_token(self, site, url):
+ if self._TOKENS.get(site):
+ return
+ email, password = self._get_login_info(netrc_machine=site)
+ if email:
+ self.report_warning('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
+
+ cookies = self._get_cookies(url)
+ if cookies and cookies.get('token'):
+ self._TOKENS[site] = self._search_regex(r'22authorizationToken\%22:\%22([^\%]+)\%22', cookies['token'].value, 'token')
+ if not self._TOKENS.get(site):
+ self.raise_login_required('Cookies (not necessarily from a logged-in account) are needed to download from this website', method='cookies')
+
+ def _call_api(self, site, path, video_id, url, query):
+ self._fetch_token(site, url)
+ try:
+ return self._download_json(
+ self._API_BASE + path, video_id, headers={'Authorization': self._TOKENS.get(site)}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ webpage = e.cause.read().decode()
+ try:
+ error_message = traverse_obj(json.loads(webpage), 'errorMessage', 'message')
+ except json.JSONDecodeError:
+ raise ExtractorError(f'{site} said: {webpage}', cause=e.cause)
+ if error_message:
+ if 'has not purchased' in error_message:
+ self.raise_login_required(method='cookies')
+ raise ExtractorError(error_message, expected=True)
+ raise
class ViewLiftEmbedIE(ViewLiftBaseIE):
@@ -96,27 +107,24 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
site = domain.split('.')[-2]
if site in self._SITE_MAP:
site = self._SITE_MAP[site]
- try:
- content_data = self._call_api(
- site, 'entitlement/video/status', film_id, {
- 'id': film_id
- })['video']
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
- if error_message == 'User does not have a valid subscription or has not purchased this content.':
- self.raise_login_required()
- raise ExtractorError(error_message, expected=True)
- raise
+
+ content_data = self._call_api(
+ site, 'entitlement/video/status', film_id, url, {
+ 'id': film_id
+ })['video']
gist = content_data['gist']
title = gist['title']
video_assets = content_data['streamingInfo']['videoAssets']
- formats = []
- mpeg_video_assets = video_assets.get('mpeg') or []
- for video_asset in mpeg_video_assets:
+ hls_url = video_assets.get('hls')
+ formats, subtitles = [], {}
+ if hls_url:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ for video_asset in video_assets.get('mpeg') or []:
video_asset_url = video_asset.get('url')
- if not video_asset:
+ if not video_asset_url:
continue
bitrate = int_or_none(video_asset.get('bitrate'))
height = int_or_none(self._search_regex(
@@ -130,13 +138,17 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'vcodec': video_asset.get('codec'),
})
- hls_url = video_assets.get('hls')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
+ subs = {}
+ for sub in traverse_obj(content_data, ('contentDetails', 'closedCaptions')) or []:
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subs.setdefault(sub.get('language', 'English'), []).append({
+ 'url': sub_url,
+ })
- info = {
+ self._sort_formats(formats)
+ return {
'id': film_id,
'title': title,
'description': gist.get('description'),
@@ -145,14 +157,15 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'age_limit': parse_age_limit(content_data.get('parentalRating')),
'timestamp': int_or_none(gist.get('publishDate'), 1000),
'formats': formats,
+ 'subtitles': self._merge_subtitles(subs, subtitles),
+ 'categories': traverse_obj(content_data, ('categories', ..., 'title')),
+ 'tags': traverse_obj(content_data, ('tags', ..., 'title')),
}
- for k in ('categories', 'tags'):
- info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
- return info
class ViewLiftIE(ViewLiftBaseIE):
IE_NAME = 'viewlift'
+ _API_BASE = 'https://prod-api-cached-2.viewlift.com/'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
@@ -222,24 +235,111 @@ class ViewLiftIE(ViewLiftBaseIE):
}, {
'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
'only_matching': True,
+ }, { # Free film with language code
+ 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka',
+ 'info_dict': {
+ 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196',
+ 'ext': 'mp4',
+ 'title': 'Shuyopoka',
+ 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211006',
+ 'series': None
+ },
+ 'params': {'skip_download': True},
+ }, { # Free film
+ 'url': 'https://www.hoichoi.tv/films/title/dadu-no1',
+ 'info_dict': {
+ 'id': '0000015b-b009-d126-a1db-b81ff3780000',
+ 'ext': 'mp4',
+ 'title': 'Dadu No.1',
+ 'description': 'md5:605cba408e51a79dafcb824bdeded51e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20210827',
+ 'series': None
+ },
+ 'params': {'skip_download': True},
+ }, { # Free episode
+ 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01',
+ 'info_dict': {
+ 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba',
+ 'ext': 'mp4',
+ 'title': 'Humans Vs. Corona',
+ 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20210830',
+ 'series': 'Case Jaundice'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free video
+ 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi',
+ 'info_dict': {
+ 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30',
+ 'ext': 'mp4',
+ 'title': 'Woman in red - Hindi',
+ 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211006',
+ 'series': 'Six (Hindi)'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free episode
+ 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1',
+ 'info_dict': {
+ 'id': '1f45d185-8500-455c-b88d-13252307c3eb',
+ 'ext': 'mp4',
+ 'title': 'Jisshu Sengupta',
+ 'description': 'md5:ef6ffae01a3d83438597367400f824ed',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211004',
+ 'series': 'Asian Paints Moner Thikana'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free series
+ 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'watch-moner-thikana-bengali-web-series-online',
+ },
+ }, { # Premium series
+ 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'watch-byomkesh-bengali-web-series-online',
+ },
+ }, { # Premium movie
+ 'url': 'https://www.hoichoi.tv/movies/detective-2020',
+ 'only_matching': True
}]
@classmethod
def suitable(cls, url):
return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
+ def _show_entries(self, domain, seasons):
+ for season in seasons:
+ for episode in season.get('episodes') or []:
+ path = traverse_obj(episode, ('gist', 'permalink'))
+ if path:
+ yield self.url_result(f'https://www.{domain}{path}', ie=self.ie_key())
+
def _real_extract(self, url):
domain, path, display_id = self._match_valid_url(url).groups()
site = domain.split('.')[-2]
if site in self._SITE_MAP:
site = self._SITE_MAP[site]
modules = self._call_api(
- site, 'content/pages', display_id, {
+ site, 'content/pages', display_id, url, {
'includeContent': 'true',
'moduleOffset': 1,
'path': path,
'site': site,
})['modules']
+
+ seasons = next((m['contentData'][0]['seasons'] for m in modules if m.get('moduleType') == 'ShowDetailModule'), None)
+ if seasons:
+ return self.playlist_result(self._show_entries(domain, seasons), display_id)
+
film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
return {
'_type': 'url_transparent',
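
Note how the reworked `_fetch_token` sidesteps the broken password login: it reads the site's `token` cookie, whose value is URL-encoded JSON containing the bearer token that `_call_api` then sends as the `Authorization` header. A rough illustration of that cookie shape (the value here is hypothetical):

    import json
    from urllib.parse import unquote

    cookie_value = '%7B%22authorizationToken%22:%22abc123%22%7D'  # hypothetical cookie
    token = json.loads(unquote(cookie_value))['authorizationToken']
    assert token == 'abc123'
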
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 8b367a4e6..04c504934 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import base64
import functools
-import json
import re
import itertools
@@ -17,8 +16,8 @@ from ..compat import (
from ..utils import (
clean_html,
determine_ext,
- dict_get,
ExtractorError,
+ get_element_by_class,
js_to_json,
int_or_none,
merge_dicts,
@@ -26,7 +25,6 @@ from ..utils import (
parse_filesize,
parse_iso8601,
parse_qs,
- RegexNotFoundError,
sanitized_Request,
smuggle_url,
std_headers,
@@ -129,10 +127,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
video_title = video_data['title']
live_event = video_data.get('live_event') or {}
is_live = live_event.get('status') == 'started'
+ request = config.get('request') or {}
formats = []
- config_files = video_data.get('files') or config['request'].get('files', {})
- for f in config_files.get('progressive', []):
+ config_files = video_data.get('files') or request.get('files') or {}
+ for f in (config_files.get('progressive') or []):
video_url = f.get('url')
if not video_url:
continue
@@ -148,7 +147,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
# TODO: fix handling of 308 status code returned for live archive manifest requests
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
- for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
+ for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
manifest_url = cdn_data.get('url')
if not manifest_url:
continue
@@ -188,17 +187,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
})
subtitles = {}
- text_tracks = config['request'].get('text_tracks')
- if text_tracks:
- for tt in text_tracks:
- subtitles[tt['lang']] = [{
- 'ext': 'vtt',
- 'url': urljoin('https://vimeo.com', tt['url']),
- }]
+ for tt in (request.get('text_tracks') or []):
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': urljoin('https://vimeo.com', tt['url']),
+ }]
thumbnails = []
if not is_live:
- for key, thumb in video_data.get('thumbs', {}).items():
+ for key, thumb in (video_data.get('thumbs') or {}).items():
thumbnails.append({
'id': key,
'width': int_or_none(key),
@@ -342,6 +339,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'duration': 1595,
'upload_date': '20130610',
'timestamp': 1370893156,
+ 'license': 'by',
},
'params': {
'format': 'best[protocol=https]',
@@ -420,6 +418,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'duration': 62,
+ 'subtitles': {
+ 'de': [{'ext': 'vtt'}],
+ 'en': [{'ext': 'vtt'}],
+ 'es': [{'ext': 'vtt'}],
+ 'fr': [{'ext': 'vtt'}],
+ },
}
},
{
@@ -626,6 +630,37 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _real_initialize(self):
self._login()
+ def _extract_from_api(self, video_id, unlisted_hash=None):
+ token = self._download_json(
+ 'https://vimeo.com/_rv/jwt', video_id, headers={
+ 'X-Requested-With': 'XMLHttpRequest'
+ })['token']
+ api_url = 'https://api.vimeo.com/videos/' + video_id
+ if unlisted_hash:
+ api_url += ':' + unlisted_hash
+ video = self._download_json(
+ api_url, video_id, headers={
+ 'Authorization': 'jwt ' + token,
+ }, query={
+ 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
+ })
+ info = self._parse_config(self._download_json(
+ video['config_url'], video_id), video_id)
+ self._vimeo_sort_formats(info['formats'])
+ get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
+ info.update({
+ 'description': video.get('description'),
+ 'license': video.get('license'),
+ 'release_timestamp': get_timestamp('release'),
+ 'timestamp': get_timestamp('created'),
+ 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
+ })
+ connections = try_get(
+ video, lambda x: x['metadata']['connections'], dict) or {}
+ for k in ('comment', 'like'):
+ info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
+ return info
+
def _try_album_password(self, url):
album_id = self._search_regex(
r'vimeo\.com/(?:album|showcase)/([^/]+)', url, 'album id', default=None)
@@ -675,45 +710,16 @@ class VimeoIE(VimeoBaseInfoExtractor):
# Extract ID from URL
video_id, unlisted_hash = self._match_valid_url(url).groups()
if unlisted_hash:
- token = self._download_json(
- 'https://vimeo.com/_rv/jwt', video_id, headers={
- 'X-Requested-With': 'XMLHttpRequest'
- })['token']
- video = self._download_json(
- 'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
- video_id, headers={
- 'Authorization': 'jwt ' + token,
- }, query={
- 'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
- })
- info = self._parse_config(self._download_json(
- video['config_url'], video_id), video_id)
- self._vimeo_sort_formats(info['formats'])
- get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
- info.update({
- 'description': video.get('description'),
- 'license': video.get('license'),
- 'release_timestamp': get_timestamp('release'),
- 'timestamp': get_timestamp('created'),
- 'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
- })
- connections = try_get(
- video, lambda x: x['metadata']['connections'], dict) or {}
- for k in ('comment', 'like'):
- info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
- return info
+ return self._extract_from_api(video_id, unlisted_hash)
orig_url = url
is_pro = 'vimeopro.com/' in url
- is_player = '://player.vimeo.com/video/' in url
if is_pro:
# some videos require portfolio_id to be present in player url
# https://github.com/ytdl-org/youtube-dl/issues/20070
url = self._extract_url(url, self._download_webpage(url, video_id))
if not url:
url = 'https://vimeo.com/' + video_id
- elif is_player:
- url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
@@ -734,14 +740,25 @@ class VimeoIE(VimeoBaseInfoExtractor):
expected=True)
raise
- # Now we begin extracting as much information as we can from what we
- # retrieved. First we extract the information common to all extractors,
- # and latter we extract those that are Vimeo specific.
- self.report_extraction(video_id)
+ if '://player.vimeo.com/video/' in url:
+ config = self._parse_json(self._search_regex(
+ r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+ if config.get('view') == 4:
+ config = self._verify_player_video_password(
+ redirect_url, video_id, headers)
+ info = self._parse_config(config, video_id)
+ self._vimeo_sort_formats(info['formats'])
+ return info
+
+ if re.search(r'<form[^>]+?id="pw_form"', webpage):
+ video_password = self._get_video_password()
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ webpage = self._verify_video_password(
+ redirect_url, video_id, video_password, token, vuid)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
- seed_status = vimeo_config.get('seed_status', {})
+ seed_status = vimeo_config.get('seed_status') or {}
if seed_status.get('state') == 'failed':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, seed_status['title']),
@@ -750,70 +767,40 @@ class VimeoIE(VimeoBaseInfoExtractor):
cc_license = None
timestamp = None
video_description = None
+ info_dict = {}
- # Extract the config JSON
- try:
- try:
- config_url = self._html_search_regex(
- r' data-config-url="(.+?)"', webpage,
- 'config URL', default=None)
- if not config_url:
- # Sometimes new react-based page is served instead of old one that require
- # different config URL extraction approach (see
- # https://github.com/ytdl-org/youtube-dl/pull/7209)
- page_config = self._parse_json(self._search_regex(
- r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
- webpage, 'page config'), video_id)
- config_url = page_config['player']['config_url']
- cc_license = page_config.get('cc_license')
- timestamp = try_get(
- page_config, lambda x: x['clip']['uploaded_on'],
- compat_str)
- video_description = clean_html(dict_get(
- page_config, ('description', 'description_html_escaped')))
- config = self._download_json(config_url, video_id)
- except RegexNotFoundError:
- # For pro videos or player.vimeo.com urls
- # We try to find out to which variable is assigned the config dic
- m_variable_name = re.search(r'(\w)\.video\.id', webpage)
- if m_variable_name is not None:
- config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
- else:
- config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
- config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
- config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
- config = self._search_regex(config_re, webpage, 'info section',
- flags=re.DOTALL)
- config = json.loads(config)
- except Exception as e:
- if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
- raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
-
- if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
- if '_video_password_verified' in data:
- raise ExtractorError('video password verification failed!')
- video_password = self._get_video_password()
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- self._verify_video_password(
- redirect_url, video_id, video_password, token, vuid)
- return self._real_extract(
- smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
- else:
- raise ExtractorError('Unable to extract info section',
- cause=e)
+ channel_id = self._search_regex(
+ r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+ if channel_id:
+ config_url = self._html_search_regex(
+ r'\bdata-config-url="([^"]+)"', webpage, 'config URL')
+ video_description = clean_html(get_element_by_class('description', webpage))
+ info_dict.update({
+ 'channel_id': channel_id,
+ 'channel_url': 'https://vimeo.com/channels/' + channel_id,
+ })
else:
- if config.get('view') == 4:
- config = self._verify_player_video_password(redirect_url, video_id, headers)
-
+ page_config = self._parse_json(self._search_regex(
+ r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+ webpage, 'page config', default='{}'), video_id, fatal=False)
+ if not page_config:
+ return self._extract_from_api(video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ clip = page_config.get('clip') or {}
+ timestamp = clip.get('uploaded_on')
+ video_description = clean_html(
+ clip.get('description') or page_config.get('description_html_escaped'))
+ config = self._download_json(config_url, video_id)
video = config.get('video') or {}
vod = video.get('vod') or {}
def is_rented():
if '>You rented this title.<' in webpage:
return True
- if config.get('user', {}).get('purchased'):
+ if try_get(config, lambda x: x['user']['purchased']):
return True
- for purchase_option in vod.get('purchase_options', []):
+ for purchase_option in (vod.get('purchase_options') or []):
if purchase_option.get('purchased'):
return True
label = purchase_option.get('label_string')
@@ -828,14 +815,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
'https://player.vimeo.com/player/%s' % feature_id,
{'force_feature_id': True}), 'Vimeo')
- # Extract video description
if not video_description:
video_description = self._html_search_regex(
r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
webpage, 'description', default=None)
if not video_description:
video_description = self._html_search_meta(
- 'description', webpage, default=None)
+ ['description', 'og:description', 'twitter:description'],
+ webpage, default=None)
if not video_description and is_pro:
orig_webpage = self._download_webpage(
orig_url, video_id,
@@ -844,24 +831,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
if orig_webpage:
video_description = self._html_search_meta(
'description', orig_webpage, default=None)
- if not video_description and not is_player:
+ if not video_description:
self.report_warning('Cannot find video description')
- # Extract upload date
if not timestamp:
timestamp = self._search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage,
'timestamp', default=None)
- try:
- view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
- like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
- comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
- except RegexNotFoundError:
- # This info is only available in vimeo.com/{id} urls
- view_count = None
- like_count = None
- comment_count = None
+ view_count = int_or_none(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ like_count = int_or_none(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count', default=None))
+ comment_count = int_or_none(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count', default=None))
formats = []
@@ -881,11 +861,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license')
- channel_id = self._search_regex(
- r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
- channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
-
- info_dict = {
+ info_dict.update({
'formats': formats,
'timestamp': unified_timestamp(timestamp),
'description': video_description,
@@ -894,18 +870,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
'like_count': like_count,
'comment_count': comment_count,
'license': cc_license,
- 'channel_id': channel_id,
- 'channel_url': channel_url,
- }
-
- info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
+ })
- return info_dict
+ return merge_dicts(info_dict, info_dict_config, json_ld)
class VimeoOndemandIE(VimeoIE):
IE_NAME = 'vimeo:ondemand'
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
_TESTS = [{
# ondemand video not available via https://vimeo.com/id
'url': 'https://vimeo.com/ondemand/20704',
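
The `_extract_from_api` helper factored out above wraps Vimeo's viewer-JWT flow: fetch a short-lived token from `/_rv/jwt` with an XHR header, then call the public API with `Authorization: jwt <token>`. Outside yt-dlp, the two calls look roughly like this (a sketch only; in practice the token endpoint may also require session cookies, and the real request passes the `fields` query shown in the hunk):

    import json
    import urllib.request

    req = urllib.request.Request('https://vimeo.com/_rv/jwt',
                                 headers={'X-Requested-With': 'XMLHttpRequest'})
    token = json.load(urllib.request.urlopen(req))['token']

    api = urllib.request.Request('https://api.vimeo.com/videos/76979871',  # any public video ID
                                 headers={'Authorization': 'jwt ' + token})
    video = json.load(urllib.request.urlopen(api))
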
diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py
index 84f51a544..4340b1d4c 100644
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
@@ -17,17 +17,65 @@ from ..utils import (
strip_or_none,
try_get,
urlencode_postdata,
+ url_or_none,
)
class VLiveBaseIE(NaverBaseIE):
- _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+ _NETRC_MACHINE = 'vlive'
+ _logged_in = False
+
+ def _real_initialize(self):
+ if not self._logged_in:
+ VLiveBaseIE._logged_in = self._login()
+
+ def _login(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return False
+
+ LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+ self._request_webpage(
+ LOGIN_URL, None, note='Downloading login cookies')
+
+ self._download_webpage(
+ LOGIN_URL, None, note='Logging in',
+ data=urlencode_postdata({'email': email, 'pwd': password}),
+ headers={
+ 'Referer': LOGIN_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ login_info = self._download_json(
+ 'https://www.vlive.tv/auth/loginInfo', None,
+ note='Checking login status',
+ headers={'Referer': 'https://www.vlive.tv/home'})
+
+ if not try_get(login_info, lambda x: x['message']['login'], bool):
+ raise ExtractorError('Unable to log in', expected=True)
+ return True
+
+ def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
+ if note is None:
+ note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
+ query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
+ if fields:
+ query['fields'] = fields
+ if query_add:
+ query.update(query_add)
+ try:
+ return self._download_json(
+ 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+ note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+ raise
class VLiveIE(VLiveBaseIE):
IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
- _NETRC_MACHINE = 'vlive'
_TESTS = [{
'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
@@ -38,6 +86,12 @@ class VLiveIE(VLiveBaseIE):
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
+ 'upload_date': '20150817',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1439816449,
+ },
+ 'params': {
+ 'skip_download': True,
},
}, {
'url': 'http://www.vlive.tv/video/16937',
@@ -49,6 +103,9 @@ class VLiveIE(VLiveBaseIE):
'view_count': int,
'subtitles': 'mincount:12',
'uploader_id': 'muploader_j',
+ 'upload_date': '20161112',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1478923074,
},
'params': {
'skip_download': True,
@@ -81,53 +138,6 @@ class VLiveIE(VLiveBaseIE):
'playlist_mincount': 120
}]
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- email, password = self._get_login_info()
- if None in (email, password):
- return
-
- def is_logged_in():
- login_info = self._download_json(
- 'https://www.vlive.tv/auth/loginInfo', None,
- note='Downloading login info',
- headers={'Referer': 'https://www.vlive.tv/home'})
- return try_get(
- login_info, lambda x: x['message']['login'], bool) or False
-
- LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
- self._request_webpage(
- LOGIN_URL, None, note='Downloading login cookies')
-
- self._download_webpage(
- LOGIN_URL, None, note='Logging in',
- data=urlencode_postdata({'email': email, 'pwd': password}),
- headers={
- 'Referer': LOGIN_URL,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
-
- if not is_logged_in():
- raise ExtractorError('Unable to log in', expected=True)
-
- def _call_api(self, path_template, video_id, fields=None, limit=None):
- query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
- if fields:
- query['fields'] = fields
- if limit:
- query['limit'] = limit
- try:
- return self._download_json(
- 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
- 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
- headers={'Referer': 'https://www.vlive.tv/'}, query=query)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
- raise
-
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -150,7 +160,7 @@ class VLiveIE(VLiveBaseIE):
playlist_count = str_or_none(playlist.get('totalCount'))
playlist = self._call_api(
- 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count)
+ 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
entries = []
for video_data in playlist['data']:
@@ -172,6 +182,8 @@ class VLiveIE(VLiveBaseIE):
'view_count': int_or_none(video.get('playCount')),
'like_count': int_or_none(video.get('likeCount')),
'comment_count': int_or_none(video.get('commentCount')),
+ 'timestamp': int_or_none(video.get('createdAt'), scale=1000),
+ 'thumbnail': video.get('thumb'),
}
video_type = video.get('type')
@@ -216,7 +228,7 @@ class VLiveIE(VLiveBaseIE):
raise ExtractorError('Unknown status ' + status)
-class VLivePostIE(VLiveIE):
+class VLivePostIE(VLiveBaseIE):
IE_NAME = 'vlive:post'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
_TESTS = [{
@@ -238,8 +250,6 @@ class VLivePostIE(VLiveIE):
'playlist_count': 1,
}]
_FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
- _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
- _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
def _real_extract(self, url):
post_id = self._match_id(url)
@@ -266,7 +276,7 @@ class VLivePostIE(VLiveIE):
entry = None
if upload_type == 'SOS':
download = self._call_api(
- self._SOS_TMPL, video_id)['videoUrl']['download']
+ self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
formats = []
for f_id, f_url in download.items():
formats.append({
@@ -284,7 +294,7 @@ class VLivePostIE(VLiveIE):
vod_id = upload_info.get('videoId')
if not vod_id:
continue
- inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+ inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
entry = self._extract_video_info(video_id, vod_id, inkey)
if entry:
entry['title'] = '%s_part%s' % (title, idx)
@@ -295,7 +305,7 @@ class VLivePostIE(VLiveIE):
class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel'
- _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
+ _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
_TESTS = [{
'url': 'http://channels.vlive.tv/FCD4B',
'info_dict': {
@@ -306,78 +316,58 @@ class VLiveChannelIE(VLiveBaseIE):
}, {
'url': 'https://www.vlive.tv/channel/FCD4B',
'only_matching': True,
+ }, {
+ 'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
+ 'info_dict': {
+ 'id': 'FCD4B-3546',
+ 'title': 'MAMAMOO - Star Board',
+ },
+ 'playlist_mincount': 880
}]
- def _call_api(self, path, channel_key_suffix, channel_value, note, query):
- q = {
- 'app_id': self._APP_ID,
- 'channel' + channel_key_suffix: channel_value,
- }
- q.update(query)
- return self._download_json(
- 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
- channel_value, note='Downloading ' + note, query=q)['result']
-
- def _real_extract(self, url):
- channel_code = self._match_id(url)
-
- channel_seq = self._call_api(
- 'decodeChannelCode', 'Code', channel_code,
- 'decode channel code', {})['channelSeq']
-
- channel_name = None
- entries = []
+ def _entries(self, posts_id, board_name):
+ if board_name:
+ posts_path = 'post/v1.0/board-%s/posts'
+ query_add = {'limit': 100, 'sortType': 'LATEST'}
+ else:
+ posts_path = 'post/v1.0/channel-%s/starPosts'
+ query_add = {'limit': 100}
for page_num in itertools.count(1):
video_list = self._call_api(
- 'getChannelVideoList', 'Seq', channel_seq,
- 'channel list page #%d' % page_num, {
- # Large values of maxNumOfRows (~300 or above) may cause
- # empty responses (see [1]), e.g. this happens for [2] that
- # has more than 300 videos.
- # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
- # 2. http://channels.vlive.tv/EDBF.
- 'maxNumOfRows': 100,
- 'pageNo': page_num
- }
- )
-
- if not channel_name:
- channel_name = try_get(
- video_list,
- lambda x: x['channelInfo']['channelName'],
- compat_str)
+ posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
+ note=f'Downloading playlist page {page_num}')
+
+ for video in try_get(video_list, lambda x: x['data'], list) or []:
+ video_id = str(video.get('postId'))
+ video_title = str_or_none(video.get('title'))
+ video_url = url_or_none(video.get('url'))
+ if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
+ continue
+ channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
+ yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)
- videos = try_get(
- video_list, lambda x: x['videoList'], list)
- if not videos:
+ after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
+ if not after:
break
+ query_add['after'] = after
- for video in videos:
- video_id = video.get('videoSeq')
- video_type = video.get('videoType')
+ def _real_extract(self, url):
+ channel_id, posts_id = self._match_valid_url(url).groups()
- if not video_id or not video_type:
- continue
- video_id = compat_str(video_id)
-
- if video_type in ('PLAYLIST'):
- first_video_id = try_get(
- video,
- lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int)
-
- if not first_video_id:
- continue
-
- entries.append(
- self.url_result(
- 'http://www.vlive.tv/video/%s' % first_video_id,
- ie=VLiveIE.ie_key(), video_id=first_video_id))
- else:
- entries.append(
- self.url_result(
- 'http://www.vlive.tv/video/%s' % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id))
+ board_name = None
+ if posts_id:
+ board = self._call_api(
+ 'board/v1.0/board-%s', posts_id, 'title,boardType')
+ board_name = board.get('title') or 'Unknown'
+ if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
+ raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)
+
+ entries = self._entries(posts_id or channel_id, board_name)
+ first_video = next(entries)
+ channel_name = first_video['channel']
return self.playlist_result(
- entries, channel_code, channel_name)
+ itertools.chain([first_video], entries),
+ f'{channel_id}-{posts_id}' if posts_id else channel_id,
+ f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)
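
The board listing now pages with an opaque `after` cursor instead of `pageNo`/`maxNumOfRows`. Stripped of the vlive specifics, the loop in `_entries` reduces to this pattern (a sketch; `fetch_page` stands in for the `_call_api` call):

    def iter_posts(fetch_page):
        query = {'limit': 100}
        while True:
            page = fetch_page(query)  # one parsed JSON page
            yield from page.get('data') or []
            after = ((page.get('paging') or {}).get('nextParams') or {}).get('after')
            if not after:
                break                 # no cursor means the last page was reached
            query['after'] = after
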
diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py
index c956d616e..a61a630e2 100644
--- a/yt_dlp/extractor/wakanim.py
+++ b/yt_dlp/extractor/wakanim.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+from urllib.parse import unquote
+
from .common import InfoExtractor
from ..utils import (
merge_dicts,
@@ -31,26 +33,37 @@ class WakanimIE(InfoExtractor):
'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu',
'only_matching': True,
}]
+ _GEO_BYPASS = False
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- m3u8_url = urljoin(url, self._search_regex(
- r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url',
+ if 'Geoblocking' in webpage:
+ if '/de/' in url:
+ self.raise_geo_restricted(countries=['DE', 'AT', 'CH'])
+ else:
+ self.raise_geo_restricted(countries=['RU'])
+
+ manifest_url = urljoin(url, self._search_regex(
+ r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'manifest url',
group='url'))
if not self.get_param('allow_unplayable_formats'):
# https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls
encryption = self._search_regex(
r'encryption%3D(c(?:enc|bc(?:s-aapl)?))',
- m3u8_url, 'encryption', default=None)
+ manifest_url, 'encryption', default=None)
if encryption in ('cenc', 'cbcs-aapl'):
self.report_drm(video_id)
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
+ if 'format=mpd-time-cmaf' in unquote(manifest_url):
+ formats = self._extract_mpd_formats(
+ manifest_url, video_id, mpd_id='dash')
+ else:
+ formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
info = self._search_json_ld(webpage, video_id, default={})
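
Wakanim now serves both DASH and HLS, and the player URL itself says which: a percent-encoded `format=mpd-time-cmaf` marks a DASH manifest. The dispatch in isolation (URLs hypothetical):

    from urllib.parse import unquote

    def manifest_kind(manifest_url):
        return 'dash' if 'format=mpd-time-cmaf' in unquote(manifest_url) else 'hls'

    assert manifest_kind('https://example.com/master.m3u8') == 'hls'
    assert manifest_kind('https://example.com/manifest(format%3Dmpd-time-cmaf)') == 'dash'
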
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index aa58a22bf..658b45fe1 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -695,7 +695,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
class YoutubeIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com'
+ IE_DESC = 'YouTube'
_INVIDIOUS_SITES = (
# invidious-redirect websites
r'(?:www\.)?redirect\.invidious\.io',
@@ -2696,6 +2696,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
thumbnails.append({
'url': thumbnail_url,
})
+ original_thumbnails = thumbnails.copy()
+
# The best resolution thumbnails sometimes does not appear in the webpage
# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
@@ -2706,7 +2708,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'default', '1', '2', '3'
]
n_thumbnail_names = len(thumbnail_names)
-
thumbnails.extend({
'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
video_id=video_id, name=name, ext=ext,
@@ -2716,6 +2717,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
self._remove_duplicate_formats(thumbnails)
+ self._downloader._sort_thumbnails(original_thumbnails)
category = get_first(microformats, 'category') or search_meta('genre')
channel_id = str_or_none(
@@ -2745,6 +2747,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'title': self._live_title(video_title) if is_live else video_title,
'formats': formats,
'thumbnails': thumbnails,
+ # The best thumbnail that we are sure exists. Prevents unnecessary
+ # URL checking if the user doesn't care about getting the best possible thumbnail
+ 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
'description': video_description,
'upload_date': unified_strdate(
get_first(microformats, 'uploadDate')
@@ -3010,7 +3015,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
class YoutubeTabIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com tab'
+ IE_DESC = 'YouTube Tabs'
_VALID_URL = r'''(?x)
https?://
(?:\w+\.)?
@@ -4238,7 +4243,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
class YoutubePlaylistIE(InfoExtractor):
- IE_DESC = 'YouTube.com playlists'
+ IE_DESC = 'YouTube playlists'
_VALID_URL = r'''(?x)(?:
(?:https?://)?
(?:\w+\.)?
@@ -4304,9 +4309,7 @@ class YoutubePlaylistIE(InfoExtractor):
def suitable(cls, url):
if YoutubeTabIE.suitable(url):
return False
- # Hack for lazy extractors until more generic solution is implemented
- # (see #28780)
- from .youtube import parse_qs
+ from ..utils import parse_qs
qs = parse_qs(url)
if qs.get('v', [None])[0]:
return False
@@ -4364,7 +4367,7 @@ class YoutubeYtBeIE(InfoExtractor):
class YoutubeYtUserIE(InfoExtractor):
- IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
+ IE_DESC = 'YouTube user videos; "ytuser:" prefix'
_VALID_URL = r'ytuser:(?P<id>.+)'
_TESTS = [{
'url': 'ytuser:phihag',
@@ -4380,7 +4383,7 @@ class YoutubeYtUserIE(InfoExtractor):
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+ IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
_VALID_URL = r':ytfav(?:ou?rite)?s?'
_LOGIN_REQUIRED = True
_TESTS = [{
@@ -4398,10 +4401,7 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
- IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
- # there doesn't appear to be a real limit, for example if you search for
- # 'python' you get more than 8.000.000 results
- _MAX_RESULTS = float('inf')
+ IE_DESC = 'YouTube searches'
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
_SEARCH_PARAMS = None
@@ -4461,13 +4461,14 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
- IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
+ IE_DESC = 'YouTube searches, newest videos first'
_SEARCH_PARAMS = 'CAI%3D'
class YoutubeSearchURLIE(YoutubeSearchIE):
- IE_DESC = 'YouTube.com search URLs'
+ IE_DESC = 'YouTube search URLs with sorting and filter support'
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+ _SEARCH_KEY = None
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
# _MAX_RESULTS = 100
_TESTS = [{
@@ -4513,7 +4514,7 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE):
class YoutubeWatchLaterIE(InfoExtractor):
IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+ IE_DESC = 'YouTube watch later list; ":ytwatchlater" keyword (requires cookies)'
_VALID_URL = r':ytwatchlater'
_TESTS = [{
'url': ':ytwatchlater',
@@ -4526,7 +4527,7 @@ class YoutubeWatchLaterIE(InfoExtractor):
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+ IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_LOGIN_REQUIRED = False
@@ -4543,7 +4544,7 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+ IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
_VALID_URL = r':ytsub(?:scription)?s?'
_FEED_NAME = 'subscriptions'
_TESTS = [{
@@ -4556,7 +4557,7 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
+ IE_DESC = 'YouTube watch history; ":ythis" keyword (requires cookies)'
_VALID_URL = r':ythis(?:tory)?'
_FEED_NAME = 'history'
_TESTS = [{
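
The thumbnail hunks keep the thumbnails actually scraped from the page (`original_thumbnails`) apart from the generated i.ytimg.com guesses, and rank the guesses with a small formula: webp beats jpg for the same name, and earlier (higher-quality) names beat later ones. The formula on its own (a sketch with an abbreviated name list):

    thumbnail_names = ['maxresdefault', 'hq720', 'sddefault']  # best quality first
    for i, name in enumerate(thumbnail_names):
        for ext in ('webp', 'jpg'):
            preference = (0 if ext == 'webp' else -1) - 2 * i
            print(f'{name}.{ext}: {preference}')
    # maxresdefault.webp: 0, maxresdefault.jpg: -1, hq720.webp: -2, ...
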
diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py
index a6e159a14..699b1158a 100644
--- a/yt_dlp/minicurses.py
+++ b/yt_dlp/minicurses.py
@@ -1,6 +1,77 @@
import functools
from threading import Lock
-from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES, write_string
+from .utils import supports_terminal_sequences, write_string
+
+
+CONTROL_SEQUENCES = {
+ 'DOWN': '\n',
+ 'UP': '\033[A',
+ 'ERASE_LINE': '\033[K',
+ 'RESET': '\033[0m',
+}
+
+
+_COLORS = {
+ 'BLACK': '0',
+ 'RED': '1',
+ 'GREEN': '2',
+ 'YELLOW': '3',
+ 'BLUE': '4',
+ 'PURPLE': '5',
+ 'CYAN': '6',
+ 'WHITE': '7',
+}
+
+
+_TEXT_STYLES = {
+ 'NORMAL': '0',
+ 'BOLD': '1',
+ 'UNDERLINED': '4',
+}
+
+
+def format_text(text, f):
+ '''
+ @param f String representation of formatting to apply in the form:
+ [style] [light] font_color [on [light] bg_color]
+ Eg: "red", "bold green on light blue"
+ '''
+ f = f.upper()
+ tokens = f.strip().split()
+
+ bg_color = ''
+ if 'ON' in tokens:
+ if tokens[-1] == 'ON':
+ raise SyntaxError(f'Empty background format specified in {f!r}')
+ if tokens[-1] not in _COLORS:
+ raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color')
+ bg_color = f'4{_COLORS[tokens.pop()]}'
+ if tokens[-1] == 'LIGHT':
+ bg_color = f'0;10{bg_color[1:]}'
+ tokens.pop()
+ if tokens[-1] != 'ON':
+ raise SyntaxError(f'Invalid format {f.split(" ON ", 1)[1]!r} in {f!r}')
+ bg_color = f'\033[{bg_color}m'
+ tokens.pop()
+
+ if not tokens:
+ fg_color = ''
+ elif tokens[-1] not in _COLORS:
+ raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color')
+ else:
+ fg_color = f'3{_COLORS[tokens.pop()]}'
+ if tokens and tokens[-1] == 'LIGHT':
+ fg_color = f'9{fg_color[1:]}'
+ tokens.pop()
+ fg_style = tokens.pop() if tokens and tokens[-1] in _TEXT_STYLES else 'NORMAL'
+ fg_color = f'\033[{_TEXT_STYLES[fg_style]};{fg_color}m'
+ if tokens:
+ raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}')
+
+ if fg_color or bg_color:
+ return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}'
+ else:
+ return text
class MultilinePrinterBase:
@@ -67,15 +138,15 @@ class MultilinePrinter(MultilinePrinterBase):
yield '\r'
distance = dest - current
if distance < 0:
- yield TERMINAL_SEQUENCES['UP'] * -distance
+ yield CONTROL_SEQUENCES['UP'] * -distance
elif distance > 0:
- yield TERMINAL_SEQUENCES['DOWN'] * distance
+ yield CONTROL_SEQUENCES['DOWN'] * distance
self._lastline = dest
@lock
def print_at_line(self, text, pos):
if self._HAVE_FULLCAP:
- self.write(*self._move_cursor(pos), TERMINAL_SEQUENCES['ERASE_LINE'], text)
+ self.write(*self._move_cursor(pos), CONTROL_SEQUENCES['ERASE_LINE'], text)
text = self._add_line_number(text, pos)
textlen = len(text)
@@ -103,7 +174,7 @@ class MultilinePrinter(MultilinePrinterBase):
if self._HAVE_FULLCAP:
self.write(
- *text, TERMINAL_SEQUENCES['ERASE_LINE'],
- f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
+ *text, CONTROL_SEQUENCES['ERASE_LINE'],
+ f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
else:
self.write(*text, ' ' * self._lastlength)
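
`format_text` compiles a human-readable spec into ANSI SGR sequences from the `_COLORS` and `_TEXT_STYLES` tables above. Two examples of its output (assuming the module is importable as in this tree):

    from yt_dlp.minicurses import format_text

    format_text('warning', 'bold yellow')        # '\033[1;33mwarning\033[0m'
    format_text('ok', 'green on light black')    # '\033[0;32m\033[0;100mok\033[0m'
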
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index b45b79bc9..eb86f9e0c 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -558,12 +558,16 @@ def parseOpts(overrideArguments=None):
help="Don't give any special preference to free containers (default)")
video_format.add_option(
'--check-formats',
- action='store_true', dest='check_formats', default=None,
- help='Check that the formats selected are actually downloadable')
+ action='store_const', const='selected', dest='check_formats', default=None,
+ help='Check that the selected formats are actually downloadable')
+ video_format.add_option(
+ '--check-all-formats',
+ action='store_true', dest='check_formats',
+ help='Check all formats for whether they are actually downloadable')
video_format.add_option(
'--no-check-formats',
action='store_false', dest='check_formats',
- help='Do not check that the formats selected are actually downloadable')
+ help='Do not check that the formats are actually downloadable')
video_format.add_option(
'-F', '--list-formats',
action='store_true', dest='listformats',
@@ -972,6 +976,9 @@ def parseOpts(overrideArguments=None):
dest='batchfile', action='store_const', const=None,
help='Do not read URLs from batch file (default)')
filesystem.add_option(
+ '--id', default=False,
+ action='store_true', dest='useid', help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
'-P', '--paths',
metavar='[TYPES:]PATH', dest='paths', default={}, type='str',
action='callback', callback=_dict_from_options_callback,
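
With `--check-formats` switched to `store_const`, the single `check_formats` setting now has three states instead of two (a summary of the flags above, not code from the diff):

    # (neither flag)       -> check_formats = None        # no probing
    # --check-formats      -> check_formats = 'selected'  # probe only the formats chosen for download
    # --check-all-formats  -> check_formats = True        # probe every extracted format
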
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 4a0a96427..b7fcc569b 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -41,6 +41,7 @@ EXT_TO_OUT_FORMATS = {
'ts': 'mpegts',
'wma': 'asf',
'wmv': 'asf',
+ 'vtt': 'webvtt',
}
ACODECS = {
'mp3': 'libmp3lame',
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
index 7265a9de7..70c5462d1 100644
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -1,6 +1,8 @@
+from hashlib import sha256
+import itertools
import json
import re
-from hashlib import sha256
+import time
from .ffmpeg import FFmpegPostProcessor
from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
@@ -33,6 +35,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
self.to_screen(f'SponsorBlock is not supported for {extractor}')
return [], info
+ self.to_screen('Fetching SponsorBlock segments')
info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration'])
return [], info
@@ -79,18 +82,28 @@ class SponsorBlockPP(FFmpegPostProcessor):
'service': service,
'categories': json.dumps(self._categories),
})
+ self.write_debug(f'SponsorBlock query: {url}')
for d in self._get_json(url):
if d['videoID'] == video_id:
return d['segments']
return []
def _get_json(self, url):
- self.write_debug(f'SponsorBlock query: {url}')
- try:
- rsp = self._downloader.urlopen(sanitized_Request(url))
- except network_exceptions as e:
- if isinstance(e, compat_HTTPError) and e.code == 404:
- return []
- raise PostProcessingError(f'Unable to communicate with SponsorBlock API - {e}')
-
- return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
+ # While this is not an extractor, it behaves similarly to one,
+ # so obey extractor_retries and sleep_interval_requests
+ max_retries = self.get_param('extractor_retries', 3)
+ sleep_interval = self.get_param('sleep_interval_requests') or 0
+ for retries in itertools.count():
+ try:
+ rsp = self._downloader.urlopen(sanitized_Request(url))
+ return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
+ except network_exceptions as e:
+ if isinstance(e, compat_HTTPError) and e.code == 404:
+ return []
+ if retries < max_retries:
+ self.report_warning(f'{e}. Retrying...')
+ if sleep_interval > 0:
+ self.to_screen(f'Sleeping {sleep_interval} seconds ...')
+ time.sleep(sleep_interval)
+ continue
+ raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}')
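
Stripped of the SponsorBlock specifics, the new `_get_json` is a bounded retry loop around a single request. The pattern in isolation (a sketch; `fetch` and the exception type are stand-ins for the real call and yt-dlp's `network_exceptions`):

    import itertools
    import time

    def fetch_with_retries(fetch, max_retries=3, sleep_interval=0):
        for retries in itertools.count():
            try:
                return fetch()
            except OSError:
                if retries >= max_retries:
                    raise
                if sleep_interval > 0:
                    time.sleep(sleep_interval)
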
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 88adbd3b9..e70c5f909 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2492,9 +2492,9 @@ class GeoRestrictedError(ExtractorError):
geographic location due to geographic restrictions imposed by a website.
"""
- def __init__(self, msg, countries=None):
- super(GeoRestrictedError, self).__init__(msg, expected=True)
- self.msg = msg
+ def __init__(self, msg, countries=None, **kwargs):
+ kwargs['expected'] = True
+ super(GeoRestrictedError, self).__init__(msg, **kwargs)
self.countries = countries
@@ -2542,23 +2542,33 @@ class PostProcessingError(YoutubeDLError):
self.msg = msg
-class ExistingVideoReached(YoutubeDLError):
- """ --max-downloads limit has been reached. """
- pass
+class DownloadCancelled(YoutubeDLError):
+ """ Exception raised when the download queue should be interrupted """
+ msg = 'The download was cancelled'
+ def __init__(self, msg=None):
+ if msg is not None:
+ self.msg = msg
+ YoutubeDLError.__init__(self, self.msg)
-class RejectedVideoReached(YoutubeDLError):
- """ --max-downloads limit has been reached. """
- pass
+class ExistingVideoReached(DownloadCancelled):
+ """ --break-on-existing triggered """
+ msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
-class ThrottledDownload(YoutubeDLError):
- """ Download speed below --throttled-rate. """
- pass
+
+class RejectedVideoReached(DownloadCancelled):
+ """ --break-on-reject triggered """
+ msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
-class MaxDownloadsReached(YoutubeDLError):
+class MaxDownloadsReached(DownloadCancelled):
""" --max-downloads limit has been reached. """
+ msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
+
+
+class ThrottledDownload(YoutubeDLError):
+ """ Download speed below --throttled-rate. """
pass
@@ -3714,14 +3724,14 @@ def parse_resolution(s):
if s is None:
return {}
- mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+ mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
if mobj:
return {
'width': int(mobj.group('w')),
'height': int(mobj.group('h')),
}
- mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+ mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
if mobj:
return {'height': int(mobj.group(1))}
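
The switch from `\b` to explicit alphanumeric lookarounds, plus the comma separator, widens what `parse_resolution` accepts (expected results per the patterns above):

    # parse_resolution('1920x1080')  -> {'width': 1920, 'height': 1080}
    # parse_resolution('1920,1080')  -> {'width': 1920, 'height': 1080}  (comma newly accepted)
    # parse_resolution('_720p_')     -> {'height': 720}  (underscores no longer block the match,
    #                                                     since \b treats '_' as a word character)
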
@@ -4050,6 +4060,8 @@ class LazyList(collections.abc.Sequence):
def __exhaust(self):
self.__cache.extend(self.__iterable)
+ # Discard the emptied iterable to make it pickle-able
+ self.__iterable = []
return self.__cache
def exhaust(self):
@@ -4501,6 +4513,7 @@ OUTTMPL_TYPES = {
'description': 'description',
'annotation': 'annotations.xml',
'infojson': 'info.json',
+ 'link': None,
'pl_thumbnail': None,
'pl_description': 'description',
'pl_infojson': 'info.json',
@@ -4729,7 +4742,7 @@ def determine_protocol(info_dict):
if protocol is not None:
return protocol
- url = info_dict['url']
+ url = sanitize_url(info_dict['url'])
if url.startswith('rtmp'):
return 'rtmp'
elif url.startswith('mms'):
@@ -4748,9 +4761,11 @@ def determine_protocol(info_dict):
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
""" Render a list of rows, each as a list of values """
+ def width(string):
+ return len(remove_terminal_sequences(string))
def get_max_lens(table):
- return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+ return [max(width(str(v)) for v in col) for col in zip(*table)]
def filter_using_list(row, filterArray):
return [col for (take, col) in zip(filterArray, row) if take]
@@ -4762,10 +4777,15 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
table = [header_row] + data
max_lens = get_max_lens(table)
+ extraGap += 1
if delim:
- table = [header_row] + [['-' * ml for ml in max_lens]] + data
- format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
- return '\n'.join(format_str % tuple(row) for row in table)
+ table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
+ max_lens[-1] = 0
+ for row in table:
+ for pos, text in enumerate(map(str, row)):
+ row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
+ ret = '\n'.join(''.join(row) for row in table)
+ return ret
def _match_one(filter_part, dct, incomplete):
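
The point of measuring cells with `width()` instead of `len()`: ANSI escapes are invisible on screen but still count toward `len()`, so colored cells would otherwise inflate their column. A quick check (regex copied from the `remove_terminal_sequences` definition further down):

    import re

    def width(string):
        return len(re.sub('\033\\[[^m]+m', '', string))

    cell = '\033[0;31mERROR\033[0m'
    len(cell), width(cell)  # -> (16, 5)
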
@@ -6229,6 +6249,12 @@ URL=%(url)s
Icon=text-html
'''.lstrip()
+LINK_TEMPLATES = {
+ 'url': DOT_URL_LINK_TEMPLATE,
+ 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
+ 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
+}
+
def iri_to_uri(iri):
"""
@@ -6486,6 +6512,13 @@ def jwt_encode_hs256(payload_data, key, headers={}):
return token
+# Can be extended in the future to verify the signature and parse the header, returning the algorithm used if it's not HS256
+def jwt_decode_hs256(jwt):
+ header_b64, payload_b64, signature_b64 = jwt.split('.')
+ payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
+ return payload_data
+
+
def supports_terminal_sequences(stream):
if compat_os_name == 'nt':
if get_windows_version() < (10, 0, 10586):
@@ -6498,12 +6531,12 @@ def supports_terminal_sequences(stream):
return False
-TERMINAL_SEQUENCES = {
- 'DOWN': '\n',
- 'UP': '\x1b[A',
- 'ERASE_LINE': '\x1b[K',
- 'RED': '\033[0;31m',
- 'YELLOW': '\033[0;33m',
- 'BLUE': '\033[0;34m',
- 'RESET_STYLE': '\033[0m',
-}
+_terminal_sequences_re = re.compile('\033\\[[^m]+m')
+
+
+def remove_terminal_sequences(string):
+ return _terminal_sequences_re.sub('', string)
+
+
+def number_of_digits(number):
+ return len('%d' % number)
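
`jwt_decode_hs256` is the read-side counterpart of the existing `jwt_encode_hs256`: it parses the payload only and, per its comment, leaves signature verification for later. A round-trip sanity sketch, assuming both helpers are importable from `yt_dlp.utils` as in this tree (the encoder returns bytes, hence the decode):

    from yt_dlp.utils import jwt_decode_hs256, jwt_encode_hs256

    token = jwt_encode_hs256({'sub': 'example'}, 'secret-key').decode('utf-8')
    assert jwt_decode_hs256(token) == {'sub': 'example'}
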
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 83b6fea9f..e7203be6b 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2021.10.10'
+__version__ = '2021.10.22'