From c9f5ce511877ae4f22d2eb2f70c3c6edf6c1971d Mon Sep 17 00:00:00 2001 From: Benjamin Ryan Date: Fri, 2 Dec 2022 03:38:00 -0600 Subject: [extractor/tiktok] Update API hostname (#5690) Closes #5688 Authored by: redraskal --- yt_dlp/extractor/tiktok.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 1bbf88495..95223f5de 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -30,7 +30,7 @@ class TikTokBaseIE(InfoExtractor): _WORKING_APP_VERSION = None _APP_NAME = 'trill' _AID = 1180 - _API_HOSTNAME = 'api-h2.tiktokv.com' + _API_HOSTNAME = 'api16-normal-c-useast1a.tiktokv.com' _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _WEBPAGE_HOST = 'https://www.tiktok.com/' QUALITIES = ('360p', '540p', '720p', '1080p') -- cgit v1.2.3 From 71df9b7fd504767583cf1e088ae307c942799f2b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 30 Nov 2022 11:34:51 +0530 Subject: [cleanup] Misc --- .github/workflows/core.yml | 11 ++++++----- .github/workflows/quick-test.yml | 13 ++++++------- .gitignore | 1 + CONTRIBUTING.md | 22 +++++++++++++++++++--- README.md | 26 +++++++++++++------------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/__init__.py | 7 +++---- yt_dlp/downloader/common.py | 5 ++++- yt_dlp/extractor/common.py | 12 +++++++++++- yt_dlp/options.py | 8 ++++---- yt_dlp/utils.py | 5 ++++- 11 files changed, 72 insertions(+), 40 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index e12918626..dead444c0 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -12,13 +12,13 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - # CPython 3.9 is in quick-test - python-version: ['3.7', '3.10', 3.11-dev, pypy-3.7, pypy-3.8] + # CPython 3.11 is in quick-test + python-version: ['3.8', '3.9', '3.10', pypy-3.7, pypy-3.8] run-tests-ext: [sh] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: 
'3.8' + python-version: '3.7' run-tests-ext: bat - os: windows-latest python-version: pypy-3.9 @@ -33,5 +33,6 @@ jobs: run: pip install pytest - name: Run tests continue-on-error: False - run: ./devscripts/run_tests.${{ matrix.run-tests-ext }} core - # Linter is in quick-test + run: | + python3 -m yt_dlp -v || true # Print debug head + ./devscripts/run_tests.${{ matrix.run-tests-ext }} core diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 8a0ac98bb..930e58152 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -10,24 +10,23 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python + - name: Set up Python 3.11 uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: '3.11' - name: Install test requirements run: pip install pytest pycryptodomex - name: Run tests - run: ./devscripts/run_tests.sh core + run: | + python3 -m yt_dlp -v || true + ./devscripts/run_tests.sh core flake8: name: Linter if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 + - uses: actions/setup-python@v4 - name: Install flake8 run: pip install flake8 - name: Make lazy extractors diff --git a/.gitignore b/.gitignore index 0ce059b34..00d74057f 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,7 @@ dist/ zip/ tmp/ venv/ +.venv/ completions/ # Misc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8ac671dc..551db674e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -351,8 +351,9 @@ Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate ```python thumbnail_data = data.get('thumbnails') or [] thumbnails = [{ - 'url': item['url'] -} for item in thumbnail_data] # correct + 'url': item['url'], + 'height': item.get('h'), +} for item in thumbnail_data if item.get('url')] # correct ``` and 
not like: @@ -360,12 +361,27 @@ and not like: ```python thumbnail_data = data.get('thumbnails') thumbnails = [{ - 'url': item['url'] + 'url': item['url'], + 'height': item.get('h'), } for item in thumbnail_data] # incorrect ``` In this case, `thumbnail_data` will be `None` if the field was not found and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `or []` avoids this error and results in setting an empty list in `thumbnails` instead. +Alternately, this can be further simplified by using `traverse_obj` + +```python +thumbnails = [{ + 'url': item['url'], + 'height': item.get('h'), +} for item in traverse_obj(data, ('thumbnails', lambda _, v: v['url']))] +``` + +or, even better, + +```python +thumbnails = traverse_obj(data, ('thumbnails', ..., {'url': 'url', 'height': 'h'})) +``` ### Provide fallbacks diff --git a/README.md b/README.md index fa55d130b..b6a07da9a 100644 --- a/README.md +++ b/README.md @@ -432,19 +432,19 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi explicitly provided IP block in CIDR notation ## Video Selection: - -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the videos + -I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items to download. You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. Use negative indices to count from the right and negative STEP to download in reverse order. E.g. "-I 1:3,7,-5::2" used on a - playlist of size 15 will download the videos + playlist of size 15 will download the items at index 1,2,3,7,11,13,15 - --min-filesize SIZE Do not download any videos smaller than + --min-filesize SIZE Abort download if filesize is smaller than + SIZE, e.g. 50k or 44.6M + --max-filesize SIZE Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M - --max-filesize SIZE Do not download any videos larger than SIZE, - e.g. 
50k or 44.6M --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. @@ -491,9 +491,9 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi a file that is in the archive --break-on-reject Stop the download process when encountering a file that has been filtered out - --break-per-input --break-on-existing, --break-on-reject, - --max-downloads, and autonumber resets per - input URL + --break-per-input Alters --max-downloads, --break-on-existing, + --break-on-reject, and autonumber to reset + per input URL --no-break-per-input --break-on-existing and similar options terminates the entire download queue --skip-playlist-after-errors N Number of allowed failures until the rest of @@ -1046,10 +1046,10 @@ Make chapter entries for, or remove various segments (sponsor, for, separated by commas. Available categories are sponsor, intro, outro, selfpromo, preview, filler, interaction, - music_offtopic, poi_highlight, chapter, all and - default (=all). You can prefix the category - with a "-" to exclude it. See [1] for - description of the categories. E.g. + music_offtopic, poi_highlight, chapter, all + and default (=all). You can prefix the + category with a "-" to exclude it. See [1] + for description of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories --sponsorblock-remove CATS SponsorBlock categories to be removed from @@ -1058,7 +1058,7 @@ Make chapter entries for, or remove various segments (sponsor, remove takes precedence. 
The syntax and available categories are the same as for --sponsorblock-mark except that "default" - refers to "all,-filler" and poi_highlight and + refers to "all,-filler" and poi_highlight, chapter are not available --sponsorblock-chapter-title TEMPLATE An output template for the title of the diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index b1d009280..8d28783d8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3123,7 +3123,7 @@ class YoutubeDL: fd, success = None, True if info_dict.get('protocol') or info_dict.get('url'): fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') - if fd is not FFmpegFD and ( + if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( info_dict.get('section_start') or info_dict.get('section_end')): msg = ('This format cannot be partially downloaded' if FFmpegFD.available() else 'You have requested downloading the video partially, but ffmpeg is not installed') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f1a347514..f1d6c369b 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -91,12 +91,11 @@ def get_urls(urls, batchfile, verbose): def print_extractor_information(opts, urls): - # Importing GenericIE is currently slow since it imports other extractors - # TODO: Move this back to module level after generalization of embed detection - from .extractor.generic import GenericIE - out = '' if opts.list_extractors: + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE + urls = dict.fromkeys(urls, False) for ie in list_extractor_classes(opts.age_limit): out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index fe3633250..077b29b41 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -20,6 +20,7 @@ from ..utils import ( RetryManager, classproperty, decodeArgument, + 
deprecation_warning, encodeFilename, format_bytes, join_nonempty, @@ -180,7 +181,9 @@ class FileDownloader: @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - parse_bytes(bytestr) + deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' + 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead') + return parse_bytes(bytestr) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 3ca8fe24c..3910c55ad 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -71,6 +71,7 @@ from ..utils import ( str_to_int, strip_or_none, traverse_obj, + truncate_string, try_call, try_get, unescapeHTML, @@ -674,7 +675,8 @@ class InfoExtractor: for _ in range(2): try: self.initialize() - self.write_debug('Extracting URL: %s' % url) + self.to_screen('Extracting URL: %s' % ( + url if self.get_param('verbose') else truncate_string(url, 100, 20))) ie_result = self._real_extract(url) if ie_result is None: return None @@ -1906,6 +1908,14 @@ class InfoExtractor: errnote=None, fatal=True, live=False, data=None, headers={}, query={}): + if not m3u8_url: + if errnote is not False: + errnote = errnote or 'Failed to obtain m3u8 URL' + if fatal: + raise ExtractorError(errnote, video_id=video_id) + self.report_warning(f'{errnote}{bug_reports_message()}') + return [], {} + res = self._download_webpage_handle( m3u8_url, video_id, note='Downloading m3u8 information' if note is None else note, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index bee867aa9..bc574b885 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -535,10 +535,10 @@ def create_parser(): '-I', '--playlist-items', dest='playlist_items', metavar='ITEM_SPEC', default=None, help=( - 'Comma separated playlist_index of the videos to download. 
' + 'Comma separated playlist_index of the items to download. ' 'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. ' 'Use negative indices to count from the right and negative STEP to download in reverse order. ' - 'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15')) + 'E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the items at index 1,2,3,7,11,13,15')) selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', @@ -554,7 +554,7 @@ def create_parser(): selection.add_option( '--max-filesize', metavar='SIZE', dest='max_filesize', default=None, - help='Abort download if filesize if larger than SIZE, e.g. 50k or 44.6M') + help='Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M') selection.add_option( '--date', metavar='DATE', dest='date', default=None, @@ -635,7 +635,7 @@ def create_parser(): selection.add_option( '--break-per-input', action='store_true', dest='break_per_url', default=False, - help='--break-on-existing, --break-on-reject, --max-downloads, and autonumber resets per input URL') + help='Alters --max-downloads, --break-on-existing, --break-on-reject, and autonumber to reset per input URL') selection.add_option( '--no-break-per-input', action='store_false', dest='break_per_url', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ed1b24335..a3da3c69e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3872,6 +3872,9 @@ class download_range_func: return (isinstance(other, download_range_func) and self.chapters == other.chapters and self.ranges == other.ranges) + def __repr__(self): + return f'{type(self).__name__}({self.chapters}, {self.ranges})' + def parse_dfxp_time_expr(time_expr): if not time_expr: @@ -5976,7 +5979,7 @@ def truncate_string(s, left, right=0): assert left > 3 and right >= 0 if s is None or len(s) <= left + right: return s - return f'{s[:left-3]}...{s[-right:]}' + return 
f'{s[:left-3]}...{s[-right:] if right else ""}' def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None): -- cgit v1.2.3 From c53a18f016fe6ff774411d938c9959097f00b44c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 5 Dec 2022 01:06:37 +0530 Subject: [utils] windows_enable_vt_mode: Proper implementation Authored by: Grub4K --- yt_dlp/utils.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a3da3c69e..36170e125 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5579,17 +5579,39 @@ def supports_terminal_sequences(stream): return False -def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 +def windows_enable_vt_mode(): + """Ref: https://bugs.python.org/issue30075 """ if get_windows_version() < (10, 0, 10586): return - global WINDOWS_VT_MODE - try: - Popen.run('', shell=True) - except Exception: - return - WINDOWS_VT_MODE = True - supports_terminal_sequences.cache_clear() + import ctypes + import ctypes.wintypes + import msvcrt + + ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + + dll = ctypes.WinDLL('kernel32', use_last_error=False) + handle = os.open('CONOUT$', os.O_RDWR) + + try: + h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle)) + dw_original_mode = ctypes.wintypes.DWORD() + success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode)) + if not success: + raise Exception('GetConsoleMode failed') + + success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD( + dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) + if not success: + raise Exception('SetConsoleMode failed') + except Exception as e: + write_string(f'WARNING: Cannot enable VT mode - {e}') + else: + global WINDOWS_VT_MODE + WINDOWS_VT_MODE = True + supports_terminal_sequences.cache_clear() + finally: + os.close(handle) _terminal_sequences_re = re.compile('\033\\[[^m]+m') -- cgit v1.2.3 From 
c4cbd3bebd33d2d77fa340a4035447ab1b9eb3eb Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 4 Dec 2022 22:30:31 +0000 Subject: [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg (#5708) Closes #5706 Authored by: bashonly --- README.md | 1 + yt_dlp/extractor/tiktok.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b6a07da9a..8fdedacf5 100644 --- a/README.md +++ b/README.md @@ -1765,6 +1765,7 @@ The following extractors use this feature: * `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv` #### tiktok +* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com` * `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1` * `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221` diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 95223f5de..2dd4510cc 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -30,11 +30,15 @@ class TikTokBaseIE(InfoExtractor): _WORKING_APP_VERSION = None _APP_NAME = 'trill' _AID = 1180 - _API_HOSTNAME = 'api16-normal-c-useast1a.tiktokv.com' _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s' _WEBPAGE_HOST = 'https://www.tiktok.com/' QUALITIES = ('360p', '540p', '720p', '1080p') + @property + def _API_HOSTNAME(self): + return self._configuration_arg( + 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0] + @staticmethod def _create_url(user_id, video_id): return f'https://www.tiktok.com/@{user_id or "_"}/video/{video_id}' @@ -398,7 +402,7 @@ class TikTokBaseIE(InfoExtractor): class TikTokIE(TikTokBaseIE): - _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P[\w\.-]+)/video)/(?P\d+)' + _VALID_URL = r'https?://www\.tiktok\.com/(?:embed|@(?P[\w\.-]+)?/video)/(?P\d+)' _EMBED_REGEX = 
[rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P{_VALID_URL})'] _TESTS = [{ @@ -944,8 +948,27 @@ class TikTokVMIE(InfoExtractor): 'creator': 'SigmaChad', }, }, { - 'url': 'https://vm.tiktok.com/ZSe4FqkKd', - 'only_matching': True, + 'url': 'https://vm.tiktok.com/ZTR45GpSF/', + 'info_dict': { + 'id': '7106798200794926362', + 'ext': 'mp4', + 'title': 'md5:edc3e7ea587847f8537468f2fe51d074', + 'uploader_id': '6997695878846268418', + 'upload_date': '20220608', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'thumbnail': r're:https://.+\.webp.*', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAdZ_NcPPgMneaGrW0hN8O_J_bwLshwNNERRF5DxOw2HKIzk0kdlLrR8RkVl1ksrMO', + 'duration': 29, + 'timestamp': 1654680400, + 'repost_count': int, + 'artist': 'Akihitoko', + 'track': 'original sound', + 'description': 'md5:edc3e7ea587847f8537468f2fe51d074', + 'uploader': 'akihitoko1', + 'creator': 'Akihitoko', + }, }, { 'url': 'https://vt.tiktok.com/ZSe4FqkKd', 'only_matching': True, -- cgit v1.2.3 From 935bac1e4de35107a15ea2ad45402f507527dcfb Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 6 Dec 2022 00:35:08 +0530 Subject: Fix `--cookies-from-browser` CLI parsing Closes #5716 --- yt_dlp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f1d6c369b..202f102ba 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -350,7 +350,7 @@ def validate_options(opts): mobj = re.fullmatch(r'''(?x) (?P[^+:]+) (?:\s*\+\s*(?P[^:]+))? - (?:\s*:\s*(?P.+?))? + (?:\s*:\s*(?!:)(?P.+?))? (?:\s*::\s*(?P.+))? 
''', opts.cookiesfrombrowser) if mobj is None: -- cgit v1.2.3 From 7991ae57a800316930e20a15df8314616c5cba8f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 8 Dec 2022 17:17:16 +0530 Subject: [extractor/sibnet] Separate from VKIE Fixes https://github.com/yt-dlp/yt-dlp/commit/bfd973ece3369c593b5e82a88cc16de80088a73e#commitcomment-91834251 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/generic.py | 5 ----- yt_dlp/extractor/sibnet.py | 17 +++++++++++++++++ yt_dlp/extractor/vk.py | 6 +++--- 4 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 yt_dlp/extractor/sibnet.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 2fe15f6d2..137284089 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1639,6 +1639,7 @@ from .shared import ( VivoIE, ) from .sharevideos import ShareVideosEmbedIE +from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE from .simplecast import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2fcbc6f43..190aff331 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1864,11 +1864,6 @@ class GenericIE(InfoExtractor): 'title': 'I AM BIO Podcast | BIO', }, 'playlist_mincount': 52, - }, - { - # Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed) - 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', - 'only_matching': True, }, { # WimTv embed player 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', diff --git a/yt_dlp/extractor/sibnet.py b/yt_dlp/extractor/sibnet.py new file mode 100644 index 000000000..73bb75d8f --- /dev/null +++ b/yt_dlp/extractor/sibnet.py @@ -0,0 +1,17 @@ +from .common import InfoExtractor + + +class SibnetEmbedIE(InfoExtractor): + # Ref: https://help.sibnet.ru/?sibnet_video_embed + _VALID_URL = False + _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1'] + 
_WEBPAGE_TESTS = [{ + 'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html', + 'info_dict': { + 'id': 'shell', # FIXME? + 'ext': 'mp4', + 'age_limit': 0, + 'thumbnail': 'https://video.sibnet.ru/upload/cover/video_1887072_0.jpg', + 'title': 'КВН Москва не сразу строилась - Девушка впервые играет в Mortal Kombat', + } + }] diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 347aa381d..0fb95c863 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from .dailymotion import DailymotionIE from .odnoklassniki import OdnoklassnikiIE from .pladform import PladformIE +from .sibnet import SibnetEmbedIE from .vimeo import VimeoIE from .youtube import YoutubeIE from ..compat import compat_urlparse @@ -101,8 +102,7 @@ class VKIE(VKBaseIE): (?P-?\d+_\d+)(?:.*\blist=(?P([\da-f]+)|(ln-[\da-zA-Z]+)))? ) ''' - # https://help.sibnet.ru/?sibnet_video_embed - _EMBED_REGEX = [r']+\bsrc=(["\'])(?P(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1'] + _TESTS = [ { 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', @@ -455,7 +455,7 @@ class VKIE(VKBaseIE): if odnoklassniki_url: return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) - sibnet_url = next(self._extract_embed_urls(url, info_page), None) + sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None) if sibnet_url: return self.url_result(sibnet_url) -- cgit v1.2.3 From 42ec478fc4abe4131a0908881673a19aa750bc97 Mon Sep 17 00:00:00 2001 From: David Turner <547637+digitall@users.noreply.github.com> Date: Thu, 8 Dec 2022 12:38:52 +0000 Subject: [extractor/plutotv] Fix videos with non-zero start (#5745) Authored by: digitall --- yt_dlp/extractor/plutotv.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 71a05cc7a..caffeb21d 100644 --- a/yt_dlp/extractor/plutotv.py +++ 
b/yt_dlp/extractor/plutotv.py @@ -84,6 +84,17 @@ class PlutoTVIE(InfoExtractor): }, { 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', 'only_matching': True, + }, + { + 'url': 'https://pluto.tv/en/on-demand/movies/attack-of-the-killer-tomatoes-1977-1-1-ptv1', + 'md5': '7db56369c0da626a32d505ec6eb3f89f', + 'info_dict': { + 'id': '5b190c7bb0875c36c90c29c4', + 'ext': 'mp4', + 'title': 'Attack of the Killer Tomatoes', + 'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! (1978)', + 'duration': 5700, + } } ] @@ -103,7 +114,7 @@ class PlutoTVIE(InfoExtractor): compat_urlparse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) continue first_segment_url = re.search( - r'^(https?://.*/).+\-0+\.ts$', res, + r'^(https?://.*/).+\-0+[0-1]0\.ts$', res, re.MULTILINE) if first_segment_url: m3u8_urls.add( -- cgit v1.2.3 From dfc186d4220081fdf7184347187639b15ab68a2f Mon Sep 17 00:00:00 2001 From: lkw123 <2020393267@qq.com> Date: Thu, 8 Dec 2022 20:43:29 +0800 Subject: [extractor/xiami] Remove extractors (#5711) Authored by: synthpop123 --- supportedsites.md | 4 - yt_dlp/extractor/_extractors.py | 6 -- yt_dlp/extractor/xiami.py | 198 ---------------------------------------- 3 files changed, 208 deletions(-) delete mode 100644 yt_dlp/extractor/xiami.py diff --git a/supportedsites.md b/supportedsites.md index d7565c139..fbada177e 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1624,10 +1624,6 @@ - **XHamster** - **XHamsterEmbed** - **XHamsterUser** - - **xiami:album**: 虾米音乐 - 专辑 - - **xiami:artist**: 虾米音乐 - 歌手 - - **xiami:collection**: 虾米音乐 - 精选集 - - **xiami:song**: 虾米音乐 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - **xinpianchang**: xinpianchang.com diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 137284089..54ac1b730 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2236,12 +2236,6 @@ from .xhamster 
import ( XHamsterEmbedIE, XHamsterUserIE, ) -from .xiami import ( - XiamiSongIE, - XiamiAlbumIE, - XiamiArtistIE, - XiamiCollectionIE -) from .ximalaya import ( XimalayaIE, XimalayaAlbumIE diff --git a/yt_dlp/extractor/xiami.py b/yt_dlp/extractor/xiami.py deleted file mode 100644 index 71b2956a8..000000000 --- a/yt_dlp/extractor/xiami.py +++ /dev/null @@ -1,198 +0,0 @@ -from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote -from ..utils import int_or_none - - -class XiamiBaseIE(InfoExtractor): - _API_BASE_URL = 'https://emumo.xiami.com/song/playlist/cat/json/id' - - def _download_webpage_handle(self, *args, **kwargs): - webpage = super(XiamiBaseIE, self)._download_webpage_handle(*args, **kwargs) - if '>Xiami is currently not available in your country.<' in webpage: - self.raise_geo_restricted('Xiami is currently not available in your country') - return webpage - - def _extract_track(self, track, track_id=None): - track_name = track.get('songName') or track.get('name') or track['subName'] - artist = track.get('artist') or track.get('artist_name') or track.get('singers') - title = '%s - %s' % (artist, track_name) if artist else track_name - track_url = self._decrypt(track['location']) - - subtitles = {} - lyrics_url = track.get('lyric_url') or track.get('lyric') - if lyrics_url and lyrics_url.startswith('http'): - subtitles['origin'] = [{'url': lyrics_url}] - - return { - 'id': track.get('song_id') or track_id, - 'url': track_url, - 'title': title, - 'thumbnail': track.get('pic') or track.get('album_pic'), - 'duration': int_or_none(track.get('length')), - 'creator': track.get('artist', '').split(';')[0], - 'track': track_name, - 'track_number': int_or_none(track.get('track')), - 'album': track.get('album_name') or track.get('title'), - 'artist': artist, - 'subtitles': subtitles, - } - - def _extract_tracks(self, item_id, referer, typ=None): - playlist = self._download_json( - '%s/%s%s' % (self._API_BASE_URL, item_id, '/type/%s' % typ if 
typ else ''), - item_id, headers={ - 'Referer': referer, - }) - return [ - self._extract_track(track, item_id) - for track in playlist['data']['trackList']] - - @staticmethod - def _decrypt(origin): - n = int(origin[0]) - origin = origin[1:] - short_length = len(origin) // n - long_num = len(origin) - short_length * n - l = tuple() - for i in range(0, n): - length = short_length - if i < long_num: - length += 1 - l += (origin[0:length], ) - origin = origin[length:] - ans = '' - for i in range(0, short_length + 1): - for j in range(0, n): - if len(l[j]) > i: - ans += l[j][i] - return compat_urllib_parse_unquote(ans).replace('^', '0') - - -class XiamiSongIE(XiamiBaseIE): - IE_NAME = 'xiami:song' - IE_DESC = '虾米音乐' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/song/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'http://www.xiami.com/song/1775610518', - 'md5': '521dd6bea40fd5c9c69f913c232cb57e', - 'info_dict': { - 'id': '1775610518', - 'ext': 'mp3', - 'title': 'HONNE - Woman', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'duration': 265, - 'creator': 'HONNE', - 'track': 'Woman', - 'album': 'Woman', - 'artist': 'HONNE', - 'subtitles': { - 'origin': [{ - 'ext': 'lrc', - }], - }, - }, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/song/1775256504', - 'md5': '932a3abd45c6aa2b1fdbe028fcb4c4fc', - 'info_dict': { - 'id': '1775256504', - 'ext': 'mp3', - 'title': '戴荃 - 悟空', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'duration': 200, - 'creator': '戴荃', - 'track': '悟空', - 'album': '悟空', - 'artist': '戴荃', - 'subtitles': { - 'origin': [{ - 'ext': 'lrc', - }], - }, - }, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/song/1775953850', - 'info_dict': { - 'id': '1775953850', - 'ext': 'mp3', - 'title': 'До Скону - Чума Пожирает Землю', - 'thumbnail': r're:http://img\.xiami\.net/images/album/.*\.jpg', - 'duration': 683, - 'creator': 'До Скону', - 'track': 'Чума Пожирает Землю', - 'track_number': 7, - 'album': 
'Ад', - 'artist': 'До Скону', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.xiami.com/song/xLHGwgd07a1', - 'only_matching': True, - }] - - def _real_extract(self, url): - return self._extract_tracks(self._match_id(url), url)[0] - - -class XiamiPlaylistBaseIE(XiamiBaseIE): - def _real_extract(self, url): - item_id = self._match_id(url) - return self.playlist_result(self._extract_tracks(item_id, url, self._TYPE), item_id) - - -class XiamiAlbumIE(XiamiPlaylistBaseIE): - IE_NAME = 'xiami:album' - IE_DESC = '虾米音乐 - 专辑' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/album/(?P[^/?#&]+)' - _TYPE = '1' - _TESTS = [{ - 'url': 'http://www.xiami.com/album/2100300444', - 'info_dict': { - 'id': '2100300444', - }, - 'playlist_count': 10, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/album/512288?spm=a1z1s.6843761.1110925389.6.hhE9p9', - 'only_matching': True, - }, { - 'url': 'http://www.xiami.com/album/URVDji2a506', - 'only_matching': True, - }] - - -class XiamiArtistIE(XiamiPlaylistBaseIE): - IE_NAME = 'xiami:artist' - IE_DESC = '虾米音乐 - 歌手' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/artist/(?P[^/?#&]+)' - _TYPE = '2' - _TESTS = [{ - 'url': 'http://www.xiami.com/artist/2132?spm=0.0.0.0.dKaScp', - 'info_dict': { - 'id': '2132', - }, - 'playlist_count': 20, - 'skip': 'Georestricted', - }, { - 'url': 'http://www.xiami.com/artist/bC5Tk2K6eb99', - 'only_matching': True, - }] - - -class XiamiCollectionIE(XiamiPlaylistBaseIE): - IE_NAME = 'xiami:collection' - IE_DESC = '虾米音乐 - 精选集' - _VALID_URL = r'https?://(?:www\.)?xiami\.com/collect/(?P[^/?#&]+)' - _TYPE = '3' - _TEST = { - 'url': 'http://www.xiami.com/collect/156527391?spm=a1z1s.2943601.6856193.12.4jpBnr', - 'info_dict': { - 'id': '156527391', - }, - 'playlist_mincount': 29, - 'skip': 'Georestricted', - } -- cgit v1.2.3 From 28b8f57b4b2a2e1bd1fbe68ae1ab2c44fdd51992 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 8 Dec 2022 
22:58:36 +0900 Subject: [extractor/noice] Add NoicePodcast extractor (#5621) Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/noice.py | 116 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 yt_dlp/extractor/noice.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 54ac1b730..c9dd7463c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1211,6 +1211,7 @@ from .nintendo import NintendoIE from .nitter import NitterIE from .njpwworld import NJPWWorldIE from .nobelprize import NobelPrizeIE +from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE diff --git a/yt_dlp/extractor/noice.py b/yt_dlp/extractor/noice.py new file mode 100644 index 000000000..e6e343303 --- /dev/null +++ b/yt_dlp/extractor/noice.py @@ -0,0 +1,116 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + determine_ext, + int_or_none, + parse_iso8601, + traverse_obj, + variadic, +) + + +class NoicePodcastIE(InfoExtractor): + _VALID_URL = r'https?://open\.noice\.id/content/(?P[a-fA-F0-9-]+)' + _TESTS = [{ + 'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2', + 'info_dict': { + 'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2', + 'ext': 'm4a', + 'season': 'Season 1', + 'description': 'md5:58d1274e6857b6fbbecf47075885380d', + 'release_date': '20221115', + 'timestamp': 1668496642, + 'season_number': 1, + 'upload_date': '20221115', + 'release_timestamp': 1668496642, + 'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! 
(bersama Wishnutama)', + 'modified_date': '20221121', + 'categories': ['Bisnis dan Keuangan'], + 'duration': 3567, + 'modified_timestamp': 1669030647, + 'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560', + 'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832', + 'like_count': int, + 'channel': 'Noice Space Talks', + 'comment_count': int, + 'dislike_count': int, + 'channel_follower_count': int, + } + }, { + 'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063', + 'info_dict': { + 'id': '222134e4-99f2-456f-b8a2-b8be404bf063', + 'ext': 'm4a', + 'release_timestamp': 1653488220, + 'description': 'md5:35074f6190cef52b05dd133bb2ef460e', + 'upload_date': '20220525', + 'timestamp': 1653460637, + 'release_date': '20220525', + 'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625', + 'title': 'Eps 1: Dijodohin Sama Anak Pak RT', + 'modified_timestamp': 1669030647, + 'season_number': 1, + 'modified_date': '20221121', + 'categories': ['Cerita dan Drama'], + 'duration': 1830, + 'season': 'Season 1', + 'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74', + 'dislike_count': int, + 'like_count': int, + 'comment_count': int, + 'channel': 'Dear Jerome', + 'channel_follower_count': int, + } + }] + + def _get_formats_and_subtitles(self, media_url, video_id): + formats, subtitles = [], {} + for url in variadic(media_url): + ext = determine_ext(url) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles(url, video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': url, + 'ext': 'mp3', + 'vcodec': 'none', + 'acodec': 'mp3', + }) + return formats, subtitles + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['contentDetails'] + + media_url_list = traverse_obj(nextjs_data, (('rawContentUrl', 'url'), 
)) + formats, subtitles = self._get_formats_and_subtitles(media_url_list, display_id) + + return { + 'id': nextjs_data.get('id') or display_id, + 'title': nextjs_data.get('title') or self._html_search_meta('og:title', webpage), + 'formats': formats, + 'subtitles': subtitles, + 'description': (nextjs_data.get('description') or clean_html(nextjs_data.get('htmlDescription')) + or self._html_search_meta(['description', 'og:description'], webpage)), + 'thumbnail': nextjs_data.get('image') or self._html_search_meta('og:image', webpage), + 'timestamp': parse_iso8601(nextjs_data.get('createdAt')), + 'release_timestamp': parse_iso8601(nextjs_data.get('publishedAt')), + 'modified_timestamp': parse_iso8601( + nextjs_data.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)), + 'duration': int_or_none(nextjs_data.get('duration')), + 'categories': traverse_obj(nextjs_data, ('genres', ..., 'name')), + 'season': nextjs_data.get('seasonName'), + 'season_number': int_or_none(nextjs_data.get('seasonNumber')), + 'channel': traverse_obj(nextjs_data, ('catalog', 'title')), + 'channel_id': traverse_obj(nextjs_data, ('catalog', 'id'), 'catalogId'), + **traverse_obj(nextjs_data, ('meta', 'aggregations', { + 'like_count': 'likes', + 'dislike_count': 'dislikes', + 'comment_count': 'comments', + 'channel_follower_count': 'followers', + })) + } -- cgit v1.2.3 From 839e2a62ae977ae51b1fcec50a8af3d28e1d230c Mon Sep 17 00:00:00 2001 From: MMM Date: Thu, 8 Dec 2022 17:32:17 +0100 Subject: [extractor/rumble] Add RumbleIE extractor (#5515) Closes #2846 Authored by: flashdagger --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/rumble.py | 102 ++++++++++++++++++++++++++++++++-------- 2 files changed, 84 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index c9dd7463c..b1d0a9fb0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1568,6 +1568,7 @@ from .ruhd import RUHDIE from 
.rule34video import Rule34VideoIE from .rumble import ( RumbleEmbedIE, + RumbleIE, RumbleChannelIE, ) from .rutube import ( diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index 102615c60..b7f798ffb 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -4,11 +4,15 @@ import re from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( + ExtractorError, + UnsupportedError, + clean_html, + get_element_by_class, int_or_none, + parse_count, parse_iso8601, traverse_obj, unescapeHTML, - ExtractorError, ) @@ -111,24 +115,6 @@ class RumbleEmbedIE(InfoExtractor): }] _WEBPAGE_TESTS = [ - { - 'note': 'Rumble embed', - 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', - 'md5': '53af34098a7f92c4e51cf0bd1c33f009', - 'info_dict': { - 'id': 'vb0ofn', - 'ext': 'mp4', - 'timestamp': 1612662578, - 'uploader': 'LovingMontana', - 'channel': 'LovingMontana', - 'upload_date': '20210207', - 'title': 'Winter-loving dog helps girls dig a snow fort ', - 'channel_url': 'https://rumble.com/c/c-546523', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg', - 'duration': 103, - 'live_status': 'not_live', - } - }, { 'note': 'Rumble JS embed', 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', @@ -235,6 +221,84 @@ class RumbleEmbedIE(InfoExtractor): } +class RumbleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?Pv(?!ideos)[\w.-]+)[^/]*$' + _EMBED_REGEX = [r'/v[\w.-]+\.html)>'] + _TESTS = [{ + 'add_ie': ['RumbleEmbed'], + 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', + 'md5': '53af34098a7f92c4e51cf0bd1c33f009', + 'info_dict': { + 'id': 'vb0ofn', + 'ext': 'mp4', + 'timestamp': 1612662578, + 'uploader': 'LovingMontana', + 'channel': 'LovingMontana', + 'upload_date': '20210207', + 'title': 
'Winter-loving dog helps girls dig a snow fort ', + 'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!', + 'channel_url': 'https://rumble.com/c/c-546523', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 103, + 'like_count': int, + 'view_count': int, + 'live_status': 'not_live', + } + }, { + 'url': 'http://www.rumble.com/vDMUM1?key=value', + 'only_matching': True, + }] + + _WEBPAGE_TESTS = [{ + 'url': 'https://rumble.com/videos?page=2', + 'playlist_count': 25, + 'info_dict': { + 'id': 'videos?page=2', + 'title': 'All videos', + 'description': 'Browse videos uploaded to Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/live-videos', + 'playlist_mincount': 19, + 'info_dict': { + 'id': 'live-videos', + 'title': 'Live Videos', + 'description': 'Live videos on Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/search/video?q=rumble&sort=views', + 'playlist_count': 24, + 'info_dict': { + 'id': 'video?q=rumble&sort=views', + 'title': 'Search results for: rumble', + 'age_limit': 0, + }, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None) + if not url_info: + raise UnsupportedError(url) + + release_ts_str = self._search_regex( + r'(?:Livestream begins|Streamed on):\s+

([^<]+)\s+playlist\s*<', webpage, 'playlist title', + r'([^<]+)\s+playlist\s*<', webpage, 'playlist title', fatal=False) return self.playlist_result(entries, playlist_id, title) -- cgit v1.2.3 From 153e88a75151a51cc2a2fbf02d62f66fc09b29d9 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 29 Dec 2022 17:12:07 +0900 Subject: [extractor/netverse] Add `NetverseSearch` extractor (#5838) Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/netverse.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 672eb9596..1b76d8264 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1160,6 +1160,7 @@ from .neteasemusic import ( from .netverse import ( NetverseIE, NetversePlaylistIE, + NetverseSearchIE, ) from .newgrounds import ( NewgroundsIE, diff --git a/yt_dlp/extractor/netverse.py b/yt_dlp/extractor/netverse.py index 3c4fd92eb..398198a1b 100644 --- a/yt_dlp/extractor/netverse.py +++ b/yt_dlp/extractor/netverse.py @@ -1,6 +1,6 @@ import itertools -from .common import InfoExtractor +from .common import InfoExtractor, SearchInfoExtractor from .dailymotion import DailymotionIE from ..utils import smuggle_url, traverse_obj @@ -251,3 +251,31 @@ class NetversePlaylistIE(NetverseBaseIE): self.parse_playlist(playlist_data['response'], playlist_id), traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')), traverse_obj(playlist_data, ('response', 'webseries_info', 'title'))) + + +class NetverseSearchIE(SearchInfoExtractor): + _SEARCH_KEY = 'netsearch' + + _TESTS = [{ + 'url': 'netsearch10:tetangga', + 'info_dict': { + 'id': 'tetangga', + 'title': 'tetangga', + }, + 'playlist_count': 10, + }] + + def _search_results(self, query): + last_page = None + for i in itertools.count(1): + search_data = self._download_json( + 
'https://api.netverse.id/search/elastic/search', query, + query={'q': query, 'page': i}, note=f'Downloading page {i}') + + videos = traverse_obj(search_data, ('response', 'data', ...)) + for video in videos: + yield self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE) + + last_page = last_page or traverse_obj(search_data, ('response', 'lastpage')) + if not videos or i >= (last_page or 0): + break -- cgit v1.2.3 From 9a9006ba20f1f9f34183e1bde098c75502a018f8 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 29 Dec 2022 06:15:38 -0500 Subject: [extractor/twitcasting] Fix videos with password (#5894) Closes #5888 Authored by: bashonly, Spicadox --- yt_dlp/extractor/twitcasting.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 735cb0bb0..2548dae04 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -38,7 +38,7 @@ class TwitCastingIE(InfoExtractor): 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20110822', - 'timestamp': 1314010824, + 'timestamp': 1313978424, 'duration': 32, 'view_count': int, }, @@ -52,10 +52,10 @@ class TwitCastingIE(InfoExtractor): 'ext': 'mp4', 'title': 'Live playing something #3689740', 'uploader_id': 'mttbernardini', - 'description': 'Salve, io sono Matto (ma con la e). 
Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.', + 'description': 'md5:1dc7efa2f1ab932fcd119265cebeec69', 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20120212', - 'timestamp': 1329028024, + 'upload_date': '20120211', + 'timestamp': 1328995624, 'duration': 681, 'view_count': int, }, @@ -64,15 +64,22 @@ class TwitCastingIE(InfoExtractor): 'videopassword': 'abc', }, }, { - 'note': 'archive is split in 2 parts', 'url': 'https://twitcasting.tv/loft_heaven/movie/685979292', 'info_dict': { 'id': '685979292', 'ext': 'mp4', - 'title': '南波一海のhear_here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”', - 'duration': 6964.599334, + 'title': '【無料配信】南波一海のhear/here “ナタリー望月哲さんに聞く編集と「渋谷系狂騒曲」”', + 'uploader_id': 'loft_heaven', + 'description': 'md5:3a0c7b53019df987ce545c935538bacf', + 'upload_date': '20210604', + 'timestamp': 1622802114, + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 6964, + 'view_count': int, + }, + 'params': { + 'skip_download': True, }, - 'playlist_mincount': 2, }] def _parse_data_movie_playlist(self, dmp, video_id): @@ -88,15 +95,18 @@ class TwitCastingIE(InfoExtractor): def _real_extract(self, url): uploader_id, video_id = self._match_valid_url(url).groups() + webpage, urlh = self._download_webpage_handle(url, video_id) video_password = self.get_param('videopassword') request_data = None if video_password: request_data = urlencode_postdata({ 'password': video_password, + **self._hidden_inputs(webpage), }, encoding='utf-8') - webpage, urlh = self._download_webpage_handle( - url, video_id, data=request_data, - headers={'Origin': 'https://twitcasting.tv'}) + webpage, urlh = self._download_webpage_handle( + url, video_id, data=request_data, + headers={'Origin': 'https://twitcasting.tv'}, + note='Trying video password') if urlh.geturl() != url and request_data: webpage = self._download_webpage( urlh.geturl(), video_id, data=request_data, @@ -122,7 +132,7 @@ class TwitCastingIE(InfoExtractor): duration = 
(try_get(video_js_data, lambda x: sum(float_or_none(y.get('duration')) for y in x) / 1000) or parse_duration(clean_html(get_element_by_class('tw-player-duration-time', webpage)))) view_count = str_to_int(self._search_regex( - (r'Total\s*:\s*([\d,]+)\s*Views', r'総視聴者\s*:\s*([\d,]+)\s*]+datetime="([^"]+)"', webpage, 'datetime', None)) -- cgit v1.2.3 From 3d667e0047915c32f5df9fdd86a4223dc0e9ce8f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 12:03:03 +0000 Subject: [extractor/slideslive] Support embeds and slides (#5784) Authored by: bashonly, Grub4K, pukkandan --- yt_dlp/extractor/slideslive.py | 390 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 362 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index 86c26a8a2..4268bfeaf 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -1,16 +1,24 @@ +import re +import urllib.parse + from .common import InfoExtractor from ..utils import ( + ExtractorError, + int_or_none, + parse_qs, smuggle_url, traverse_obj, unified_timestamp, + update_url_query, url_or_none, + xpath_text, ) class SlidesLiveIE(InfoExtractor): - _VALID_URL = r'https?://slideslive\.com/(?P[0-9]+)' + _VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P[0-9]+)' _TESTS = [{ - # service_name = yoda + # service_name = yoda, only XML slides info 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', 'info_dict': { 'id': '38902413', @@ -19,12 +27,14 @@ class SlidesLiveIE(InfoExtractor): 'timestamp': 1648189972, 'upload_date': '20220325', 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:42', + 'chapters': 'count:41', }, 'params': { 'skip_download': 'm3u8', }, }, { - # service_name = yoda + # service_name = yoda, /v7/ slides 'url': 'https://slideslive.com/38935785', 'info_dict': { 'id': '38935785', @@ -32,13 +42,15 @@ class SlidesLiveIE(InfoExtractor): 'title': 'Offline 
Reinforcement Learning: From Algorithms to Practical Challenges', 'upload_date': '20211115', 'timestamp': 1636996003, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:640', + 'chapters': 'count:639', }, 'params': { 'skip_download': 'm3u8', }, }, { - # service_name = yoda + # service_name = yoda, /v1/ slides 'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics', 'info_dict': { 'id': '38973182', @@ -47,12 +59,14 @@ class SlidesLiveIE(InfoExtractor): 'upload_date': '20220201', 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1643728135, + 'thumbnails': 'count:3', + 'chapters': 'count:2', }, 'params': { 'skip_download': 'm3u8', }, }, { - # service_name = youtube + # service_name = youtube, only XML slides info 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost', 'md5': '8a79b5e3d700837f40bd2afca3c8fa01', 'info_dict': { @@ -76,26 +90,253 @@ class SlidesLiveIE(InfoExtractor): 'comment_count': int, 'channel_follower_count': int, 'age_limit': 0, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', + 'thumbnails': 'count:169', 'playable_in_embed': True, 'availability': 'unlisted', 'tags': [], 'categories': ['People & Blogs'], + 'chapters': 'count:168', + }, + }, { + # embed-only presentation, only XML slides info + 'url': 'https://slideslive.com/embed/presentation/38925850', + 'info_dict': { + 'id': '38925850', + 'ext': 'mp4', + 'title': 'Towards a Deep Network Architecture for Structured Smoothness', + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:8', + 'timestamp': 1629671508, + 'upload_date': '20210822', + 'chapters': 'count:7', + }, + 'params': { + 'skip_download': 'm3u8', }, }, { - # service_name = youtube + # embed-only presentation, only JSON slides info, /v5/ slides (.png) + 'url': 'https://slideslive.com/38979920/', + 'info_dict': { + 'id': 
'38979920', + 'ext': 'mp4', + 'title': 'MoReL: Multi-omics Relational Learning', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:7', + 'timestamp': 1654714970, + 'upload_date': '20220608', + 'chapters': 'count:6', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v2/ slides (.jpg) + 'url': 'https://slideslive.com/38954074', + 'info_dict': { + 'id': '38954074', + 'ext': 'mp4', + 'title': 'Decentralized Attribution of Generative Models', + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:16', + 'timestamp': 1622806321, + 'upload_date': '20210604', + 'chapters': 'count:15', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v4/ slides (.png) + 'url': 'https://slideslive.com/38979570/', + 'info_dict': { + 'id': '38979570', + 'ext': 'mp4', + 'title': 'Efficient Active Search for Combinatorial Optimization Problems', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:9', + 'timestamp': 1654714896, + 'upload_date': '20220608', + 'chapters': 'count:8', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v10/ slides + 'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F', + 'info_dict': { + 'id': '38979880', + 'ext': 'mp4', + 'title': 'The Representation Power of Neural Networks', + 'timestamp': 1654714962, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:22', + 'upload_date': '20220608', + 'chapters': 'count:21', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v7/ slides, 2 video slides + 'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com', + 'playlist_count': 3, + 'info_dict': { + 'id': '38979682-playlist', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models', + }, + 'playlist': [{ + 'info_dict': { + 'id': '38979682', + 'ext': 'mp4', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models', + 'timestamp': 
1654714920, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:30', + 'upload_date': '20220608', + 'chapters': 'count:31', + }, + }, { + 'info_dict': { + 'id': '38979682-021', + 'ext': 'mp4', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021', + 'duration': 3, + 'timestamp': 1654714920, + 'upload_date': '20220608', + }, + }, { + 'info_dict': { + 'id': '38979682-024', + 'ext': 'mp4', + 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024', + 'duration': 4, + 'timestamp': 1654714920, + 'upload_date': '20220608', + }, + }], + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v6/ slides, 1 video slide, edit.videoken.com embed + 'url': 'https://slideslive.com/38979481/', + 'playlist_count': 2, + 'info_dict': { + 'id': '38979481-playlist', + 'title': 'How to Train Your MAML to Excel in Few-Shot Classification', + }, + 'playlist': [{ + 'info_dict': { + 'id': '38979481', + 'ext': 'mp4', + 'title': 'How to Train Your MAML to Excel in Few-Shot Classification', + 'timestamp': 1654714877, + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:43', + 'upload_date': '20220608', + 'chapters': 'count:43', + }, + }, { + 'info_dict': { + 'id': '38979481-013', + 'ext': 'mp4', + 'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013', + 'duration': 3, + 'timestamp': 1654714877, + 'upload_date': '20220608', + }, + }], + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # /v3/ slides, .jpg and .png, service_name = youtube + 'url': 'https://slideslive.com/embed/38932460/', + 'info_dict': { + 'id': 'RTPdrgkyTiE', + 'display_id': '38932460', + 'ext': 'mp4', + 'title': 'Active Learning for Hierarchical Multi-Label Classification', + 'description': 'Watch full version of this video at https://slideslive.com/38932460.', + 'channel': 'SlidesLive Videos - A', + 'channel_id': 'UC62SdArr41t_-_fX40QCLRw', + 'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw', + 
'uploader': 'SlidesLive Videos - A', + 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', + 'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw', + 'upload_date': '20200903', + 'timestamp': 1602599092, + 'duration': 942, + 'age_limit': 0, + 'live_status': 'not_live', + 'playable_in_embed': True, + 'availability': 'unlisted', + 'categories': ['People & Blogs'], + 'tags': [], + 'channel_follower_count': int, + 'like_count': int, + 'view_count': int, + 'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)', + 'thumbnails': 'count:21', + 'chapters': 'count:20', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + # service_name = yoda 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'only_matching': True, }, { - # service_name = url + # dead link, service_name = url 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1', 'only_matching': True, }, { - # service_name = vimeo + # dead link, service_name = vimeo 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3', 'only_matching': True, }] + _WEBPAGE_TESTS = [{ + # only XML slides info + 'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html', + 'info_dict': { + 'id': '38925850', + 'ext': 'mp4', + 'title': 'Towards a Deep Network Architecture for Structured Smoothness', + 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnails': 'count:8', + 'timestamp': 1629671508, + 'upload_date': '20210822', + 'chapters': 'count:7', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + @classmethod + def _extract_embed_urls(cls, url, webpage): + # Reference: https://slideslive.com/embed_presentation.js + for embed_id in re.findall(r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']', webpage): + url_parsed = urllib.parse.urlparse(url) + origin = f'{url_parsed.scheme}://{url_parsed.netloc}' + yield update_url_query( + f'https://slideslive.com/embed/presentation/{embed_id}', { + 
'embed_parent_url': url, + 'embed_container_origin': origin, + }) + + def _download_embed_webpage_handle(self, video_id, headers): + return self._download_webpage_handle( + f'https://slideslive.com/embed/presentation/{video_id}', video_id, + headers=headers, query=traverse_obj(headers, { + 'embed_parent_url': 'Referer', + 'embed_container_origin': 'Origin', + })) + def _extract_custom_m3u8_info(self, m3u8_data): m3u8_dict = {} @@ -108,6 +349,8 @@ class SlidesLiveIE(InfoExtractor): 'VOD-VIDEO-ID': 'service_id', 'VOD-VIDEO-SERVERS': 'video_servers', 'VOD-SUBTITLES': 'subtitles', + 'VOD-SLIDES-JSON-URL': 'slides_json_url', + 'VOD-SLIDES-XML-URL': 'slides_xml_url', } for line in m3u8_data.splitlines(): @@ -126,9 +369,33 @@ class SlidesLiveIE(InfoExtractor): return m3u8_dict + def _extract_formats(self, cdn_hostname, path, video_id): + formats = [] + formats.extend(self._extract_m3u8_formats( + f'https://{cdn_hostname}/{path}/master.m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)) + formats.extend(self._extract_mpd_formats( + f'https://{cdn_hostname}/{path}/master.mpd', + video_id, mpd_id='dash', fatal=False)) + return formats + def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage, urlh = self._download_embed_webpage_handle( + video_id, headers=traverse_obj(parse_qs(url), { + 'Referer': ('embed_parent_url', -1), + 'Origin': ('embed_container_origin', -1)})) + redirect_url = urlh.geturl() + if 'domain_not_allowed' in redirect_url: + domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False) + if not domain: + raise ExtractorError( + 'This is an embed-only presentation. 
Try passing --referer', expected=True) + webpage, _ = self._download_embed_webpage_handle(video_id, headers={ + 'Referer': f'https://{domain}/', + 'Origin': f'https://{domain}', + }) + player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token') player_data = self._download_webpage( f'https://ben.slideslive.com/player/{video_id}', video_id, @@ -139,6 +406,50 @@ class SlidesLiveIE(InfoExtractor): assert service_name in ('url', 'yoda', 'vimeo', 'youtube') service_id = player_info['service_id'] + slides_info_url = None + slides, slides_info = [], [] + if player_info.get('slides_json_url'): + slides_info_url = player_info['slides_json_url'] + slides = traverse_obj(self._download_json( + slides_info_url, video_id, fatal=False, + note='Downloading slides JSON', errnote=False), 'slides', expected_type=list) or [] + for slide_id, slide in enumerate(slides, start=1): + slides_info.append(( + slide_id, traverse_obj(slide, ('image', 'name')), + int_or_none(slide.get('time'), scale=1000))) + + if not slides and player_info.get('slides_xml_url'): + slides_info_url = player_info['slides_xml_url'] + slides = self._download_xml( + slides_info_url, video_id, fatal=False, + note='Downloading slides XML', errnote='Failed to download slides info') + for slide_id, slide in enumerate(slides.findall('./slide'), start=1): + slides_info.append(( + slide_id, xpath_text(slide, './slideName', 'name'), + int_or_none(xpath_text(slide, './timeSec', 'time')))) + + slides_version = int(self._search_regex( + r'https?://slides\.slideslive\.com/\d+/v(\d+)/\w+\.(?:json|xml)', + slides_info_url, 'slides version', default=0)) + if slides_version < 4: + slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s.jpg' + else: + slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s.png' + + chapters, thumbnails = [], [] + if url_or_none(player_info.get('thumbnail')): + thumbnails.append({'id': 'cover', 'url': 
player_info['thumbnail']}) + for slide_id, slide_path, start_time in slides_info: + if slide_path: + thumbnails.append({ + 'id': f'{slide_id:03d}', + 'url': slide_url_template % (video_id, slide_path), + }) + chapters.append({ + 'title': f'Slide {slide_id:03d}', + 'start_time': start_time, + }) + subtitles = {} for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict): webvtt_url = url_or_none(sub.get('webvtt_url')) @@ -154,25 +465,15 @@ class SlidesLiveIE(InfoExtractor): 'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''), 'timestamp': unified_timestamp(player_info.get('timestamp')), 'is_live': player_info.get('playlist_type') != 'vod', - 'thumbnail': url_or_none(player_info.get('thumbnail')), + 'thumbnails': thumbnails, + 'chapters': chapters, 'subtitles': subtitles, } - if service_name in ('url', 'yoda'): - if service_name == 'url': - info['url'] = service_id - else: - cdn_hostname = player_info['video_servers'][0] - formats = [] - formats.extend(self._extract_m3u8_formats( - f'https://{cdn_hostname}/{service_id}/master.m3u8', - video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)) - formats.extend(self._extract_mpd_formats( - f'https://{cdn_hostname}/{service_id}/master.mpd', - video_id, mpd_id='dash', fatal=False)) - info.update({ - 'formats': formats, - }) + if service_name == 'url': + info['url'] = service_id + elif service_name == 'yoda': + info['formats'] = self._extract_formats(player_info['video_servers'][0], service_id, video_id) else: info.update({ '_type': 'url_transparent', @@ -185,4 +486,37 @@ class SlidesLiveIE(InfoExtractor): f'https://player.vimeo.com/video/{service_id}', {'http_headers': {'Referer': url}}) - return info + video_slides = traverse_obj(slides, (..., 'video', 'id')) + if not video_slides: + return info + + def entries(): + yield info + + service_data = self._download_json( + f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data', + video_id, fatal=False, 
query={ + 'player_token': player_token, + 'videos': ','.join(video_slides), + }, note='Downloading video slides info', errnote='Failed to download video slides info') or {} + + for slide_id, slide in enumerate(slides, 1): + if not traverse_obj(slide, ('video', 'service')) == 'yoda': + continue + video_path = traverse_obj(slide, ('video', 'id')) + cdn_hostname = traverse_obj(service_data, ( + video_path, 'video_servers', ...), get_all=False) + if not cdn_hostname or not video_path: + continue + formats = self._extract_formats(cdn_hostname, video_path, video_id) + if not formats: + continue + yield { + 'id': f'{video_id}-{slide_id:03d}', + 'title': f'{info["title"]} - Slide {slide_id:03d}', + 'timestamp': info['timestamp'], + 'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000), + 'formats': formats, + } + + return self.playlist_result(entries(), f'{video_id}-playlist', info['title']) -- cgit v1.2.3 From 4b183d49620e564219c01714ca8639199f6b1cc0 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 14:29:08 +0000 Subject: [extractor/videoken] Add extractors (#5824) Closes #5818 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 7 + yt_dlp/extractor/videoken.py | 336 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+) create mode 100644 yt_dlp/extractor/videoken.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 1b76d8264..e51228aff 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2097,6 +2097,13 @@ from .videocampus_sachsen import ( ) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE +from .videoken import ( + VideoKenIE, + VideoKenPlayerIE, + VideoKenPlaylistIE, + VideoKenCategoryIE, + VideoKenTopicIE, +) from .videomore import ( VideomoreIE, VideomoreVideoIE, diff --git a/yt_dlp/extractor/videoken.py b/yt_dlp/extractor/videoken.py new file mode 
100644 index 000000000..560b41a6d --- /dev/null +++ b/yt_dlp/extractor/videoken.py @@ -0,0 +1,336 @@ +import base64 +import functools +import math +import re +import time +import urllib.parse + +from .common import InfoExtractor +from .slideslive import SlidesLiveIE +from ..utils import ( + ExtractorError, + InAdvancePagedList, + int_or_none, + traverse_obj, + update_url_query, + url_or_none, +) + + +class VideoKenBaseIE(InfoExtractor): + _ORGANIZATIONS = { + 'videos.icts.res.in': 'icts', + 'videos.cncf.io': 'cncf', + 'videos.neurips.cc': 'neurips', + } + _BASE_URL_RE = rf'https?://(?P{"|".join(map(re.escape, _ORGANIZATIONS))})/' + + _PAGE_SIZE = 12 + + def _get_org_id_and_api_key(self, org, video_id): + details = self._download_json( + f'https://analytics.videoken.com/api/videolake/{org}/details', video_id, + note='Downloading organization ID and API key', headers={ + 'Accept': 'application/json', + }) + return details['id'], details['apikey'] + + def _create_slideslive_url(self, video_url, video_id, referer): + if not video_url and not video_id: + return + elif not video_url or 'embed/sign-in' in video_url: + video_url = f'https://slideslive.com/embed/{video_id.lstrip("slideslive-")}' + if url_or_none(referer): + return update_url_query(video_url, { + 'embed_parent_url': referer, + 'embed_container_origin': f'https://{urllib.parse.urlparse(referer).netloc}', + }) + return video_url + + def _extract_videos(self, videos, url): + for video in traverse_obj(videos, (('videos', 'results'), ...)): + video_id = traverse_obj(video, 'youtube_id', 'videoid') + if not video_id: + continue + ie_key = None + if traverse_obj(video, 'type', 'source') == 'youtube': + video_url = video_id + ie_key = 'Youtube' + else: + video_url = traverse_obj(video, 'embed_url', 'embeddableurl') + if urllib.parse.urlparse(video_url).netloc == 'slideslive.com': + ie_key = SlidesLiveIE + video_url = self._create_slideslive_url(video_url, video_id, url) + if not video_url: + continue + yield 
self.url_result(video_url, ie_key, video_id) + + +class VideoKenIE(VideoKenBaseIE): + _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P[\w-]+)' + _TESTS = [{ + # neurips -> videoken -> slideslive + 'url': 'https://videos.neurips.cc/video/slideslive-38922815', + 'info_dict': { + 'id': '38922815', + 'ext': 'mp4', + 'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures', + 'timestamp': 1630939331, + 'upload_date': '20210906', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:330', + 'chapters': 'count:329', + }, + 'params': { + 'skip_download': 'm3u8', + }, + 'expected_warnings': ['Failed to download VideoKen API JSON'], + }, { + # neurips -> videoken -> slideslive -> youtube + 'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348', + 'info_dict': { + 'id': '2Xa_dt78rJE', + 'ext': 'mp4', + 'display_id': '38923348', + 'title': 'Machine Education', + 'description': 'Watch full version of this video at https://slideslive.com/38923348.', + 'channel': 'SlidesLive Videos - G2', + 'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w', + 'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w', + 'uploader': 'SlidesLive Videos - G2', + 'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w', + 'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w', + 'duration': 2504, + 'timestamp': 1618922125, + 'upload_date': '20200131', + 'age_limit': 0, + 'channel_follower_count': int, + 'view_count': int, + 'availability': 'unlisted', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'categories': ['People & Blogs'], + 'tags': [], + 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', + 'thumbnails': 'count:78', + 'chapters': 'count:77', + }, + 'params': { + 'skip_download': 'm3u8', + }, + 'expected_warnings': ['Failed to download VideoKen API JSON'], + }, { + # icts -> videoken -> youtube + 'url': 
'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc', + 'info_dict': { + 'id': 'zysIsojYdvc', + 'ext': 'mp4', + 'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad', + 'description': 'md5:87433069d79719eeadc1962cc2ace00b', + 'channel': 'International Centre for Theoretical Sciences', + 'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ', + 'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ', + 'uploader': 'International Centre for Theoretical Sciences', + 'uploader_id': 'ICTStalks', + 'uploader_url': 'http://www.youtube.com/user/ICTStalks', + 'duration': 3372, + 'upload_date': '20191004', + 'age_limit': 0, + 'live_status': 'not_live', + 'availability': 'public', + 'playable_in_embed': True, + 'channel_follower_count': int, + 'like_count': int, + 'view_count': int, + 'categories': ['Science & Technology'], + 'tags': [], + 'thumbnail': r're:^https?://.*\.(?:jpg|webp)', + 'thumbnails': 'count:42', + 'chapters': 'count:20', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, { + 'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8', + 'only_matching': True, + }, { + 'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI', + 'only_matching': True, + }, { + 'url': 'https://videos.icts.res.in/video/d7HuP_abpKU', + 'only_matching': True, + }] + + def _real_extract(self, url): + hostname, video_id = self._match_valid_url(url).group('host', 'id') + org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id) + details = self._download_json( + 'https://analytics.videoken.com/api/videoinfo_private', video_id, query={ + 'videoid': video_id, + 'org_id': org_id, + }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON', + errnote='Failed to download VideoKen API JSON', fatal=False) + if details: + return next(self._extract_videos({'videos': [details]}, url)) + # fallback for API error 400 response + elif video_id.startswith('slideslive-'): 
+ return self.url_result( + self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id) + elif re.match(r'^[\w-]{11}$', video_id): + self.url_result(video_id, 'Youtube', video_id) + else: + raise ExtractorError('Unable to extract without VideoKen API response') + + +class VideoKenPlayerIE(VideoKenBaseIE): + _VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P\d+)' + _TESTS = [{ + 'url': 'https://player.videoken.com/embed/slideslive-38968434', + 'info_dict': { + 'id': '38968434', + 'ext': 'mp4', + 'title': 'Deep Learning with Label Differential Privacy', + 'timestamp': 1643377020, + 'upload_date': '20220128', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)', + 'thumbnails': 'count:30', + 'chapters': 'count:29', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return self.url_result( + self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id) + + +class VideoKenPlaylistIE(VideoKenBaseIE): + _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P\d+)' + _TESTS = [{ + 'url': 'https://videos.icts.res.in/category/1822/playlist/381', + 'playlist_mincount': 117, + 'info_dict': { + 'id': '381', + 'title': 'Cosmology - The Next Decade', + }, + }] + + def _real_extract(self, url): + hostname, playlist_id = self._match_valid_url(url).group('host', 'id') + org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id) + videos = self._download_json( + f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/', + playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON') + return self.playlist_result(self._extract_videos(videos, url), playlist_id, videos.get('title')) + + +class VideoKenCategoryIE(VideoKenBaseIE): + _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P\d+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://videos.icts.res.in/category/1822/', + 'playlist_mincount': 
500, + 'info_dict': { + 'id': '1822', + 'title': 'Programs', + }, + }, { + 'url': 'https://videos.neurips.cc/category/350/', + 'playlist_mincount': 34, + 'info_dict': { + 'id': '350', + 'title': 'NeurIPS 2018', + }, + }, { + 'url': 'https://videos.cncf.io/category/479/', + 'playlist_mincount': 328, + 'info_dict': { + 'id': '479', + 'title': 'KubeCon + CloudNativeCon Europe\'19', + }, + }] + + def _get_category_page(self, category_id, org_id, page=1, note=None): + return self._download_json( + f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id, + fatal=False, note=note if note else f'Downloading category page {page}', + query={ + 'category_id': category_id, + 'page_number': page, + 'length': self._PAGE_SIZE, + }, headers={'Accept': 'application/json'}) or {} + + def _entries(self, category_id, org_id, url, page): + videos = self._get_category_page(category_id, org_id, page + 1) + yield from self._extract_videos(videos, url) + + def _real_extract(self, url): + hostname, category_id = self._match_valid_url(url).group('host', 'id') + org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id) + category_info = self._get_category_page(category_id, org_id, note='Downloading category info') + category = category_info['category_name'] + total_pages = math.ceil(int(category_info['recordsTotal']) / self._PAGE_SIZE) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, category_id, org_id, url), + total_pages, self._PAGE_SIZE), category_id, category) + + +class VideoKenTopicIE(VideoKenBaseIE): + _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P[^/#?]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://videos.neurips.cc/topic/machine%20learning/', + 'playlist_mincount': 500, + 'info_dict': { + 'id': 'machine_learning', + 'title': 'machine learning', + }, + }, { + 'url': 'https://videos.icts.res.in/topic/gravitational%20waves/', + 'playlist_mincount': 77, + 'info_dict': { + 'id': 
'gravitational_waves', + 'title': 'gravitational waves' + }, + }, { + 'url': 'https://videos.cncf.io/topic/prometheus/', + 'playlist_mincount': 134, + 'info_dict': { + 'id': 'prometheus', + 'title': 'prometheus', + }, + }] + + def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None): + return self._download_json( + 'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False, query={ + 'orgid': org_id, + 'size': self._PAGE_SIZE, + 'query': topic, + 'page': page, + 'sort': 'upload_desc', + 'filter': 'all', + 'token': api_key, + 'is_topic': 'true', + 'category': '', + 'searchid': search_id, + }, headers={'Accept': 'application/json'}, + note=note if note else f'Downloading topic page {page}') or {} + + def _entries(self, topic, org_id, search_id, api_key, url, page): + videos = self._get_topic_page(topic, org_id, search_id, api_key, page + 1) + yield from self._extract_videos(videos, url) + + def _real_extract(self, url): + hostname, topic_id = self._match_valid_url(url).group('host', 'id') + topic = urllib.parse.unquote(topic_id) + topic_id = topic.replace(' ', '_') + org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic) + search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode() + total_pages = int_or_none(self._get_topic_page( + topic, org_id, search_id, api_key, note='Downloading topic info')['total_no_of_pages']) + return self.playlist_result(InAdvancePagedList( + functools.partial(self._entries, topic, org_id, search_id, api_key, url), + total_pages, self._PAGE_SIZE), topic_id, topic) -- cgit v1.2.3 From 53006b35ea8b26ff31a96a423ddaa3304d0a124e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 15:04:09 +0000 Subject: [extractor/amazon] Add `AmazonReviews` extractor (#5857) Closes #5766 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 5 +- yt_dlp/extractor/amazon.py | 116 
+++++++++++++++++++++++++++++++++++++--- 2 files changed, 113 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e51228aff..4fed24c35 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -87,7 +87,10 @@ from .alura import ( AluraCourseIE ) from .amcnetworks import AMCNetworksIE -from .amazon import AmazonStoreIE +from .amazon import ( + AmazonStoreIE, + AmazonReviewsIE, +) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 4d3170683..a03f983e0 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,5 +1,17 @@ +import re + from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none +from ..utils import ( + ExtractorError, + clean_html, + float_or_none, + get_element_by_attribute, + get_element_by_class, + int_or_none, + js_to_json, + traverse_obj, + url_or_none, +) class AmazonStoreIE(InfoExtractor): @@ -9,7 +21,7 @@ class AmazonStoreIE(InfoExtractor): 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', 'info_dict': { 'id': 'B098XNCHLD', - 'title': 'md5:dae240564cbb2642170c02f7f0d7e472', + 'title': str, }, 'playlist_mincount': 1, 'playlist': [{ @@ -20,28 +32,32 @@ class AmazonStoreIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 34, }, - }] + }], + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.in/Sony-WH-1000XM4-Cancelling-Headphones-Bluetooth/dp/B0863TXGM3', 'info_dict': { 'id': 'B0863TXGM3', - 'title': 'md5:d1d3352428f8f015706c84b31e132169', + 'title': str, }, 'playlist_mincount': 4, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 'https://www.amazon.com/dp/B0845NXCXF/', 'info_dict': { 'id': 'B0845NXCXF', - 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', + 'title': str, }, 'playlist-mincount': 1, + 'expected_warnings': ['Unable to extract data'], }, { 'url': 
'https://www.amazon.es/Samsung-Smartphone-s-AMOLED-Quad-c%C3%A1mara-espa%C3%B1ola/dp/B08WX337PQ', 'info_dict': { 'id': 'B08WX337PQ', - 'title': 'md5:f3fa12779bf62ddb6a6ec86a360a858e', + 'title': str, }, 'playlist_mincount': 1, + 'expected_warnings': ['Unable to extract data'], }] def _real_extract(self, url): @@ -52,7 +68,7 @@ class AmazonStoreIE(InfoExtractor): try: data_json = self._search_json( r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, - transform_source=lambda x: x.replace(R'\\u', R'\u')) + transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -66,3 +82,89 @@ class AmazonStoreIE(InfoExtractor): 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + + +class AmazonReviewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/gp/customer-reviews/(?P[^/&#$?]+)' + _TESTS = [{ + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.com/gp/customer-reviews/R10VE9VUSY19L3/ref=cm_cr_arp_d_rvw_ttl?language=es_US', + 'info_dict': { + 'id': 'R10VE9VUSY19L3', + 'ext': 'mp4', + 'title': 'Get squad #Suspicious', + 'description': 'md5:7012695052f440a1e064e402d87e0afb', + 'uploader': 'Kimberly Cronkright', + 'average_rating': 1.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }, { + 'url': 'https://www.amazon.in/gp/customer-reviews/RV1CO8JN5VGXV/', + 'info_dict': { 
+ 'id': 'RV1CO8JN5VGXV', + 'ext': 'mp4', + 'title': 'Not sure about its durability', + 'description': 'md5:1a252c106357f0a3109ebf37d2e87494', + 'uploader': 'Shoaib Gulzar', + 'average_rating': 2.0, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'expected_warnings': ['Review body was not found in webpage'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + for retry in self.RetryManager(): + webpage = self._download_webpage(url, video_id) + review_body = get_element_by_attribute('data-hook', 'review-body', webpage) + if not review_body: + retry.error = ExtractorError('Review body was not found in webpage', expected=True) + + formats, subtitles = [], {} + + manifest_url = self._search_regex( + r'data-video-url="([^"]+)"', review_body, 'm3u8 url', default=None) + if url_or_none(manifest_url): + fmts, subtitles = self._extract_m3u8_formats_and_subtitles( + manifest_url, video_id, 'mp4', fatal=False) + formats.extend(fmts) + + video_url = self._search_regex( + r']+\bvalue="([^"]+)"[^>]+\bclass="video-url"', review_body, 'mp4 url', default=None) + if url_or_none(video_url): + formats.append({ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'http-mp4', + }) + + if not formats: + self.raise_no_formats('No video found for this customer review', expected=True) + + return { + 'id': video_id, + 'title': (clean_html(get_element_by_attribute('data-hook', 'review-title', webpage)) + or self._html_extract_title(webpage)), + 'description': clean_html(traverse_obj(re.findall( + r'(.+?)', review_body), -1)), + 'uploader': clean_html(get_element_by_class('a-profile-name', webpage)), + 'average_rating': float_or_none(clean_html(get_element_by_attribute( + 'data-hook', 'review-star-rating', webpage) or '').partition(' ')[0]), + 'thumbnail': self._search_regex( + r'data-thumbnail-url="([^"]+)"', review_body, 'thumbnail', default=None), + 'formats': formats, + 'subtitles': subtitles, + } -- cgit v1.2.3 From 2647c933b8ed22f95dd8e9866c4db031867a1bc8 Mon Sep 17 
00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 16:32:54 +0000 Subject: [extractor/wistia] Improve extension detection (#5415) Closes #5053 Authored by: bashonly, Grub4k, pukkandan --- yt_dlp/extractor/wistia.py | 41 ++++++++++------ yt_dlp/utils.py | 120 ++++++++++++++++++++++++++++----------------- 2 files changed, 103 insertions(+), 58 deletions(-) diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index 38dcc2f5b..884fa4b5f 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -6,12 +6,15 @@ from base64 import b64decode from .common import InfoExtractor from ..utils import ( ExtractorError, + HEADRequest, + determine_ext, float_or_none, int_or_none, parse_qs, traverse_obj, try_get, update_url_query, + urlhandle_detect_ext, ) @@ -34,6 +37,16 @@ class WistiaBaseIE(InfoExtractor): return embed_config + def _get_real_ext(self, url): + ext = determine_ext(url, default_ext='bin') + if ext == 'bin': + urlh = self._request_webpage( + HEADRequest(url), None, note='Checking media extension', + errnote='HEAD request returned error', fatal=False) + if urlh: + ext = urlhandle_detect_ext(urlh, default='bin') + return 'mp4' if ext == 'mov' else ext + def _extract_media(self, embed_config): data = embed_config['media'] video_id = data['hashedId'] @@ -51,13 +64,13 @@ class WistiaBaseIE(InfoExtractor): continue elif atype in ('still', 'still_image'): thumbnails.append({ - 'url': aurl, + 'url': aurl.replace('.bin', f'.{self._get_real_ext(aurl)}'), 'width': int_or_none(a.get('width')), 'height': int_or_none(a.get('height')), 'filesize': int_or_none(a.get('size')), }) else: - aext = a.get('ext') + aext = a.get('ext') or self._get_real_ext(aurl) display_name = a.get('display_name') format_id = atype if atype and atype.endswith('_video') and display_name: @@ -169,26 +182,26 @@ class WistiaIE(WistiaBaseIE): 'md5': '10c1ce9c4dde638202513ed17a3767bd', 'info_dict': { 'id': 'a6ndpko1wg', - 'ext': 
'bin', + 'ext': 'mp4', 'title': 'Episode 2: Boxed Water\'s retention is thirsty', 'upload_date': '20210324', 'description': 'md5:da5994c2c2d254833b412469d9666b7a', 'duration': 966.0, 'timestamp': 1616614369, - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/53dc60239348dc9b9fba3755173ea4c2.png', } }, { 'url': 'wistia:5vd7p4bct5', 'md5': 'b9676d24bf30945d97060638fbfe77f0', 'info_dict': { 'id': '5vd7p4bct5', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'md5:eaa9f64c4efd7b5f098b9b6118597679', 'description': 'md5:a9bea0315f0616aa5df2dc413ddcdd0f', 'upload_date': '20220915', 'timestamp': 1663258727, 'duration': 623.019, - 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.(?:jpg|bin)$', + 'thumbnail': r're:https?://embed(?:-ssl)?.wistia.com/.+\.jpg$', }, }, { 'url': 'wistia:sh7fpupwlt', @@ -208,25 +221,25 @@ class WistiaIE(WistiaBaseIE): 'url': 'https://www.weidert.com/blog/wistia-channels-video-marketing-tool', 'info_dict': { 'id': 'cqwukac3z1', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'How Wistia Channels Can Help Capture Inbound Value From Your Video Content', 'duration': 158.125, 'timestamp': 1618974400, 'description': 'md5:27abc99a758573560be72600ef95cece', 'upload_date': '20210421', - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/6c551820ae950cdee2306d6cbe9ef742.jpg', } }, { 'url': 'https://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', 'md5': 'b9676d24bf30945d97060638fbfe77f0', 'info_dict': { 'id': '5vd7p4bct5', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', 'upload_date': '20220915', 'timestamp': 1663258727, 'duration': 623.019, - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.bin', + 
'thumbnail': 'https://embed-ssl.wistia.com/deliveries/83e6ec693e2c05a0ce65809cbaead86a.jpg', 'description': 'a Paywall Videos video', }, }] @@ -302,9 +315,9 @@ class WistiaChannelIE(WistiaBaseIE): 'url': 'https://fast.wistia.net/embed/channel/3802iirk0l?wchannelid=3802iirk0l&wmediaid=sp5dqjzw3n', 'info_dict': { 'id': 'sp5dqjzw3n', - 'ext': 'bin', + 'ext': 'mp4', 'title': 'The Roof S2: The Modern CRO', - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/dadfa9233eaa505d5e0c85c23ff70741.png', 'duration': 86.487, 'description': 'A sales leader on The Roof? Man, they really must be letting anyone up here this season.\n', 'timestamp': 1619790290, @@ -334,12 +347,12 @@ class WistiaChannelIE(WistiaBaseIE): 'info_dict': { 'id': 'pz0m0l0if3', 'title': 'A Framework for Improving Product Team Performance', - 'ext': 'bin', + 'ext': 'mp4', 'timestamp': 1653935275, 'upload_date': '20220530', 'description': 'Learn how to help your company improve and achieve your product related goals.', 'duration': 1854.39, - 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.bin', + 'thumbnail': 'https://embed-ssl.wistia.com/deliveries/12fd19e56413d9d6f04e2185c16a6f8854e25226.png', }, 'params': {'noplaylist': True, 'skip_download': True}, }] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 65408bf19..3947dcf2e 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3480,67 +3480,93 @@ def error_to_str(err): return f'{type(err).__name__}: {err}' -def mimetype2ext(mt): - if mt is None: +def mimetype2ext(mt, default=NO_DEFAULT): + if not isinstance(mt, str): + if default is not NO_DEFAULT: + return default return None - mt, _, params = mt.partition(';') - mt = mt.strip() - - FULL_MAP = { - 'audio/mp4': 'm4a', - # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
Here use .mp3 as - # it's the most popular one - 'audio/mpeg': 'mp3', - 'audio/x-wav': 'wav', - 'audio/wav': 'wav', - 'audio/wave': 'wav', - } - - ext = FULL_MAP.get(mt) - if ext is not None: - return ext - - SUBTYPE_MAP = { + MAP = { + # video '3gpp': '3gp', - 'smptett+xml': 'tt', - 'ttaf+xml': 'dfxp', - 'ttml+xml': 'ttml', + 'mp2t': 'ts', + 'mp4': 'mp4', + 'mpeg': 'mpeg', + 'mpegurl': 'm3u8', + 'quicktime': 'mov', + 'webm': 'webm', + 'vp9': 'vp9', 'x-flv': 'flv', + 'x-m4v': 'm4v', + 'x-matroska': 'mkv', + 'x-mng': 'mng', 'x-mp4-fragmented': 'mp4', - 'x-ms-sami': 'sami', + 'x-ms-asf': 'asf', 'x-ms-wmv': 'wmv', - 'mpegurl': 'm3u8', - 'x-mpegurl': 'm3u8', - 'vnd.apple.mpegurl': 'm3u8', + 'x-msvideo': 'avi', + + # application (streaming playlists) 'dash+xml': 'mpd', 'f4m+xml': 'f4m', 'hds+xml': 'f4m', + 'vnd.apple.mpegurl': 'm3u8', 'vnd.ms-sstr+xml': 'ism', - 'quicktime': 'mov', - 'mp2t': 'ts', + 'x-mpegurl': 'm3u8', + + # audio + 'audio/mp4': 'm4a', + # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
+ # Using .mp3 as it's the most popular one + 'audio/mpeg': 'mp3', + 'audio/webm': 'weba', + 'audio/x-matroska': 'mka', + 'audio/x-mpegurl': 'm3u', + 'midi': 'mid', + 'ogg': 'ogg', + 'wav': 'wav', + 'wave': 'wav', + 'x-aac': 'aac', + 'x-flac': 'flac', + 'x-m4a': 'm4a', + 'x-realaudio': 'ra', 'x-wav': 'wav', - 'filmstrip+json': 'fs', - 'svg+xml': 'svg', - } - _, _, subtype = mt.rpartition('/') - ext = SUBTYPE_MAP.get(subtype.lower()) - if ext is not None: - return ext + # image + 'avif': 'avif', + 'bmp': 'bmp', + 'gif': 'gif', + 'jpeg': 'jpg', + 'png': 'png', + 'svg+xml': 'svg', + 'tiff': 'tif', + 'vnd.wap.wbmp': 'wbmp', + 'webp': 'webp', + 'x-icon': 'ico', + 'x-jng': 'jng', + 'x-ms-bmp': 'bmp', + + # caption + 'filmstrip+json': 'fs', + 'smptett+xml': 'tt', + 'ttaf+xml': 'dfxp', + 'ttml+xml': 'ttml', + 'x-ms-sami': 'sami', - SUFFIX_MAP = { + # misc + 'gzip': 'gz', 'json': 'json', 'xml': 'xml', 'zip': 'zip', - 'gzip': 'gz', } - _, _, suffix = subtype.partition('+') - ext = SUFFIX_MAP.get(suffix) - if ext is not None: - return ext + mimetype = mt.partition(';')[0].strip().lower() + _, _, subtype = mimetype.rpartition('/') + ext = traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1]) + if ext: + return ext + elif default is not NO_DEFAULT: + return default return subtype.replace('+', '.') @@ -3634,7 +3660,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): return 'mkv' if allow_mkv else preferences[-1] -def urlhandle_detect_ext(url_handle): +def urlhandle_detect_ext(url_handle, default=NO_DEFAULT): getheader = url_handle.headers.get cd = getheader('Content-Disposition') @@ -3645,7 +3671,13 @@ def urlhandle_detect_ext(url_handle): if e: return e - return mimetype2ext(getheader('Content-Type')) + meta_ext = getheader('x-amz-meta-name') + if meta_ext: + e = meta_ext.rpartition('.')[2] + if e: + return e + + return mimetype2ext(getheader('Content-Type'), default=default) def encode_data_uri(data, mime_type): -- cgit v1.2.3 From 
c1edb853b0a0cc69ea08337c0c5aee669b26d3d2 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 29 Dec 2022 17:31:01 +0000 Subject: [extractor/kick] Add extractor (#5736) Closes #5722 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 ++ yt_dlp/extractor/kick.py | 127 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 yt_dlp/extractor/kick.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4fed24c35..a2b92b85a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -844,6 +844,10 @@ from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, ) +from .kick import ( + KickIE, + KickVODIE, +) from .kicker import KickerIE from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py new file mode 100644 index 000000000..a79ffb7a9 --- /dev/null +++ b/yt_dlp/extractor/kick.py @@ -0,0 +1,127 @@ +from .common import InfoExtractor + +from ..utils import ( + HEADRequest, + UserNotLive, + float_or_none, + merge_dicts, + str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, +) + + +class KickBaseIE(InfoExtractor): + def _real_initialize(self): + self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session') + xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN') + if not xsrf_token: + self.write_debug('kick.com did not set XSRF-TOKEN cookie') + KickBaseIE._API_HEADERS = { + 'Authorization': f'Bearer {xsrf_token.value}', + 'X-XSRF-TOKEN': xsrf_token.value, + } if xsrf_token else {} + + def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs): + return self._download_json( + f'https://kick.com/api/v1/{path}', display_id, note=note, + headers=merge_dicts(headers, self._API_HEADERS), **kwargs) + + +class KickIE(KickBaseIE): + _VALID_URL = 
r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P[\w_]+)' + _TESTS = [{ + 'url': 'https://kick.com/yuppy', + 'info_dict': { + 'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21', + 'ext': 'mp4', + 'title': str, + 'description': str, + 'channel': 'yuppy', + 'channel_id': '33538', + 'uploader': 'Yuppy', + 'uploader_id': '33793', + 'upload_date': str, + 'live_status': 'is_live', + 'timestamp': int, + 'thumbnail': r're:^https?://.*\.jpg', + 'categories': list, + }, + 'skip': 'livestream', + }, { + 'url': 'https://kick.com/kmack710', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel = self._match_id(url) + response = self._call_api(f'channels/{channel}', channel) + if not traverse_obj(response, 'livestream', expected_type=dict): + raise UserNotLive(video_id=channel) + + return { + 'id': str(traverse_obj( + response, ('livestream', ('slug', 'id')), get_all=False, default=channel)), + 'formats': self._extract_m3u8_formats( + response['playback_url'], channel, 'mp4', live=True), + 'title': traverse_obj( + response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), + 'description': traverse_obj(response, ('user', 'bio')), + 'channel': channel, + 'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))), + 'uploader': traverse_obj(response, 'name', ('user', 'username')), + 'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))), + 'is_live': True, + 'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))), + 'thumbnail': traverse_obj( + response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none), + 'categories': traverse_obj(response, ('recent_categories', ..., 'name')), + } + + +class KickVODIE(KickBaseIE): + _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _TESTS = [{ + 'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35', + 
'md5': '73691206a6a49db25c5aa1588e6538fc', + 'info_dict': { + 'id': '54244b5e-050a-4df4-a013-b2433dafbe35', + 'ext': 'mp4', + 'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links', + 'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f', + 'channel': 'kmack710', + 'channel_id': '16278', + 'uploader': 'Kmack710', + 'uploader_id': '16412', + 'upload_date': '20221206', + 'timestamp': 1670318289, + 'duration': 40104.0, + 'thumbnail': r're:^https?://.*\.jpg', + 'categories': ['Grand Theft Auto V'], + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + response = self._call_api(f'video/{video_id}', video_id) + + return { + 'id': video_id, + 'formats': self._extract_m3u8_formats(response['source'], video_id, 'mp4'), + 'title': traverse_obj( + response, ('livestream', ('session_title', 'slug')), get_all=False, default=''), + 'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')), + 'channel': traverse_obj(response, ('livestream', 'channel', 'slug')), + 'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))), + 'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')), + 'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))), + 'timestamp': unified_timestamp(response.get('created_at')), + 'duration': float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000), + 'thumbnail': traverse_obj( + response, ('livestream', 'thumbnail'), expected_type=url_or_none), + 'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')), + } -- cgit v1.2.3 From ca2f6e14e65f0faf92cabff8b7e5b4760363c52e Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Fri, 30 Dec 2022 03:01:22 +0900 Subject: [extractor/BiliLive] Fix extractor - Remove unnecessary group in `_VALID_URL` - This extractor always returns livestreams --- yt_dlp/extractor/bilibili.py | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 616a54960..37711c138 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1034,7 +1034,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): class BiliLiveIE(InfoExtractor): - _VALID_URL = r'https?://live.bilibili.com/(blanc/)?(?P\d+)' + _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P\d+)' _TESTS = [{ 'url': 'https://live.bilibili.com/196', @@ -1114,6 +1114,7 @@ class BiliLiveIE(InfoExtractor): 'thumbnail': room_data.get('user_cover'), 'timestamp': stream_data.get('live_time'), 'formats': formats, + 'is_live': True, 'http_headers': { 'Referer': url, }, -- cgit v1.2.3 From e107c2b8cf8d6f3506d07bc64fc243682ee49b1e Mon Sep 17 00:00:00 2001 From: nosoop Date: Thu, 29 Dec 2022 10:46:43 -0800 Subject: [extractor/soundcloud] Support user permalink (#5842) Closes #5841 Authored by: nosoop --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/soundcloud.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index a2b92b85a..352de83ca 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1710,6 +1710,7 @@ from .soundcloud import ( SoundcloudSetIE, SoundcloudRelatedIE, SoundcloudUserIE, + SoundcloudUserPermalinkIE, SoundcloudTrackStationIE, SoundcloudPlaylistIE, SoundcloudSearchIE, diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 4879d48c8..979f23f44 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -782,6 +782,27 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): '%s (%s)' % (user['username'], resource.capitalize())) +class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'https?://api\.soundcloud\.com/users/(?P\d+)' + IE_NAME = 'soundcloud:user:permalink' + _TESTS = [{ + 'url': 
'https://api.soundcloud.com/users/30909869', + 'info_dict': { + 'id': '30909869', + 'title': 'neilcic', + }, + 'playlist_mincount': 23, + }] + + def _real_extract(self, url): + user_id = self._match_id(url) + user = self._download_json( + self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) + + return self._extract_playlist( + f'{self._API_V2_BASE}stream/users/{user["id"]}', str(user['id']), user.get('username')) + + class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P[^/?#&]+)' IE_NAME = 'soundcloud:trackstation' -- cgit v1.2.3 From efa944f4bc892321a0d01dcddb210405761ecada Mon Sep 17 00:00:00 2001 From: Anant Murmu Date: Fri, 30 Dec 2022 08:13:49 +0530 Subject: [cleanup] Use `random.choices` (#5800) Authored by: freezboltz --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/adn.py | 2 +- yt_dlp/extractor/discovery.py | 2 +- yt_dlp/extractor/funimation.py | 2 +- yt_dlp/extractor/linuxacademy.py | 5 ++--- yt_dlp/extractor/tencent.py | 4 ++-- yt_dlp/extractor/tiktok.py | 10 +++++----- yt_dlp/extractor/videa.py | 2 +- yt_dlp/extractor/viu.py | 2 +- yt_dlp/extractor/vrv.py | 2 +- yt_dlp/extractor/youku.py | 4 ++-- 11 files changed, 18 insertions(+), 19 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index abb0ddfe5..17f37a643 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1068,7 +1068,7 @@ class YoutubeDL: # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. 
- sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) + sep = ''.join(random.choices(ascii_letters, k=32)) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index e0c18c877..f1f55e87f 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -168,7 +168,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' }, data=b'')['token'] links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') - self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) + self._K = ''.join(random.choices('0123456789abcdef', k=16)) message = bytes_to_intlist(json.dumps({ 'k': self._K, 't': token, diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index fd3fc8fb0..e6e109d5c 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -78,7 +78,7 @@ class DiscoveryIE(DiscoveryGoBaseIE): 'Downloading token JSON metadata', query={ 'authRel': 'authorization', 'client_id': '3020a40c2356a645b4b4', - 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]), + 'nonce': ''.join(random.choices(string.ascii_letters, k=32)), 'redirectUri': 'https://www.discovery.com/', })['access_token'] diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 18363c1b9..47c316664 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -210,7 +210,7 @@ class FunimationIE(FunimationBaseIE): page = self._download_json( 'https://www.funimation.com/api/showexperience/%s/' % experience_id, display_id, headers=headers, expected_status=403, query={ - 'pinst_id': ''.join([random.choice(string.digits + string.ascii_letters) for _ in range(8)]), + 'pinst_id': ''.join(random.choices(string.digits + string.ascii_letters, k=8)), }, note=f'Downloading {format_name} JSON') 
sources = page.get('items') or [] if not sources: diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py index a570248b7..7bb64e17c 100644 --- a/yt_dlp/extractor/linuxacademy.py +++ b/yt_dlp/extractor/linuxacademy.py @@ -75,9 +75,8 @@ class LinuxAcademyIE(InfoExtractor): def _perform_login(self, username, password): def random_string(): - return ''.join([ - random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') - for _ in range(32)]) + return ''.join(random.choices( + '0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~', k=32)) webpage, urlh = self._download_webpage_handle( self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index ff8bf991e..44cae0472 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -32,7 +32,7 @@ class TencentBaseIE(InfoExtractor): padding_mode='whitespace').hex().upper() def _get_video_api_response(self, video_url, video_id, series_id, subtitle_format, video_format, video_quality): - guid = ''.join([random.choice(string.digits + string.ascii_lowercase) for _ in range(16)]) + guid = ''.join(random.choices(string.digits + string.ascii_lowercase, k=16)) ckey = self._get_ckey(video_id, video_url, guid) query = { 'vid': video_id, @@ -55,7 +55,7 @@ class TencentBaseIE(InfoExtractor): 'platform': self._PLATFORM, # For VQQ 'guid': guid, - 'flowid': ''.join(random.choice(string.digits + string.ascii_lowercase) for _ in range(32)), + 'flowid': ''.join(random.choices(string.digits + string.ascii_lowercase, k=32)), } return self._search_json(r'QZOutputJson=', self._download_webpage( diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 2dd4510cc..709d944dc 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -49,7 +49,7 @@ class TikTokBaseIE(InfoExtractor): def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True, 
note='Downloading API JSON', errnote='Unable to download API page'): - self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160))) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) if webpage_cookies.get('sid_tt'): self._set_cookie(self._API_HOSTNAME, 'sid_tt', webpage_cookies['sid_tt'].value) @@ -68,8 +68,8 @@ class TikTokBaseIE(InfoExtractor): 'build_number': app_version, 'manifest_version_code': manifest_app_version, 'update_version_code': manifest_app_version, - 'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)), - 'uuid': ''.join([random.choice(string.digits) for _ in range(16)]), + 'openudid': ''.join(random.choices('0123456789abcdef', k=16)), + 'uuid': ''.join(random.choices(string.digits, k=16)), '_rticket': int(time.time() * 1000), 'ts': int(time.time()), 'device_brand': 'Google', @@ -638,7 +638,7 @@ class TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. 
} for page in itertools.count(1): @@ -686,7 +686,7 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul 'cursor': 0, 'count': 20, 'type': 5, - 'device_id': ''.join(random.choice(string.digits) for i in range(19)) + 'device_id': ''.join(random.choices(string.digits, k=19)) } for page in itertools.count(1): diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 52fa8fcec..59ae933b0 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -119,7 +119,7 @@ class VideaIE(InfoExtractor): result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] query = parse_qs(player_url) - random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) + random_seed = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) query['_s'] = random_seed query['_t'] = result[:16] diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 19d48234e..dd4cad7ba 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -251,7 +251,7 @@ class ViuOTTIE(InfoExtractor): return self._user_token def _get_token(self, country_code, video_id): - rand = ''.join(random.choice('0123456789') for _ in range(10)) + rand = ''.join(random.choices('0123456789', k=10)) return self._download_json( f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id, headers={'Content-Type': 'application/json'}, note='Getting bearer token', diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 89fa7affc..ad9dc568a 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -30,7 +30,7 @@ class VRVBaseIE(InfoExtractor): base_url = self._API_DOMAIN + '/core/' + path query = [ ('oauth_consumer_key', self._API_PARAMS['oAuthKey']), - ('oauth_nonce', ''.join([random.choice(string.ascii_letters) for _ in range(32)])), + ('oauth_nonce', ''.join(random.choices(string.ascii_letters, k=32))), ('oauth_signature_method', 'HMAC-SHA1'), ('oauth_timestamp', int(time.time())), ] 
diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index ab59200d7..404f196f4 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -129,8 +129,8 @@ class YoukuIE(InfoExtractor): @staticmethod def get_ysuid(): - return '%d%s' % (int(time.time()), ''.join([ - random.choice(string.ascii_letters) for i in range(3)])) + return '%d%s' % (int(time.time()), ''.join( + random.choices(string.ascii_letters, k=3))) def get_format_name(self, fm): _dict = { -- cgit v1.2.3 From 4455918e7f090ace0b0c2537bbfd364956eb66cb Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 10:12:13 +0530 Subject: [extractor/stv] Detect DRM Closes #5320 --- yt_dlp/extractor/stv.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py index c879fb52e..8b3e63538 100644 --- a/yt_dlp/extractor/stv.py +++ b/yt_dlp/extractor/stv.py @@ -73,6 +73,8 @@ class STVPlayerIE(InfoExtractor): }) programme = result.get('programme') or {} + if programme.get('drmEnabled'): + self.report_drm(video_id) return { '_type': 'url_transparent', -- cgit v1.2.3 From 119e40ef64b25f66a39246e87ce6c143cd34276d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 11:15:41 +0530 Subject: Add pre-processor stage `video` Related: #456, #5808 --- README.md | 44 +++++++++++++++++++++++--------------------- yt_dlp/YoutubeDL.py | 17 ++++++++++------- yt_dlp/options.py | 53 +++++++++++++++++++++++++---------------------------- yt_dlp/utils.py | 2 +- 4 files changed, 59 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index 440ed1934..d31fedb00 100644 --- a/README.md +++ b/README.md @@ -725,7 +725,7 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of - --use-postprocessor, and "video" (default). + --use-postprocessor (default: video). Implies --quiet. 
Implies --simulate unless --no-simulate or later stages of WHEN are used. This option can be used multiple times @@ -979,18 +979,18 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with - when to execute it (after_move if - unspecified), separated by a ":". Supported - values of "WHEN" are the same as that of - --use-postprocessor. Same syntax as the - output template can be used to pass any - field as arguments to the command. After - download, an additional field "filepath" - that contains the final path of the - downloaded file is also available, and if no - fields are passed, %(filepath)q is appended - to the end of the command. This option can - be used multiple times + when to execute it, separated by a ":". + Supported values of "WHEN" are the same as + that of --use-postprocessor (default: + after_move). Same syntax as the output + template can be used to pass any field as + arguments to the command. After download, an + additional field "filepath" that contains + the final path of the downloaded file is + also available, and if no fields are passed, + %(filepath)q is appended to the end of the + command. This option can be used multiple + times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt) @@ -1028,14 +1028,16 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi postprocessor is invoked. 
It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), - "before_dl" (before each video download), - "post_process" (after each video download; - default), "after_move" (after moving video - file to it's final locations), "after_video" - (after downloading and processing all - formats of a video), or "playlist" (at end - of playlist). This option can be used - multiple times to add different postprocessors + "video" (after --format; before + --print/--output), "before_dl" (before each + video download), "post_process" (after each + video download; default), "after_move" + (after moving video file to it's final + locations), "after_video" (after downloading + and processing all formats of a video), or + "playlist" (at end of playlist). This option + can be used multiple times to add different + postprocessors ## SponsorBlock Options: Make chapter entries for, or remove various segments (sponsor, diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 17f37a643..505732327 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2977,6 +2977,16 @@ class YoutubeDL: # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) + + def replace_info_dict(new_info): + nonlocal info_dict + if new_info == info_dict: + return + info_dict.clear() + info_dict.update(new_info) + + new_info, _ = self.pre_process(info_dict, 'video') + replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility @@ -3090,13 +3100,6 @@ class YoutubeDL: for link_type, should_write in write_links.items()): return - def replace_info_dict(new_info): - nonlocal info_dict - if new_info == info_dict: - return - info_dict.clear() - info_dict.update(new_info) - new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 
bc574b885..096a50249 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -277,6 +277,20 @@ def create_parser(): out_dict[key] = out_dict.get(key, []) + [val] if append else val setattr(parser.values, option.dest, out_dict) + def when_prefix(default): + return { + 'default': {}, + 'type': 'str', + 'action': 'callback', + 'callback': _dict_from_options_callback, + 'callback_kwargs': { + 'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)), + 'default_key': default, + 'multiple_keys': False, + 'append': True, + }, + } + parser = _YoutubeDLOptionParser() alias_group = optparse.OptionGroup(parser, 'Aliases') Formatter = string.Formatter() @@ -1086,28 +1100,16 @@ def create_parser(): help='Do not download the video but write all related files (Alias: --no-download)') verbosity.add_option( '-O', '--print', - metavar='[WHEN:]TEMPLATE', dest='forceprint', default={}, type='str', - action='callback', callback=_dict_from_options_callback, - callback_kwargs={ - 'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)), - 'default_key': 'video', - 'multiple_keys': False, - 'append': True, - }, help=( + metavar='[WHEN:]TEMPLATE', dest='forceprint', **when_prefix('video'), + help=( 'Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". ' - 'Supported values of "WHEN" are the same as that of --use-postprocessor, and "video" (default). ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: video). ' 'Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. 
' 'This option can be used multiple times')) verbosity.add_option( '--print-to-file', - metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', default={}, type='str', nargs=2, - action='callback', callback=_dict_from_options_callback, - callback_kwargs={ - 'allowed_keys': 'video|' + '|'.join(map(re.escape, POSTPROCESS_WHEN)), - 'default_key': 'video', - 'multiple_keys': False, - 'append': True, - }, help=( + metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', nargs=2, **when_prefix('video'), + help=( 'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. ' 'FILE uses the same syntax as the output template. This option can be used multiple times')) verbosity.add_option( @@ -1629,16 +1631,10 @@ def create_parser(): help='Location of the ffmpeg binary; either the path to the binary or its containing directory') postproc.add_option( '--exec', - metavar='[WHEN:]CMD', dest='exec_cmd', default={}, type='str', - action='callback', callback=_dict_from_options_callback, - callback_kwargs={ - 'allowed_keys': '|'.join(map(re.escape, POSTPROCESS_WHEN)), - 'default_key': 'after_move', - 'multiple_keys': False, - 'append': True, - }, help=( - 'Execute a command, optionally prefixed with when to execute it (after_move if unspecified), separated by a ":". ' - 'Supported values of "WHEN" are the same as that of --use-postprocessor. ' + metavar='[WHEN:]CMD', dest='exec_cmd', **when_prefix('after_move'), + help=( + 'Execute a command, optionally prefixed with when to execute it, separated by a ":". ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). ' 'Same syntax as the output template can be used to pass any field as arguments to the command. ' 'After download, an additional field "filepath" that contains the final path of the downloaded file ' 'is also available, and if no fields are passed, %(filepath)q is appended to the end of the command. 
' @@ -1714,7 +1710,8 @@ def create_parser(): 'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'The "when" argument determines when the postprocessor is invoked. ' 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' - '"before_dl" (before each video download), "post_process" (after each video download; default), ' + '"video" (after --format; before --print/--output), "before_dl" (before each video download), ' + '"post_process" (after each video download; default), ' '"after_move" (after moving video file to it\'s final locations), ' '"after_video" (after downloading and processing all formats of a video), ' 'or "playlist" (at end of playlist). ' diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3947dcf2e..43b5fda1d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3395,7 +3395,7 @@ def qualities(quality_ids): return q -POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist') +POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist') DEFAULT_OUTTMPL = { -- cgit v1.2.3 From fe74d5b592438c669f5717b34504f27c34ca9904 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 11:01:14 +0530 Subject: Let `--parse/replace-in-metadata` run at any post-processing stage Closes #5808, #456 --- README.md | 13 +++++++++---- yt_dlp/__init__.py | 14 ++++++++------ yt_dlp/options.py | 12 +++++++----- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d31fedb00..500f92387 100644 --- a/README.md +++ b/README.md @@ -952,13 +952,18 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi mkv/mka video files --no-embed-info-json Do not embed the infojson as an attachment to the video file - --parse-metadata FROM:TO Parse additional metadata like title/artist + --parse-metadata [WHEN:]FROM:TO + Parse additional 
metadata like title/artist from other fields; see "MODIFYING METADATA" - for details - --replace-in-metadata FIELDS REGEX REPLACE + for details. Supported values of "WHEN" are + the same as that of --use-postprocessor + (default: pre_process) + --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE Replace text in a metadata field using the given regex. This option can be used - multiple times + multiple times. Supported values of "WHEN" + are the same as that of --use-postprocessor + (default: pre_process) --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --concat-playlist POLICY Concatenate videos in a playlist. One of diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 202f102ba..3490816c4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -386,10 +386,12 @@ def validate_options(opts): raise ValueError(f'{cmd} is invalid; {err}') yield action - parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } # Other options if opts.playlist_items is not None: @@ -561,11 +563,11 @@ def validate_options(opts): def get_postprocessors(opts): yield from opts.add_postprocessors - if opts.parse_metadata: + for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - 'when': 'pre_process' + 'actions': actions, + 'when': when } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 096a50249..ed83cb763 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1586,14 +1586,16 @@ def 
create_parser(): help=optparse.SUPPRESS_HELP) postproc.add_option( '--parse-metadata', - metavar='FROM:TO', dest='parse_metadata', action='append', + metavar='[WHEN:]FROM:TO', dest='parse_metadata', **when_prefix('pre_process'), help=( - 'Parse additional metadata like title/artist from other fields; ' - 'see "MODIFYING METADATA" for details')) + 'Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details. ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)')) postproc.add_option( '--replace-in-metadata', - dest='parse_metadata', metavar='FIELDS REGEX REPLACE', action='append', nargs=3, - help='Replace text in a metadata field using the given regex. This option can be used multiple times') + dest='parse_metadata', metavar='[WHEN:]FIELDS REGEX REPLACE', nargs=3, **when_prefix('pre_process'), + help=( + 'Replace text in a metadata field using the given regex. This option can be used multiple times. ' + 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)')) postproc.add_option( '--xattrs', '--xattr', action='store_true', dest='xattrs', default=False, -- cgit v1.2.3 From d5f043d127cac1e8ec8a6eacde04ad1133600a16 Mon Sep 17 00:00:00 2001 From: ChillingPepper <90042155+ChillingPepper@users.noreply.github.com> Date: Fri, 30 Dec 2022 07:38:38 +0100 Subject: [utils] js_to_json: Fix bug in f55523c (#5771) Authored by: ChillingPepper, pukkandan --- test/test_utils.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ yt_dlp/utils.py | 8 +++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 49ab3796b..82ae77ea2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -954,6 +954,85 @@ class TestUtil(unittest.TestCase): ) self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') + def test_js_to_json_vars_strings(self): + 
self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'null': a, + 'nullStr': b, + 'true': c, + 'trueStr': d, + 'false': e, + 'falseStr': f, + 'unresolvedVar': g, + }''', + { + 'a': 'null', + 'b': '"null"', + 'c': 'true', + 'd': '"true"', + 'e': 'false', + 'f': '"false"', + 'g': 'var', + } + )), + { + 'null': None, + 'nullStr': 'null', + 'true': True, + 'trueStr': 'true', + 'false': False, + 'falseStr': 'false', + 'unresolvedVar': 'var' + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'int': a, + 'intStr': b, + 'float': c, + 'floatStr': d, + }''', + { + 'a': '123', + 'b': '"123"', + 'c': '1.23', + 'd': '"1.23"', + } + )), + { + 'int': 123, + 'intStr': '123', + 'float': 1.23, + 'floatStr': '1.23', + } + ) + + self.assertDictEqual( + json.loads(js_to_json( + '''{ + 'object': a, + 'objectStr': b, + 'array': c, + 'arrayStr': d, + }''', + { + 'a': '{}', + 'b': '"{}"', + 'c': '[]', + 'd': '"[]"', + } + )), + { + 'object': {}, + 'objectStr': '{}', + 'array': [], + 'arrayStr': '[]', + } + ) + def test_js_to_json_realworld(self): inp = '''{ 'clip':{'provider':'pseudo'} diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 43b5fda1d..64c83a77a 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3360,7 +3360,13 @@ def js_to_json(code, vars={}, *, strict=False): return f'"{i}":' if v.endswith(':') else str(i) if v in vars: - return json.dumps(vars[v]) + try: + if not strict: + json.loads(vars[v]) + except json.decoder.JSONDecodeError: + return json.dumps(vars[v]) + else: + return vars[v] if not strict: return f'"{v}"' -- cgit v1.2.3 From f74371a97d67237e055612006602934b910b1275 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 11:57:33 +0530 Subject: [extractor/bilibili] Fix `--no-playlist` for anthology Closes #5797 --- yt_dlp/extractor/bilibili.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 37711c138..92620f697 100644 --- 
a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -303,7 +303,8 @@ class BiliBiliIE(BilibiliBaseIE): getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}') if is_anthology: - title += f' p{part_id:02d} {traverse_obj(page_list_json, ((part_id or 1) - 1, "part")) or ""}' + part_id = part_id or 1 + title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}' aid = video_data.get('aid') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') -- cgit v1.2.3 From ec54bd43f374cee429d67078ac61b75e66afb3fa Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 14:07:11 +0530 Subject: Fix bug in writing playlist info-json Closes #4889 --- yt_dlp/YoutubeDL.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 505732327..db6bfded8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1862,11 +1862,10 @@ class YoutubeDL: self.to_screen('[download] Downloading item %s of %s' % ( self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) - extra.update({ + entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, - }) - entry_result = self.__process_iterable_entry(entry, download, extra) + }, extra)) if not entry_result: failures += 1 if failures >= max_failures: -- cgit v1.2.3 From fbb73833067ba742459729809679a62f34b3e41e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 15:30:56 +0530 Subject: Add `weba` to known extensions --- test/test_utils.py | 2 ++ yt_dlp/utils.py | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 82ae77ea2..3d5a6ea6b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1953,6 +1953,8 @@ Line 1 vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') 
self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') + self.assertEqual(get_compatible_ext( + vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 64c83a77a..ee5340cd2 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3656,7 +3656,7 @@ def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): COMPATIBLE_EXTS = ( {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'}, - {'webm'}, + {'webm', 'weba'}, ) for ext in preferences or vexts: current_exts = {ext, *vexts, *aexts} @@ -5962,7 +5962,7 @@ MEDIA_EXTENSIONS = Namespace( common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), - audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma'), + audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), thumbnails=('jpg', 'png', 'webp'), storyboards=('mhtml', ), subtitles=('srt', 'vtt', 'ass', 'lrc'), @@ -6094,9 +6094,9 @@ class FormatSorter: 'vext': {'type': 'ordered', 'field': 'video_ext', 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'), 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')}, - 'aext': {'type': 'ordered', 'field': 'audio_ext', - 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), - 'order_free': ('ogg', 'opus', 'webm', 'mp3', 'm4a', 'aac', '', 'none')}, + 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext', + 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'), + 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')}, 'hidden': {'visible': False, 'forced': True, 
'type': 'extractor', 'max': -1000}, 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', 'field': ('vcodec', 'acodec'), -- cgit v1.2.3 From 9bb856998b0d5a0ad58268f0ba8d784fb9d934e3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 30 Dec 2022 15:32:33 +0530 Subject: [extractor/youtube] Extract DRC formats --- yt_dlp/extractor/youtube.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 9dde34fb0..506bd1e19 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2544,6 +2544,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'tags': [], }, 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'}, + }, { + 'note': 'Audio formats with Dynamic Range Compression', + 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg', + 'info_dict': { + 'id': 'Tq92D6wQ1mg', + 'ext': 'weba', + 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', + 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_follower_count': int, + 'description': 'md5:17eccca93a786d51bc67646756894066', + 'upload_date': '20191228', + 'uploader_url': 'http://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'], + 'playable_in_embed': True, + 'like_count': int, + 'categories': ['Entertainment'], + 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg', + 'age_limit': 18, + 'channel': 'Projekt Melody', + 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', + 'view_count': int, + 'availability': 'needs_auth', + 'comment_count': int, + 'live_status': 'not_live', + 'uploader': 'Projekt Melody', + 'duration': 106, + }, + 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, } ] @@ -3553,7 +3582,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): itag = 
str_or_none(fmt.get('itag')) audio_track = fmt.get('audioTrack') or {} - stream_id = '%s.%s' % (itag or '', audio_track.get('id', '')) + stream_id = (itag, audio_track.get('id'), fmt.get('isDrc')) if stream_id in stream_ids: continue @@ -3634,11 +3663,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), - 'format_id': itag, + 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( '%s%s' % (audio_track.get('displayName') or '', ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), + 'DRC' if fmt.get('isDrc') else None, try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), @@ -3647,7 +3677,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fps': int_or_none(fmt.get('fps')) or None, 'audio_channels': fmt.get('audioChannels'), 'height': height, - 'quality': q(quality), + 'quality': q(quality) - bool(fmt.get('isDrc')) / 2, 'has_drm': bool(fmt.get('drmFamilies')), 'tbr': tbr, 'url': fmt_url, -- cgit v1.2.3 From 8d1ddb0805c7c56bd03a5c0837c55602473d213f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 31 Dec 2022 09:45:12 +0530 Subject: [extractor/udemy] Fix lectures that have no URL and detect DRM Closes #5662 --- yt_dlp/extractor/udemy.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 8b99c59cf..329e5da2d 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -11,8 +11,10 @@ from ..utils import ( int_or_none, js_to_json, sanitized_Request, + smuggle_url, try_get, unescapeHTML, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -106,7 +108,7 @@ class UdemyIE(InfoExtractor): 
% (course_id, lecture_id), lecture_id, 'Downloading lecture JSON', query={ 'fields[lecture]': 'title,description,view_html,asset', - 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', + 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed', }) def _handle_error(self, response): @@ -199,16 +201,19 @@ class UdemyIE(InfoExtractor): def _real_extract(self, url): lecture_id = self._match_id(url) + course_id = unsmuggle_url(url, {})[1].get('course_id') - webpage = self._download_webpage(url, lecture_id) - - course_id, _ = self._extract_course_info(webpage, lecture_id) + webpage = None + if not course_id: + webpage = self._download_webpage(url, lecture_id) + course_id, _ = self._extract_course_info(webpage, lecture_id) try: lecture = self._download_lecture(course_id, lecture_id) except ExtractorError as e: # Error could possibly mean we are not enrolled in the course if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + webpage = webpage or self._download_webpage(url, lecture_id) self._enroll_course(url, webpage, course_id) lecture = self._download_lecture(course_id, lecture_id) else: @@ -391,6 +396,9 @@ class UdemyIE(InfoExtractor): if f.get('url'): formats.append(f) + if not formats and asset.get('course_is_drmed'): + self.report_drm(video_id) + return { 'id': video_id, 'title': title, @@ -449,7 +457,9 @@ class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE if lecture_id: entry = { '_type': 'url_transparent', - 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']), + 'url': smuggle_url( + f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}', + {'course_id': course_id}), 'title': entry.get('title'), 'ie_key': UdemyIE.ie_key(), } -- cgit v1.2.3 From a0e526ed4d042c88771cd5669ceb4413d2b8c47f Mon Sep 17 00:00:00 2001 From: Stel Abrego Date: Fri, 30 Dec 2022 20:58:33 -0800 Subject: 
[extractor/bandcamp] Add `album_artist` (#5537) Closes #5536 Authored by: stelcodes --- yt_dlp/extractor/bandcamp.py | 48 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index de81e0de7..e89b3a69b 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -29,11 +29,18 @@ class BandcampIE(InfoExtractor): 'info_dict': { 'id': '1812978515', 'ext': 'mp3', - 'title': "youtube-dl \"'/\\ä↭ - youtube-dl \"'/\\ä↭ - youtube-dl test song \"'/\\ä↭", + 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', 'duration': 9.8485, - 'uploader': 'youtube-dl "\'/\\ä↭', + 'uploader': 'youtube-dl "\'/\\ä↭', 'upload_date': '20121129', 'timestamp': 1354224127, + 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭', + 'album_artist': 'youtube-dl "\'/\\ä↭', + 'track_id': '1812978515', + 'artist': 'youtube-dl "\'/\\ä↭', + 'uploader_url': 'https://youtube-dl.bandcamp.com', + 'uploader_id': 'youtube-dl', + 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, '_skip': 'There is a limit of 200 free downloads / month for the test song' }, { @@ -41,7 +48,8 @@ class BandcampIE(InfoExtractor): 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'info_dict': { 'id': '2650410135', - 'ext': 'aiff', + 'ext': 'm4a', + 'acodec': r're:[fa]lac', 'title': 'Ben Prunty - Lanius (Battle)', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Ben Prunty', @@ -54,7 +62,10 @@ class BandcampIE(InfoExtractor): 'track_number': 1, 'track_id': '2650410135', 'artist': 'Ben Prunty', + 'album_artist': 'Ben Prunty', 'album': 'FTL: Advanced Edition Soundtrack', + 'uploader_url': 'https://benprunty.bandcamp.com', + 'uploader_id': 'benprunty', }, }, { # no free download, mp3 128 @@ -75,7 +86,34 @@ class BandcampIE(InfoExtractor): 'track_number': 5, 'track_id': '2584466013', 'artist': 'Mastodon', + 'album_artist': 'Mastodon', 
'album': 'Call of the Mastodon', + 'uploader_url': 'https://relapsealumni.bandcamp.com', + 'uploader_id': 'relapsealumni', + }, + }, { + # track from compilation album (artist/album_artist difference) + 'url': 'https://diskotopia.bandcamp.com/track/safehouse', + 'md5': '19c5337bca1428afa54129f86a2f6a69', + 'info_dict': { + 'id': '1978174799', + 'ext': 'mp3', + 'title': 'submerse - submerse - Safehouse', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'submerse', + 'timestamp': 1480779297, + 'upload_date': '20161203', + 'release_timestamp': 1481068800, + 'release_date': '20161207', + 'duration': 154.066, + 'track': 'submerse - Safehouse', + 'track_number': 3, + 'track_id': '1978174799', + 'artist': 'submerse', + 'album_artist': 'Diskotopia', + 'album': 'DSK F/W 2016-2017 Free Compilation', + 'uploader_url': 'https://diskotopia.bandcamp.com', + 'uploader_id': 'diskotopia', }, }] @@ -121,6 +159,9 @@ class BandcampIE(InfoExtractor): embed = self._extract_data_attr(webpage, title, 'embed', False) current = tralbum.get('current') or {} artist = embed.get('artist') or current.get('artist') or tralbum.get('artist') + album_artist = self._html_search_regex( + r'

[\S\s]*?by\s*\s*\s*([^>]+?)\s*', + webpage, 'album artist', fatal=False) timestamp = unified_timestamp( current.get('publish_date') or tralbum.get('album_publish_date')) @@ -205,6 +246,7 @@ class BandcampIE(InfoExtractor): 'track_id': track_id, 'artist': artist, 'album': embed.get('album_title'), + 'album_artist': album_artist, 'formats': formats, } -- cgit v1.2.3 From 2fb0f858686c46abc50a0e253245afe750746775 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 31 Dec 2022 11:02:24 +0530 Subject: [update] Workaround #5632 --- yt_dlp/update.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index ac3e28057..a3a731aef 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -15,7 +15,6 @@ from .utils import ( Popen, cached_method, deprecation_warning, - remove_end, shell_quote, system_identifier, traverse_obj, @@ -43,7 +42,8 @@ def _get_variant_and_executable_path(): # Ref: https://en.wikipedia.org/wiki/Uname#Examples if machine[1:] in ('x86', 'x86_64', 'amd64', 'i386', 'i686'): machine = '_x86' if platform.architecture()[0][:2] == '32' else '' - return f'{remove_end(sys.platform, "32")}{machine}_exe', path + # NB: https://github.com/yt-dlp/yt-dlp/issues/5632 + return f'{sys.platform}{machine}_exe', path path = os.path.dirname(__file__) if isinstance(__loader__, zipimporter): @@ -74,8 +74,8 @@ def current_git_head(): _FILE_SUFFIXES = { 'zip': '', 'py2exe': '_min.exe', - 'win_exe': '.exe', - 'win_x86_exe': '_x86.exe', + 'win32_exe': '.exe', + 'win32_x86_exe': '_x86.exe', 'darwin_exe': '_macos', 'darwin_legacy_exe': '_macos_legacy', 'linux_exe': '_linux', -- cgit v1.2.3 From 8e40b9d1ec132ae1bcac50b3ee520ece46ac9c55 Mon Sep 17 00:00:00 2001 From: Matthew Date: Sun, 1 Jan 2023 04:29:22 +0000 Subject: Improve plugin architecture (#5553) to make plugins easier to develop and use: * Plugins are now loaded as namespace packages. * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.). 
* Plugin packages can be installed and managed via pip, or dropped into any of the documented locations. * Users do not need to edit any code files to install plugins. * Backwards-compatible with previous plugin architecture. As a side-effect, yt-dlp will now search in a few more locations for config files. Closes https://github.com/yt-dlp/yt-dlp/issues/1389 Authored by: flashdagger, coletdjnz, pukkandan, Grub4K Co-authored-by: Marcel Co-authored-by: pukkandan Co-authored-by: Simon Sawicki --- .gitignore | 8 +- README.md | 66 +++++++- devscripts/make_lazy_extractors.py | 4 + test/test_plugins.py | 73 +++++++++ test/testdata/yt_dlp_plugins/extractor/_ignore.py | 5 + test/testdata/yt_dlp_plugins/extractor/ignore.py | 12 ++ test/testdata/yt_dlp_plugins/extractor/normal.py | 9 ++ .../yt_dlp_plugins/postprocessor/normal.py | 5 + .../yt_dlp_plugins/extractor/zipped.py | 5 + .../yt_dlp_plugins/postprocessor/zipped.py | 5 + yt_dlp/YoutubeDL.py | 15 +- yt_dlp/extractor/extractors.py | 4 +- yt_dlp/options.py | 91 ++++++----- yt_dlp/plugins.py | 171 +++++++++++++++++++++ yt_dlp/postprocessor/__init__.py | 5 +- yt_dlp/utils.py | 55 +++++-- ytdlp_plugins/extractor/__init__.py | 4 - ytdlp_plugins/extractor/sample.py | 14 -- ytdlp_plugins/postprocessor/__init__.py | 4 - ytdlp_plugins/postprocessor/sample.py | 26 ---- 20 files changed, 455 insertions(+), 126 deletions(-) create mode 100644 test/test_plugins.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/_ignore.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/ignore.py create mode 100644 test/testdata/yt_dlp_plugins/extractor/normal.py create mode 100644 test/testdata/yt_dlp_plugins/postprocessor/normal.py create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py create mode 100644 test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py create mode 100644 yt_dlp/plugins.py delete mode 100644 ytdlp_plugins/extractor/__init__.py delete mode 100644 
ytdlp_plugins/extractor/sample.py delete mode 100644 ytdlp_plugins/postprocessor/__init__.py delete mode 100644 ytdlp_plugins/postprocessor/sample.py diff --git a/.gitignore b/.gitignore index 00d74057f..ef4d11616 100644 --- a/.gitignore +++ b/.gitignore @@ -120,9 +120,5 @@ yt-dlp.zip */extractor/lazy_extractors.py # Plugins -ytdlp_plugins/extractor/* -!ytdlp_plugins/extractor/__init__.py -!ytdlp_plugins/extractor/sample.py -ytdlp_plugins/postprocessor/* -!ytdlp_plugins/postprocessor/__init__.py -!ytdlp_plugins/postprocessor/sample.py +ytdlp_plugins/* +yt-dlp-plugins/* diff --git a/README.md b/README.md index 500f92387..4294090dc 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,8 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Modifying metadata examples](#modifying-metadata-examples) * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) + * [Installing Plugins](#installing-plugins) + * [Developing Plugins](#developing-plugins) * [EMBEDDING YT-DLP](#embedding-yt-dlp) * [Embedding examples](#embedding-examples) * [DEPRECATED OPTIONS](#deprecated-options) @@ -1110,15 +1112,20 @@ You can configure yt-dlp by placing any supported command line option to a confi * If `-P` is not given, the current directory is searched 1. **User Configuration**: * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp/config.txt` * `${XDG_CONFIG_HOME}/yt-dlp.conf` * `${APPDATA}/yt-dlp/config` (recommended on Windows) * `${APPDATA}/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` + * `~/.yt-dlp/config` + * `~/.yt-dlp/config.txt` See also: [Notes about environment variables](#notes-about-environment-variables) 1. **System Configuration**: * `/etc/yt-dlp.conf` + * `/etc/yt-dlp/config` + * `/etc/yt-dlp/config.txt` E.g. 
with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: ``` @@ -1789,19 +1796,68 @@ NOTE: These options may be changed/removed in the future without concern for bac # PLUGINS -Plugins are loaded from `/ytdlp_plugins//__init__.py`; where `` is the directory of the binary (`/yt-dlp`), or the root directory of the module if you are running directly from source-code (`/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version +Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!** -Plugins can be of ``s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`. +Plugins can be of ``s `extractor` or `postprocessor`. +- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. +- Extractor plugins take priority over builtin extractors. +- Postprocessor plugins can be invoked using `--use-postprocessor NAME`. -See [ytdlp_plugins](ytdlp_plugins) for example plugins. -Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code +Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`. 
-If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability +In other words, the file structure on the disk looks something like: + + yt_dlp_plugins/ + extractor/ + myplugin.py + postprocessor/ + myplugin.py + +yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) +## Installing Plugins + +Plugins can be installed using various methods and locations. + +1. **Configuration directories**: + Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration): + * **User Plugins** + * `${XDG_CONFIG_HOME}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Linux/macOS) + * `${XDG_CONFIG_HOME}/yt-dlp-plugins//yt_dlp_plugins/` + * `${APPDATA}/yt-dlp/plugins//yt_dlp_plugins/` (recommended on Windows) + * `~/.yt-dlp/plugins//yt_dlp_plugins/` + * `~/yt-dlp-plugins//yt_dlp_plugins/` + * **System Plugins** + * `/etc/yt-dlp/plugins//yt_dlp_plugins/` + * `/etc/yt-dlp-plugins//yt_dlp_plugins/` +2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location: + * Binary: where `/yt-dlp.exe`, `/yt-dlp-plugins//yt_dlp_plugins/` + * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` + +3. **pip and other locations in `PYTHONPATH`** + * Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Note: plugin files between plugin packages installed with pip must have unique filenames + * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. + * Note: This does not apply for Pyinstaller/py2exe builds. 
+ + +.zip, .egg and .whl archives containing a `yt_dlp_plugins` namespace folder in their root are also supported. These can be placed in the same locations `yt_dlp_plugins` namespace folders can be found. +- e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins//myplugin.py` + +Run yt-dlp with `--verbose`/`-v` to check if the plugin has been loaded. + +## Developing Plugins + +See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. + +All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) + +If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability +See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor. 
# EMBEDDING YT-DLP diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index c502bdf89..d74ea202f 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -40,8 +40,12 @@ def main(): _ALL_CLASSES = get_all_ies() # Must be before import + import yt_dlp.plugins from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor + # Filter out plugins + _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] + DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( MODULE_TEMPLATE, diff --git a/test/test_plugins.py b/test/test_plugins.py new file mode 100644 index 000000000..6cde579e1 --- /dev/null +++ b/test/test_plugins.py @@ -0,0 +1,73 @@ +import importlib +import os +import shutil +import sys +import unittest +from pathlib import Path + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata') +sys.path.append(str(TEST_DATA_DIR)) +importlib.invalidate_caches() + +from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins + + +class TestPlugins(unittest.TestCase): + + TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME + + def test_directories_containing_plugins(self): + self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories())) + + def test_extractor_classes(self): + for module_name in tuple(sys.modules): + if module_name.startswith(f'{PACKAGE_NAME}.extractor'): + del sys.modules[module_name] + plugins_ie = load_plugins('extractor', 'IE') + + self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys()) + self.assertIn('NormalPluginIE', plugins_ie.keys()) + + # don't load modules with underscore prefix + self.assertFalse( + f'{PACKAGE_NAME}.extractor._ignore' in sys.modules.keys(), + 'loaded module beginning with underscore') + self.assertNotIn('IgnorePluginIE', 
plugins_ie.keys()) + + # Don't load extractors with underscore prefix + self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys()) + + # Don't load extractors not specified in __all__ (if supplied) + self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys()) + self.assertIn('InAllPluginIE', plugins_ie.keys()) + + def test_postprocessor_classes(self): + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('NormalPluginPP', plugins_pp.keys()) + + def test_importing_zipped_module(self): + zip_path = TEST_DATA_DIR / 'zipped_plugins.zip' + shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4]) + sys.path.append(str(zip_path)) # add zip to search paths + importlib.invalidate_caches() # reset the import caches + + try: + for plugin_type in ('extractor', 'postprocessor'): + package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}') + self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__)) + + plugins_ie = load_plugins('extractor', 'IE') + self.assertIn('ZippedPluginIE', plugins_ie.keys()) + + plugins_pp = load_plugins('postprocessor', 'PP') + self.assertIn('ZippedPluginPP', plugins_pp.keys()) + + finally: + sys.path.remove(str(zip_path)) + os.remove(zip_path) + importlib.invalidate_caches() # reset the import caches + + +if __name__ == '__main__': + unittest.main() diff --git a/test/testdata/yt_dlp_plugins/extractor/_ignore.py b/test/testdata/yt_dlp_plugins/extractor/_ignore.py new file mode 100644 index 000000000..57faf75bb --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/_ignore.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class IgnorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/extractor/ignore.py b/test/testdata/yt_dlp_plugins/extractor/ignore.py new file mode 100644 index 000000000..816a16aa2 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/ignore.py @@ -0,0 +1,12 @@ +from yt_dlp.extractor.common import InfoExtractor + + 
+class IgnoreNotInAllPluginIE(InfoExtractor): + pass + + +class InAllPluginIE(InfoExtractor): + pass + + +__all__ = ['InAllPluginIE'] diff --git a/test/testdata/yt_dlp_plugins/extractor/normal.py b/test/testdata/yt_dlp_plugins/extractor/normal.py new file mode 100644 index 000000000..b09009bdc --- /dev/null +++ b/test/testdata/yt_dlp_plugins/extractor/normal.py @@ -0,0 +1,9 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class NormalPluginIE(InfoExtractor): + pass + + +class _IgnoreUnderscorePluginIE(InfoExtractor): + pass diff --git a/test/testdata/yt_dlp_plugins/postprocessor/normal.py b/test/testdata/yt_dlp_plugins/postprocessor/normal.py new file mode 100644 index 000000000..315b85a48 --- /dev/null +++ b/test/testdata/yt_dlp_plugins/postprocessor/normal.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class NormalPluginPP(PostProcessor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py new file mode 100644 index 000000000..01542e0d8 --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class ZippedPluginIE(InfoExtractor): + pass diff --git a/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py new file mode 100644 index 000000000..223822bd6 --- /dev/null +++ b/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py @@ -0,0 +1,5 @@ +from yt_dlp.postprocessor.common import PostProcessor + + +class ZippedPluginPP(PostProcessor): + pass diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index db6bfded8..9ef56a46b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -32,6 +32,7 @@ from .extractor import gen_extractor_classes, get_info_extractor from .extractor.common import UnsupportedURLIE from .extractor.openload 
import PhantomJSwrapper from .minicurses import format_text +from .plugins import directories as plugin_directories from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors from .postprocessor import ( EmbedThumbnailPP, @@ -3773,10 +3774,6 @@ class YoutubeDL: write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') - if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s' % [ - '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params['compat_opts']: write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) @@ -3810,6 +3807,16 @@ class YoutubeDL: proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') + for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items(): + if not plugins: + continue + write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % ( + klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in plugins.items()))))) + plugin_dirs = plugin_directories() + if plugin_dirs: + write_debug(f'Plugin directories: {plugin_dirs}') + # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 610e02f90..beda02917 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,10 +1,10 @@ import contextlib import os -from ..utils import load_plugins +from ..plugins import load_plugins # NB: Must be before other imports so that plugins can be correctly injected -_PLUGIN_CLASSES = load_plugins('extractor', 'IE', {}) +_PLUGIN_CLASSES = load_plugins('extractor', 'IE') _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): diff --git 
a/yt_dlp/options.py b/yt_dlp/options.py index ed83cb763..be4695cbb 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -29,6 +29,8 @@ from .utils import ( expand_path, format_field, get_executable_path, + get_system_config_dirs, + get_user_config_dirs, join_nonempty, orderedSet_from_options, remove_end, @@ -42,62 +44,67 @@ def parseOpts(overrideArguments=None, ignore_config_files='if_override'): if ignore_config_files == 'if_override': ignore_config_files = overrideArguments is not None - def _readUserConf(package_name, default=[]): - # .config + def _load_from_config_dirs(config_dirs): + for config_dir in config_dirs: + conf_file_path = os.path.join(config_dir, 'config') + conf = Config.read_file(conf_file_path, default=None) + if conf is None: + conf_file_path += '.txt' + conf = Config.read_file(conf_file_path, default=None) + if conf is not None: + return conf, conf_file_path + return None, None + + def _read_user_conf(package_name, default=None): + # .config/package_name.conf xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') - userConfFile = os.path.join(xdg_config_home, package_name, 'config') - if not os.path.isfile(userConfFile): - userConfFile = os.path.join(xdg_config_home, '%s.conf' % package_name) - userConf = Config.read_file(userConfFile, default=None) - if userConf is not None: - return userConf, userConfFile + user_conf_file = os.path.join(xdg_config_home, '%s.conf' % package_name) + user_conf = Config.read_file(user_conf_file, default=None) + if user_conf is not None: + return user_conf, user_conf_file - # appdata - appdata_dir = os.getenv('appdata') - if appdata_dir: - userConfFile = os.path.join(appdata_dir, package_name, 'config') - userConf = Config.read_file(userConfFile, default=None) - if userConf is None: - userConfFile += '.txt' - userConf = Config.read_file(userConfFile, default=None) - if userConf is not None: - return userConf, userConfFile + # home (~/package_name.conf or ~/package_name.conf.txt) + 
user_conf_file = os.path.join(compat_expanduser('~'), '%s.conf' % package_name) + user_conf = Config.read_file(user_conf_file, default=None) + if user_conf is None: + user_conf_file += '.txt' + user_conf = Config.read_file(user_conf_file, default=None) + if user_conf is not None: + return user_conf, user_conf_file - # home - userConfFile = os.path.join(compat_expanduser('~'), '%s.conf' % package_name) - userConf = Config.read_file(userConfFile, default=None) - if userConf is None: - userConfFile += '.txt' - userConf = Config.read_file(userConfFile, default=None) - if userConf is not None: - return userConf, userConfFile + # Package config directories (e.g. ~/.config/package_name/package_name.txt) + user_conf, user_conf_file = _load_from_config_dirs(get_user_config_dirs(package_name)) + if user_conf is not None: + return user_conf, user_conf_file + return default if default is not None else [], None - return default, None + def _read_system_conf(package_name, default=None): + system_conf, system_conf_file = _load_from_config_dirs(get_system_config_dirs(package_name)) + if system_conf is not None: + return system_conf, system_conf_file + return default if default is not None else [], None - def add_config(label, path, user=False): + def add_config(label, path=None, func=None): """ Adds config and returns whether to continue """ if root.parse_known_args()[0].ignoreconfig: return False - # Multiple package names can be given here - # E.g. 
('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for - # the configuration file of any of these three packages - for package in ('yt-dlp',): - if user: - args, current_path = _readUserConf(package, default=None) - else: - current_path = os.path.join(path, '%s.conf' % package) - args = Config.read_file(current_path, default=None) - if args is not None: - root.append_config(args, current_path, label=label) - return True + elif func: + assert path is None + args, current_path = func('yt-dlp') + else: + current_path = os.path.join(path, 'yt-dlp.conf') + args = Config.read_file(current_path, default=None) + if args is not None: + root.append_config(args, current_path, label=label) + return True return True def load_configs(): yield not ignore_config_files yield add_config('Portable', get_executable_path()) yield add_config('Home', expand_path(root.parse_known_args()[0].paths.get('home', '')).strip()) - yield add_config('User', None, user=True) - yield add_config('System', '/etc') + yield add_config('User', func=_read_user_conf) + yield add_config('System', func=_read_system_conf) opts = optparse.Values({'verbose': True, 'print_help': False}) try: diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py new file mode 100644 index 000000000..7d2226d0f --- /dev/null +++ b/yt_dlp/plugins.py @@ -0,0 +1,171 @@ +import contextlib +import importlib +import importlib.abc +import importlib.machinery +import importlib.util +import inspect +import itertools +import os +import pkgutil +import sys +import traceback +import zipimport +from pathlib import Path +from zipfile import ZipFile + +from .compat import functools # isort: split +from .compat import compat_expanduser +from .utils import ( + get_executable_path, + get_system_config_dirs, + get_user_config_dirs, + write_string, +) + +PACKAGE_NAME = 'yt_dlp_plugins' +COMPAT_PACKAGE_NAME = 'ytdlp_plugins' + + +class PluginLoader(importlib.abc.Loader): + """Dummy loader for virtual namespace packages""" + + def exec_module(self, module): 
+ return None + + +@functools.cache +def dirs_in_zip(archive): + with ZipFile(archive) as zip: + return set(itertools.chain.from_iterable( + Path(file).parents for file in zip.namelist())) + + +class PluginFinder(importlib.abc.MetaPathFinder): + """ + This class provides one or multiple namespace packages. + It searches in sys.path and yt-dlp config folders for + the existing subdirectories from which the modules can be imported + """ + + def __init__(self, *packages): + self._zip_content_cache = {} + self.packages = set(itertools.chain.from_iterable( + itertools.accumulate(name.split('.'), lambda a, b: '.'.join((a, b))) + for name in packages)) + + def search_locations(self, fullname): + candidate_locations = [] + + def _get_package_paths(*root_paths, containing_folder='plugins'): + for config_dir in map(Path, root_paths): + plugin_dir = config_dir / containing_folder + if not plugin_dir.is_dir(): + continue + yield from plugin_dir.iterdir() + + # Load from yt-dlp config folders + candidate_locations.extend(_get_package_paths( + *get_user_config_dirs('yt-dlp'), *get_system_config_dirs('yt-dlp'), + containing_folder='plugins')) + + # Load from yt-dlp-plugins folders + candidate_locations.extend(_get_package_paths( + get_executable_path(), + compat_expanduser('~'), + '/etc', + os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config'), + containing_folder='yt-dlp-plugins')) + + candidate_locations.extend(map(Path, sys.path)) # PYTHONPATH + + parts = Path(*fullname.split('.')) + locations = set() + for path in dict.fromkeys(candidate_locations): + candidate = path / parts + if candidate.is_dir(): + locations.add(str(candidate)) + elif path.name and any(path.with_suffix(suffix).is_file() for suffix in {'.zip', '.egg', '.whl'}): + with contextlib.suppress(FileNotFoundError): + if parts in dirs_in_zip(path): + locations.add(str(candidate)) + return locations + + def find_spec(self, fullname, path=None, target=None): + if fullname not in self.packages: + return None 
+ + search_locations = self.search_locations(fullname) + if not search_locations: + return None + + spec = importlib.machinery.ModuleSpec(fullname, PluginLoader(), is_package=True) + spec.submodule_search_locations = search_locations + return spec + + def invalidate_caches(self): + dirs_in_zip.cache_clear() + for package in self.packages: + if package in sys.modules: + del sys.modules[package] + + +def directories(): + spec = importlib.util.find_spec(PACKAGE_NAME) + return spec.submodule_search_locations if spec else [] + + +def iter_modules(subpackage): + fullname = f'{PACKAGE_NAME}.{subpackage}' + with contextlib.suppress(ModuleNotFoundError): + pkg = importlib.import_module(fullname) + yield from pkgutil.iter_modules(path=pkg.__path__, prefix=f'{fullname}.') + + +def load_module(module, module_name, suffix): + return inspect.getmembers(module, lambda obj: ( + inspect.isclass(obj) + and obj.__name__.endswith(suffix) + and obj.__module__.startswith(module_name) + and not obj.__name__.startswith('_') + and obj.__name__ in getattr(module, '__all__', [obj.__name__]))) + + +def load_plugins(name, suffix): + classes = {} + + for finder, module_name, _ in iter_modules(name): + if any(x.startswith('_') for x in module_name.split('.')): + continue + try: + if sys.version_info < (3, 10) and isinstance(finder, zipimport.zipimporter): + # zipimporter.load_module() is deprecated in 3.10 and removed in 3.12 + # The exec_module branch below is the replacement for >= 3.10 + # See: https://docs.python.org/3/library/zipimport.html#zipimport.zipimporter.exec_module + module = finder.load_module(module_name) + else: + spec = finder.find_spec(module_name) + module = importlib.util.module_from_spec(spec) + sys.modules[module_name] = module + spec.loader.exec_module(module) + except Exception: + write_string(f'Error while importing module {module_name!r}\n{traceback.format_exc(limit=-1)}') + continue + classes.update(load_module(module, module_name, suffix)) + + # Compat: old plugin 
system using __init__.py + # Note: plugins imported this way do not show up in directories() + # nor are considered part of the yt_dlp_plugins namespace package + with contextlib.suppress(FileNotFoundError): + spec = importlib.util.spec_from_file_location( + name, Path(get_executable_path(), COMPAT_PACKAGE_NAME, name, '__init__.py')) + plugins = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = plugins + spec.loader.exec_module(plugins) + classes.update(load_module(plugins, spec.name, suffix)) + + return classes + + +sys.meta_path.insert(0, PluginFinder(f'{PACKAGE_NAME}.extractor', f'{PACKAGE_NAME}.postprocessor')) + +__all__ = ['directories', 'load_plugins', 'PACKAGE_NAME', 'COMPAT_PACKAGE_NAME'] diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index f168be46a..bfe9df733 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -33,14 +33,15 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP -from ..utils import load_plugins +from ..plugins import load_plugins -_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals()) +_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP') def get_postprocessor(key): return globals()[key + 'PP'] +globals().update(_PLUGIN_CLASSES) __all__ = [name for name in globals().keys() if name.endswith('PP')] __all__.extend(('PostProcessor', 'FFmpegPostProcessor')) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ee5340cd2..32da598d0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -18,7 +18,6 @@ import html.entities import html.parser import http.client import http.cookiejar -import importlib.util import inspect import io import itertools @@ -5372,22 +5371,37 @@ def get_executable_path(): return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1])) -def load_plugins(name, suffix, namespace): - classes = {} - 
with contextlib.suppress(FileNotFoundError): - plugins_spec = importlib.util.spec_from_file_location( - name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')) - plugins = importlib.util.module_from_spec(plugins_spec) - sys.modules[plugins_spec.name] = plugins - plugins_spec.loader.exec_module(plugins) - for name in dir(plugins): - if name in namespace: - continue - if not name.endswith(suffix): - continue - klass = getattr(plugins, name) - classes[name] = namespace[name] = klass - return classes +def get_user_config_dirs(package_name): + locations = set() + + # .config (e.g. ~/.config/package_name) + xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') + config_dir = os.path.join(xdg_config_home, package_name) + if os.path.isdir(config_dir): + locations.add(config_dir) + + # appdata (%APPDATA%/package_name) + appdata_dir = os.getenv('appdata') + if appdata_dir: + config_dir = os.path.join(appdata_dir, package_name) + if os.path.isdir(config_dir): + locations.add(config_dir) + + # home (~/.package_name) + user_config_directory = os.path.join(compat_expanduser('~'), '.%s' % package_name) + if os.path.isdir(user_config_directory): + locations.add(user_config_directory) + + return locations + + +def get_system_config_dirs(package_name): + locations = set() + # /etc/package_name + system_config_directory = os.path.join('/etc', package_name) + if os.path.isdir(system_config_directory): + locations.add(system_config_directory) + return locations def traverse_obj( @@ -6367,3 +6381,10 @@ class FormatSorter: # Deprecated has_certifi = bool(certifi) has_websockets = bool(websockets) + + +def load_plugins(name, suffix, namespace): + from .plugins import load_plugins + ret = load_plugins(name, suffix) + namespace.update(ret) + return ret diff --git a/ytdlp_plugins/extractor/__init__.py b/ytdlp_plugins/extractor/__init__.py deleted file mode 100644 index 3045a590b..000000000 --- a/ytdlp_plugins/extractor/__init__.py +++ 
/dev/null @@ -1,4 +0,0 @@ -# flake8: noqa: F401 - -# ℹ️ The imported name must end in "IE" -from .sample import SamplePluginIE diff --git a/ytdlp_plugins/extractor/sample.py b/ytdlp_plugins/extractor/sample.py deleted file mode 100644 index a8bc455eb..000000000 --- a/ytdlp_plugins/extractor/sample.py +++ /dev/null @@ -1,14 +0,0 @@ -# ⚠ Don't use relative imports -from yt_dlp.extractor.common import InfoExtractor - - -# ℹ️ Instructions on making extractors can be found at: -# 🔗 https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site - -class SamplePluginIE(InfoExtractor): - _WORKING = False - IE_DESC = False - _VALID_URL = r'^sampleplugin:' - - def _real_extract(self, url): - self.to_screen('URL "%s" successfully captured' % url) diff --git a/ytdlp_plugins/postprocessor/__init__.py b/ytdlp_plugins/postprocessor/__init__.py deleted file mode 100644 index 61099abbc..000000000 --- a/ytdlp_plugins/postprocessor/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# flake8: noqa: F401 - -# ℹ️ The imported name must end in "PP" and is the name to be used in --use-postprocessor -from .sample import SamplePluginPP diff --git a/ytdlp_plugins/postprocessor/sample.py b/ytdlp_plugins/postprocessor/sample.py deleted file mode 100644 index 4563e1c11..000000000 --- a/ytdlp_plugins/postprocessor/sample.py +++ /dev/null @@ -1,26 +0,0 @@ -# ⚠ Don't use relative imports -from yt_dlp.postprocessor.common import PostProcessor - - -# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor -class SamplePluginPP(PostProcessor): - def __init__(self, downloader=None, **kwargs): - # ⚠ Only kwargs can be passed from the CLI, and all argument values will be string - # Also, "downloader", "when" and "key" are reserved names - super().__init__(downloader) - self._kwargs = kwargs - - # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run - def run(self, info): - if info.get('_type', 'video') != 'video': # PP was called for playlist - 
self.to_screen(f'Post-processing playlist {info.get("id")!r} with {self._kwargs}') - elif info.get('filepath'): # PP was called after download (default) - filepath = info.get('filepath') - self.to_screen(f'Post-processed {filepath!r} with {self._kwargs}') - elif info.get('requested_downloads'): # PP was called after_video - filepaths = [f.get('filepath') for f in info.get('requested_downloads')] - self.to_screen(f'Post-processed {filepaths!r} with {self._kwargs}') - else: # PP was called before actual download - filepath = info.get('_filename') - self.to_screen(f'Pre-processed {filepath!r} with {self._kwargs}') - return [], info # return list_of_files_to_delete, info_dict -- cgit v1.2.3 From 3e01ce744a981d8f19ae77ec695005e7000f4703 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 1 Jan 2023 18:40:26 +1300 Subject: [extractor/generic] Use `Accept-Encoding: identity` for initial request The existing comment seems to imply this was the desired behavior from the beginning. Partial fix for https://github.com/yt-dlp/yt-dlp/issues/5855, https://github.com/yt-dlp/yt-dlp/issues/5851, https://github.com/yt-dlp/yt-dlp/issues/4748 --- yt_dlp/extractor/generic.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2281c71f3..ffc279023 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2154,6 +2154,21 @@ class GenericIE(InfoExtractor): 'age_limit': 0, 'direct': True, } + }, { + 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.', + 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'info_dict': { + 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'ext': 'mp4', + 'title': 'čauky lidi 70 finall', + 'description': 'čauky lidi 70 finall', + 'thumbnail': 'h', + 'upload_date': '20220606', + 
'timestamp': 1654513791, + 'duration': 318.0, + 'direct': True, + 'age_limit': 0, + } } ] @@ -2312,7 +2327,7 @@ class GenericIE(InfoExtractor): # It may probably better to solve this by checking Content-Type for application/octet-stream # after a HEAD request, but not sure if we can rely on this. full_response = self._request_webpage(url, video_id, headers={ - 'Accept-Encoding': '*', + 'Accept-Encoding': 'identity', **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl() -- cgit v1.2.3 From 1cdda3299810b86206853a22e680758eadcc4e05 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 14:11:14 +0530 Subject: [utils] `get_exe_version`: Detect broken executables Authored by: dirkf, pukkandan Closes #5561 --- yt_dlp/utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 32da598d0..5af176b36 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2720,8 +2720,10 @@ def _get_exe_version_output(exe, args): # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. 
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 - stdout, _, _ = Popen.run([encodeArgument(exe)] + args, text=True, - stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + if ret: + return None except OSError: return False return stdout @@ -2739,11 +2741,15 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): def get_exe_version(exe, args=['--version'], - version_re=None, unrecognized='present'): + version_re=None, unrecognized=('present', 'broken')): """ Returns the version of the specified executable, or False if the executable is not present """ + unrecognized = variadic(unrecognized) + assert len(unrecognized) in (1, 2) out = _get_exe_version_output(exe, args) - return detect_exe_version(out, version_re, unrecognized) if out else False + if out is None: + return unrecognized[-1] + return out and detect_exe_version(out, version_re, unrecognized[0]) def frange(start=0, stop=None, step=1): -- cgit v1.2.3 From 88fb9425775da7f92d24e8b5f3009cafb56e94d6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 13:32:05 +0530 Subject: Add message when there are no subtitles/thumbnails Closes #5551 --- yt_dlp/YoutubeDL.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9ef56a46b..866d069b7 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3930,7 +3930,7 @@ class YoutubeDL: elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: - self.report_warning(f'There\'s no {label} description to write') + self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: @@ -3946,15 +3946,18 @@ class YoutubeDL: ''' 
Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' ret = [] subtitles = info_dict.get('requested_subtitles') - if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret - + elif not subtitles: + self.to_screen('[info] There\'s no subtitles for the requested languages') + return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret + for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -4001,6 +4004,9 @@ class YoutubeDL: thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] + if not thumbnails: + self.to_screen(f'[info] There\'s no {label} thumbnails to download') + return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: -- cgit v1.2.3 From 2a06bb4eb671eb306a2687ef0a4f853b936f05e0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 13:42:43 +0530 Subject: Add `--compat-options 2021,2022` Use these to guard against future compat changes. This allows devs to change defaults and make other potentially breaking changes more easily. 
If you need everything to work exactly as-is, put this in your config --- README.md | 2 ++ yt_dlp/options.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 4294090dc..f6bf1175e 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,8 @@ For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options (Do NOT use) * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` +* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` +* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options # INSTALLATION diff --git a/yt_dlp/options.py b/yt_dlp/options.py index be4695cbb..e9766c02d 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -470,6 +470,8 @@ def create_parser(): }, 'aliases': { 'youtube-dl': ['all', '-multistreams'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat'], + '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], + '2022': [], } }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' -- cgit v1.2.3 From 78d25e0b7c2b45597e193c0decb33f4f248502a9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 14:10:51 +0530 Subject: [extractor/embedly] Handle vimeo embeds Closes #3360 --- yt_dlp/extractor/embedly.py | 62 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py index 483d018bb..db5ef055e 100644 --- a/yt_dlp/extractor/embedly.py +++ b/yt_dlp/extractor/embedly.py @@ -1,13 +1,63 @@ import re import urllib.parse + from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote +from .youtube import YoutubeTabIE +from 
..utils import parse_qs, smuggle_url, traverse_obj class EmbedlyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P[^#&]+)' + _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)' _TESTS = [{ 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'info_dict': { + 'id': 'UUGLim4T2loE5rwCMdpCIPVg', + 'modified_date': '20221225', + 'view_count': int, + 'uploader_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'uploader': 'TraciJHines', + 'channel_url': 'https://www.youtube.com/@TraciHinesMusic', + 'channel': 'TraciJHines', + 'availability': 'public', + 'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'description': '', + 'tags': [], + 'title': 'Uploads from TraciJHines', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', + 'params': {'noplaylist': True}, + 'info_dict': { + 'id': 'SU4fj_aEMVw', + 'ext': 'mp4', + 'title': 'I\'m on Patreon!', + 'age_limit': 0, + 'categories': ['Entertainment'], + 'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp', + 'live_status': 'not_live', + 'playable_in_embed': True, + 'channel': 'TraciJHines', + 'uploader_id': 'TraciJHines', + 'channel_url': 
'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg', + 'uploader_url': 'http://www.youtube.com/user/TraciJHines', + 'upload_date': '20150211', + 'duration': 282, + 'availability': 'public', + 'channel_follower_count': int, + 'tags': 'count:39', + 'view_count': int, + 'comment_count': int, + 'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg', + 'like_count': int, + 'uploader': 'TraciJHines', + 'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364', + 'chapters': list, + + }, + }, { + 'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh', 'only_matching': True, }] @@ -21,4 +71,10 @@ class EmbedlyIE(InfoExtractor): yield urllib.parse.unquote(mobj.group('url')) def _real_extract(self, url): - return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) + qs = parse_qs(url) + src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '') + if src and YoutubeTabIE.suitable(src): + return self.url_result(src, YoutubeTabIE) + return self.url_result(smuggle_url( + urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))), + {'http_headers': {'Referer': url}})) -- cgit v1.2.3 From 26fdfc3704a278acada27cc420d67c6d3f71423b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 14:39:58 +0530 Subject: [extractor/biliintl:series] Make partial download of series faster --- yt_dlp/extractor/bilibili.py | 51 +++++++++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 92620f697..3274a427d 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -20,9 +20,11 @@ from ..utils import ( parse_count, parse_qs, qualities, + smuggle_url, srt_subtitles_timecode, str_or_none, traverse_obj, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -881,16 +883,12 @@ class BiliIntlBaseIE(InfoExtractor): return formats - def _extract_video_info(self, video_data, *, ep_id=None, aid=None): + 
def _parse_video_metadata(self, video_data): return { - 'id': ep_id or aid, 'title': video_data.get('title_display') or video_data.get('title'), 'thumbnail': video_data.get('cover'), 'episode_number': int_or_none(self._search_regex( r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)), - 'formats': self._get_formats(ep_id=ep_id, aid=aid), - 'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid), - 'extractor_key': BiliIntlIE.ie_key(), } def _perform_login(self, username, password): @@ -975,9 +973,16 @@ class BiliIntlIE(BiliIntlBaseIE): 'only_matching': True, }] - def _real_extract(self, url): - season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') - video_id = ep_id or aid + def _make_url(video_id, series_id=None): + if series_id: + return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}' + return f'https://www.bilibili.tv/en/video/{video_id}' + + def _extract_video_metadata(self, url, video_id, season_id): + url, smuggled_data = unsmuggle_url(url, {}) + if smuggled_data.get('title'): + return smuggled_data + webpage = self._download_webpage(url, video_id) # Bstation layout initial_data = ( @@ -989,13 +994,26 @@ class BiliIntlIE(BiliIntlBaseIE): if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - video_data = traverse_obj(season_json, - ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), - expected_type=dict, get_all=False) - return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid) + video_data = traverse_obj(season_json, ( + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + ), expected_type=dict, get_all=False) + + return self._parse_video_metadata(video_data) + + def _real_extract(self, url): + season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid') + 
video_id = ep_id or aid + + return { + 'id': video_id, + **self._extract_video_metadata(url, video_id, season_id), + 'formats': self._get_formats(ep_id=ep_id, aid=aid), + 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid), + } class BiliIntlSeriesIE(BiliIntlBaseIE): + IE_NAME = 'biliintl:series' _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?play/(?P\d+)/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://www.bilibili.tv/en/play/34613', @@ -1021,9 +1039,12 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): def _entries(self, series_id): series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id) - for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]): - episode_id = str(episode.get('episode_id')) - yield self._extract_video_info(episode, ep_id=episode_id) + for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict): + episode_id = str(episode['episode_id']) + yield self.url_result(smuggle_url( + BiliIntlIE._make_url(episode_id, series_id), + self._parse_video_metadata(episode) + ), BiliIntlIE, episode_id) def _real_extract(self, url): series_id = self._match_id(url) -- cgit v1.2.3 From 193fb150b76c4aaf41fb2c98b073e7e1f8a108f0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 1 Jan 2023 17:01:48 +0530 Subject: Fix bug in 119e40ef64b25f66a39246e87ce6c143cd34276d --- yt_dlp/YoutubeDL.py | 3 ++- yt_dlp/__init__.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 866d069b7..8ce71a2dc 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3460,7 +3460,8 @@ class YoutubeDL: return infodict def run_all_pps(self, key, info, *, additional_pps=None): - self._forceprint(key, info) + if key != 'video': + self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info diff --git 
a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3490816c4..9cb132410 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -703,7 +703,7 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[2:]) + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' -- cgit v1.2.3 From 8c53322cda75394a8d551dde20b2529ee5ad6e89 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Mon, 2 Jan 2023 02:16:25 +0900 Subject: [downloader/aria2c] Native progress for aria2c via RPC (#3724) Authored by: Lesmiscore, pukkandan Closes #2038 --- README.md | 3 +- yt_dlp/downloader/external.py | 109 ++++++++++++++++++++++++++++++++++++++++-- yt_dlp/options.py | 6 +-- yt_dlp/utils.py | 9 ++++ 4 files changed, 119 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f6bf1175e..83e69a236 100644 --- a/README.md +++ b/README.md @@ -153,6 +153,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the separate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. 
You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior +* yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: `aria2c`). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is For ease of use, a few more compat options are available: @@ -160,7 +161,7 @@ For ease of use, a few more compat options are available: * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` -* `--compat-options 2022`: Currently does nothing. Use this to enable all future compat options +* `--compat-options 2022`: Same as `--compat-options no-external-downloader-progress`. Use this to enable all future compat options # INSTALLATION diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 575138371..569839f6f 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,9 +1,11 @@ import enum +import json import os.path import re import subprocess import sys import time +import uuid from .fragment import FragmentFD from ..compat import functools @@ -20,8 +22,10 @@ from ..utils import ( determine_ext, encodeArgument, encodeFilename, + find_available_port, handle_youtubedl_headers, remove_end, + sanitized_Request, traverse_obj, ) @@ -60,7 +64,6 @@ class ExternalFD(FragmentFD): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -129,8 +132,7 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - _, stderr, returncode = Popen.run( - cmd, 
text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + _, stderr, returncode = self._call_process(cmd, info_dict) if returncode and stderr: self.to_stderr(stderr) return returncode @@ -140,7 +142,7 @@ class ExternalFD(FragmentFD): retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, fatal=not skip_unavailable_fragments) for retry in retry_manager: - _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) + _, stderr, returncode = self._call_process(cmd, info_dict) if not returncode: break # TODO: Decide whether to retry based on error code @@ -172,6 +174,9 @@ class ExternalFD(FragmentFD): self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 + def _call_process(self, cmd, info_dict): + return Popen.run(cmd, text=True, stderr=subprocess.PIPE) + class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -256,6 +261,14 @@ class Aria2cFD(ExternalFD): def _aria2c_filename(fn): return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' + def _call_downloader(self, tmpfilename, info_dict): + if 'no-external-downloader-progress' not in self.params.get('compat_opts', []): + info_dict['__rpc'] = { + 'port': find_available_port() or 19190, + 'secret': str(uuid.uuid4()), + } + return super()._call_downloader(tmpfilename, info_dict) + def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', @@ -276,6 +289,12 @@ class Aria2cFD(ExternalFD): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() + if '__rpc' in info_dict: + cmd += [ + '--enable-rpc', + f'--rpc-listen-port={info_dict["__rpc"]["port"]}', + f'--rpc-secret={info_dict["__rpc"]["secret"]}'] + # aria2c strips out spaces from the beginning/end of filenames and paths. 
# We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. @@ -304,6 +323,88 @@ class Aria2cFD(ExternalFD): cmd += ['--', info_dict['url']] return cmd + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = sanitized_Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + 'Ytdl-request-proxy': '__noproxy__', + }) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we 
won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' diff --git a/yt_dlp/options.py b/yt_dlp/options.py index e9766c02d..5bbb292de 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -464,14 +464,14 @@ def create_parser(): 'allowed_values': { 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', - 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', - 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', + 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress', + 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', }, 'aliases': { 'youtube-dl': ['all', '-multistreams'], 'youtube-dlc': 
['all', '-no-youtube-channel-redirect', '-no-live-chat'], '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], - '2022': [], + '2022': ['no-external-downloader-progress'], } }, help=( 'Options that can help keep compatibility with youtube-dl or youtube-dlc ' diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 5af176b36..45a7e6eaa 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5243,6 +5243,15 @@ def random_birthday(year_field, month_field, day_field): } +def find_available_port(interface=''): + try: + with socket.socket() as sock: + sock.bind((interface, 0)) + return sock.getsockname()[1] + except OSError: + return None + + # Templates for internet shortcut files, which are plain text files. DOT_URL_LINK_TEMPLATE = '''\ [InternetShortcut] -- cgit v1.2.3 From e756f45ba0648f972be71ce328419a623e381028 Mon Sep 17 00:00:00 2001 From: Matthew Date: Mon, 2 Jan 2023 04:55:11 +0000 Subject: Improve handling for overriding extractors with plugins (#5916) * Extractors replaced with plugin extractors now show in debug output * Better testcase handling * Added documentation Authored by: coletdjnz, pukkandan --- README.md | 9 ++++++--- yt_dlp/YoutubeDL.py | 22 +++++++++++++++------- yt_dlp/extractor/common.py | 13 +++++++++++-- yt_dlp/extractor/extractors.py | 2 ++ yt_dlp/extractor/testurl.py | 11 ++++++----- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 83e69a236..c4bd6ef0c 100644 --- a/README.md +++ b/README.md @@ -1841,7 +1841,7 @@ Plugins can be installed using various methods and locations. * Source: where `/yt_dlp/__main__.py`, `/yt-dlp-plugins//yt_dlp_plugins/` 3. **pip and other locations in `PYTHONPATH`** - * Plugin packages can be installed and managed using `pip`. See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. + * Plugin packages can be installed and managed using `pip`. 
See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Note: plugin files between plugin packages installed with pip must have unique filenames * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. * Note: This does not apply for Pyinstaller/py2exe builds. @@ -1854,9 +1854,12 @@ Run yt-dlp with `--verbose`/`-v` to check if the plugin has been loaded. ## Developing Plugins -See [ytdlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. +See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for a sample plugin package with instructions on how to set up an environment for plugin development. -All public classes with a name ending in `IE` are imported from each file. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) +All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`) + +To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). +Due to the mechanics behind this, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. 
If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8ce71a2dc..e7b469059 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -33,7 +33,7 @@ from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text from .plugins import directories as plugin_directories -from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, @@ -3730,7 +3730,10 @@ class YoutubeDL: # These imports can be slow. So import them only as needed from .extractor.extractors import _LAZY_LOADER - from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors + from .extractor.extractors import ( + _PLUGIN_CLASSES as plugin_ies, + _PLUGIN_OVERRIDES as plugin_ie_overrides + ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) @@ -3808,12 +3811,17 @@ class YoutubeDL: proxy_map.update(handler.proxies) write_debug(f'Proxy map: {proxy_map}') - for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items(): - if not plugins: - continue - write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % ( + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): + display_list = ['%s%s' % ( klass.__name__, '' if klass.__name__ == name else f' as {name}') - for name, klass in plugins.items()))))) + for name, klass in plugins.items()] + if plugin_type == 'Extractor': + display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' + for parent, plugins in plugin_ie_overrides.items()) + if not display_list: + continue + write_debug(f'{plugin_type} Plugins: {", 
".join(sorted(display_list))}') + plugin_dirs = plugin_directories() if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9031f3c11..f48b97a6b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3442,13 +3442,17 @@ class InfoExtractor: continue t['name'] = cls.ie_key() yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_testcases(include_onlymatching) @classmethod def get_webpage_testcases(cls): tests = vars(cls).get('_WEBPAGE_TESTS', []) for t in tests: t['name'] = cls.ie_key() - return tests + yield t + if getattr(cls, '__wrapped__', None): + yield from cls.__wrapped__.get_webpage_testcases() @classproperty(cache=True) def age_limit(cls): @@ -3710,10 +3714,12 @@ class InfoExtractor: if plugin_name: mro = inspect.getmro(cls) super_class = cls.__wrapped__ = mro[mro.index(cls) + 1] - cls.IE_NAME, cls.ie_key = f'{super_class.IE_NAME}+{plugin_name}', super_class.ie_key + cls.PLUGIN_NAME, cls.ie_key = plugin_name, super_class.ie_key + cls.IE_NAME = f'{super_class.IE_NAME}+{plugin_name}' while getattr(super_class, '__wrapped__', None): super_class = super_class.__wrapped__ setattr(sys.modules[super_class.__module__], super_class.__name__, cls) + _PLUGIN_OVERRIDES[super_class].append(cls) return super().__init_subclass__(**kwargs) @@ -3770,3 +3776,6 @@ class UnsupportedURLIE(InfoExtractor): def _real_extract(self, url): raise UnsupportedError(url) + + +_PLUGIN_OVERRIDES = collections.defaultdict(list) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index beda02917..baa69d242 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -24,3 +24,5 @@ if not _LAZY_LOADER: globals().update(_PLUGIN_CLASSES) _ALL_CLASSES[:0] = _PLUGIN_CLASSES.values() + +from .common import _PLUGIN_OVERRIDES # noqa: F401 diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index 
dccca1004..0da01aa53 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -23,11 +23,12 @@ class TestURLIE(InfoExtractor): if len(matching_extractors) == 0: raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True) elif len(matching_extractors) > 1: - try: # Check for exact match - extractor = next( - ie for ie in matching_extractors - if ie.IE_NAME.lower() == extractor_id.lower()) - except StopIteration: + extractor = next(( # Check for exact match + ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower() + ), None) or next(( # Check for exact match without plugin suffix + ie for ie in matching_extractors if ie.IE_NAME.split('+')[0].lower() == extractor_id.lower() + ), None) + if not extractor: raise ExtractorError( 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), expected=True) -- cgit v1.2.3 From b23b503e22ff577d23920e877ee73da478bb4c6f Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 2 Jan 2023 05:44:54 +0000 Subject: [extractor/odnoklassniki] Extract subtitles (#5920) Closes #5744 Authored by: bashonly --- yt_dlp/extractor/odnoklassniki.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 4f325f087..4b73eed37 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -11,6 +11,7 @@ from ..utils import ( int_or_none, qualities, smuggle_url, + traverse_obj, unescapeHTML, unified_strdate, unsmuggle_url, @@ -153,6 +154,26 @@ class OdnoklassnikiIE(InfoExtractor): 'title': 'Быковское крещение', 'duration': 3038.181, }, + 'skip': 'HTTP Error 400', + }, { + 'note': 'subtitles', + 'url': 'https://ok.ru/video/4249587550747', + 'info_dict': { + 'id': '4249587550747', + 'ext': 'mp4', + 'title': 'Small Country An African Childhood (2020) (1080p) +subtitle', + 
'uploader': 'Sunflower Movies', + 'uploader_id': '595802161179', + 'upload_date': '20220816', + 'duration': 6728, + 'age_limit': 0, + 'thumbnail': r're:^https?://i\.mycdn\.me/videoPreview\?.+', + 'like_count': int, + 'subtitles': dict, + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -202,6 +223,7 @@ class OdnoklassnikiIE(InfoExtractor): 'like_count': 0, 'duration': 10444, }, + 'skip': 'Site no longer embeds', }] @classmethod @@ -294,6 +316,16 @@ class OdnoklassnikiIE(InfoExtractor): like_count = int_or_none(metadata.get('likeCount')) + subtitles = {} + for sub in traverse_obj(metadata, ('movie', 'subtitleTracks', ...), expected_type=dict): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles.setdefault(sub.get('language') or 'en', []).append({ + 'url': sub_url, + 'ext': 'vtt', + }) + info = { 'id': video_id, 'title': title, @@ -305,6 +337,7 @@ class OdnoklassnikiIE(InfoExtractor): 'like_count': like_count, 'age_limit': age_limit, 'start_time': start_time, + 'subtitles': subtitles, } # pladform -- cgit v1.2.3 From 13f930abc0c91d8e50336488e4c55defe97aa588 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 2 Jan 2023 05:46:06 +0000 Subject: [extractor/fifa] Fix Preplay extraction (#5921) Closes #5839 Authored by: dirkf --- yt_dlp/extractor/fifa.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index dc00edcb3..8b4db3a8a 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -17,8 +17,10 @@ class FifaIE(InfoExtractor): 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', 'ext': 'mp4', 'categories': ['FIFA Tournaments'], - 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'thumbnail': 
'https://digitalhub.fifa.com/transform/135e2656-3a51-407b-8810-6c34bec5b59b/FMR_2006_Italy_France_Final_Hero', 'duration': 8165, + 'release_timestamp': 1152403200, + 'release_date': '20060709', }, 'params': {'skip_download': 'm3u8'}, }, { @@ -54,7 +56,7 @@ class FifaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) preconnect_link = self._search_regex( - r']+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + r']+\brel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') video_details = self._download_json( f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) @@ -62,22 +64,9 @@ class FifaIE(InfoExtractor): preplay_parameters = self._download_json( f'{preconnect_link}/videoPlayerData/{video_id}', video_id, 'Downloading Preplay Parameters')['preplayParameters'] - cid = preplay_parameters['contentId'] content_data = self._download_json( - f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ - 'v': preplay_parameters['preplayAPIVersion'], - 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], - 'rn': preplay_parameters['randomNumber'], - 'exp': preplay_parameters['tokenExpirationDate'], - 'ct': preplay_parameters['contentType'], - 'cid': cid, - 'mbtracks': preplay_parameters['tracksAssetNumber'], - 'ad': preplay_parameters['adConfiguration'], - 'ad.preroll': int(preplay_parameters['adPreroll']), - 'ad.cmsid': preplay_parameters['adCMSSourceId'], - 'ad.vid': preplay_parameters['adSourceVideoID'], - 'sig': preplay_parameters['signature'], - }) + 'https://content.uplynk.com/preplay/{contentId}/multiple.json?{queryStr}&sig={signature}'.format(**preplay_parameters), + video_id, 'Downloading Content Data') formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) -- cgit v1.2.3 From d7f98714696a4c9691ed28fb9b63395b9227646a Mon Sep 17 00:00:00 2001 From: bashonly 
<88596187+bashonly@users.noreply.github.com> Date: Mon, 2 Jan 2023 05:50:37 +0000 Subject: [extractor/iqiyi] Fix `Iq` JS regex (#5922) Closes #5702 Authored by: bashonly --- yt_dlp/extractor/iqiyi.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index dbc688fb9..eba89f787 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -527,11 +527,14 @@ class IqIE(InfoExtractor): webpack_js_url = self._proto_relative_url(self._search_regex( r').*?\.setup\s*\((?P[^)]+)\)', + r'''(?s)jwplayer\s*\(\s*(?P'|")(?!(?P=q)).+(?P=q)\s*\)(?!).*?\.\s*setup\s*\(\s*(?P(?:\([^)]*\)|[^)])+)\s*\)''', webpage) if mobj: try: @@ -3237,19 +3243,20 @@ class InfoExtractor: def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - # JWPlayer backward compatibility: flattened playlists - # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 - if 'playlist' not in jwplayer_data: - jwplayer_data = {'playlist': [jwplayer_data]} - entries = [] + if not isinstance(jwplayer_data, dict): + return entries - # JWPlayer backward compatibility: single playlist item + playlist_items = jwplayer_data.get('playlist') + # JWPlayer backward compatibility: single playlist item/flattened playlists # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10 - if not isinstance(jwplayer_data['playlist'], list): - jwplayer_data['playlist'] = [jwplayer_data['playlist']] + # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 + if not isinstance(playlist_items, list): + playlist_items = (playlist_items or jwplayer_data, ) - for video_data in jwplayer_data['playlist']: + for video_data in playlist_items: + if not isinstance(video_data, dict): + continue # JWPlayer backward compatibility: flattened sources # 
https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 if 'sources' not in video_data: @@ -3287,6 +3294,13 @@ class InfoExtractor: 'timestamp': int_or_none(video_data.get('pubdate')), 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')), 'subtitles': subtitles, + 'alt_title': clean_html(video_data.get('subtitle')), # attributes used e.g. by Tele5 ... + 'genre': clean_html(video_data.get('genre')), + 'channel': clean_html(dict_get(video_data, ('category', 'channel'))), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'release_year': int_or_none(video_data.get('releasedate')), + 'age_limit': int_or_none(video_data.get('age_restriction')), } # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32 if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']): @@ -3304,7 +3318,7 @@ class InfoExtractor: def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None, m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None): - urls = [] + urls = set() formats = [] for source in jwplayer_sources_data: if not isinstance(source, dict): @@ -3313,14 +3327,14 @@ class InfoExtractor: base_url, self._proto_relative_url(source.get('file'))) if not source_url or source_url in urls: continue - urls.append(source_url) + urls.add(source_url) source_type = source.get('type') or '' ext = mimetype2ext(source_type) or determine_ext(source_url) - if source_type == 'hls' or ext == 'm3u8': + if source_type == 'hls' or ext == 'm3u8' or 'format=m3u8-aapl' in source_url: formats.extend(self._extract_m3u8_formats( source_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=m3u8_id, fatal=False)) - elif source_type == 'dash' or ext == 'mpd': + elif source_type == 'dash' or ext == 'mpd' or 'format=mpd-time-csf' in source_url: formats.extend(self._extract_mpd_formats( source_url, video_id, 
mpd_id=mpd_id, fatal=False)) elif ext == 'smil': @@ -3335,13 +3349,12 @@ class InfoExtractor: 'ext': ext, }) else: + format_id = str_or_none(source.get('label')) height = int_or_none(source.get('height')) - if height is None: + if height is None and format_id: # Often no height is provided but there is a label in # format like "1080p", "720p SD", or 1080. - height = int_or_none(self._search_regex( - r'^(\d{3,4})[pP]?(?:\b|$)', str(source.get('label') or ''), - 'height', default=None)) + height = parse_resolution(format_id).get('height') a_format = { 'url': source_url, 'width': int_or_none(source.get('width')), @@ -3349,6 +3362,7 @@ class InfoExtractor: 'tbr': int_or_none(source.get('bitrate'), scale=1000), 'filesize': int_or_none(source.get('filesize')), 'ext': ext, + 'format_id': format_id } if source_url.startswith('rtmp'): a_format['ext'] = 'flv' diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ffc279023..14d492f07 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -32,6 +32,7 @@ from ..utils import ( unified_timestamp, unsmuggle_url, url_or_none, + urljoin, variadic, xpath_attr, xpath_text, @@ -1867,11 +1868,13 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'description': 'Kelis - 4th Of July', + 'thumbnail': r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, + 'expected_warnings': ['Untested major version'], }, { # KVS Player 'url': 'https://www.kvs-demo.com/embed/105/', @@ -1880,35 +1883,12 @@ class GenericIE(InfoExtractor): 'display_id': 'kelis-4th-of-july', 'ext': 'mp4', 'title': 'Kelis - 4th Of July / Embed Player', - 'thumbnail': 'https://kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', + 'thumbnail': 
r're:https://(?:www\.)?kvs-demo.com/contents/videos_screenshots/0/105/preview.jpg', }, 'params': { 'skip_download': True, }, }, { - # KVS Player - 'url': 'https://thisvid.com/videos/french-boy-pantsed/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player - 'url': 'https://thisvid.com/embed/2400174/', - 'md5': '3397979512c682f6b85b3b04989df224', - 'info_dict': { - 'id': '2400174', - 'display_id': 'french-boy-pantsed', - 'ext': 'mp4', - 'title': 'French Boy Pantsed - ThisVid.com', - 'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg', - } - }, { - # KVS Player 'url': 'https://youix.com/video/leningrad-zoj/', 'md5': '94f96ba95706dc3880812b27b7d8a2b8', 'info_dict': { @@ -1916,8 +1896,8 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Клип: Ленинград - ЗОЖ скачать, смотреть онлайн | Youix.com', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://youix.com/embed/18485', @@ -1927,19 +1907,20 @@ class GenericIE(InfoExtractor): 'display_id': 'leningrad-zoj', 'ext': 'mp4', 'title': 'Ленинград - ЗОЖ', - 'thumbnail': 'https://youix.com/contents/videos_screenshots/18000/18485/preview_480x320_youix_com.mp4.jpg', - } + 'thumbnail': r're:https://youix.com/contents/videos_screenshots/18000/18485/preview(?:_480x320_youix_com.mp4)?\.jpg', + }, }, { # KVS Player 'url': 'https://bogmedia.org/videos/21217/40-nochey-40-nights-2016/', 'md5': '94166bdb26b4cb1fb9214319a629fc51', 'info_dict': { 'id': '21217', - 'display_id': 
'40-nochey-40-nights-2016', + 'display_id': '40-nochey-2016', 'ext': 'mp4', 'title': '40 ночей (2016) - BogMedia.org', + 'description': 'md5:4e6d7d622636eb7948275432eb256dc3', 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', - } + }, }, { # KVS Player (for sites that serve kt_player.js via non-https urls) @@ -1949,9 +1930,9 @@ class GenericIE(InfoExtractor): 'id': '389508', 'display_id': 'syren-de-mer-onlyfans-05-07-2020have-a-happy-safe-holiday5f014e68a220979bdb8cd-source', 'ext': 'mp4', - 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', - 'thumbnail': 'http://www.camhub.world/contents/videos_screenshots/389000/389508/preview.mp4.jpg', - } + 'title': 'Syren De Mer onlyfans_05-07-2020Have_a_happy_safe_holiday5f014e68a220979bdb8cd_source / Embed плеер', + 'thumbnail': r're:https?://www\.camhub\.world/contents/videos_screenshots/389000/389508/preview\.mp4\.jpg', + }, }, { # Reddit-hosted video that will redirect and be processed by RedditIE @@ -2169,7 +2150,20 @@ class GenericIE(InfoExtractor): 'direct': True, 'age_limit': 0, } - } + }, + { + 'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/', + 'md5': 'e2f0a4c329f7986280b7328e24036d60', + 'info_dict': { + 'id': '284002', + 'display_id': 'just-out-of-the-shower-joi', + 'ext': 'mp4', + 'title': 'Just Out Of The Shower JOI - Shooshtime', + 'thumbnail': 'https://i.shoosh.co/contents/videos_screenshots/284000/284002/preview.mp4.jpg', + 'height': 720, + 'age_limit': 18, + }, + }, ] def report_following_redirect(self, new_url): @@ -2235,43 +2229,87 @@ class GenericIE(InfoExtractor): 'entries': entries, } - def _kvs_getrealurl(self, video_url, license_code): + @classmethod + def _kvs_get_real_url(cls, video_url, license_code): if not video_url.startswith('function/0/'): return video_url # not obfuscated - url_path, _, url_query = video_url.partition('?') - urlparts = url_path.split('/')[2:] 
- license = self._kvs_getlicensetoken(license_code) - newmagic = urlparts[5][:32] + parsed = urllib.parse.urlparse(video_url[len('function/0/'):]) + license = cls._kvs_get_license_token(license_code) + urlparts = parsed.path.split('/') - for o in range(len(newmagic) - 1, -1, -1): - new = '' - l = (o + sum(int(n) for n in license[o:])) % 32 + HASH_LENGTH = 32 + hash = urlparts[3][:HASH_LENGTH] + indices = list(range(HASH_LENGTH)) - for i in range(0, len(newmagic)): - if i == o: - new += newmagic[l] - elif i == l: - new += newmagic[o] - else: - new += newmagic[i] - newmagic = new + # Swap indices of hash according to the destination calculated from the license token + accum = 0 + for src in reversed(range(HASH_LENGTH)): + accum += license[src] + dest = (src + accum) % HASH_LENGTH + indices[src], indices[dest] = indices[dest], indices[src] + + urlparts[3] = ''.join(hash[index] for index in indices) + urlparts[3][HASH_LENGTH:] + return urllib.parse.urlunparse(parsed._replace(path='/'.join(urlparts))) - urlparts[5] = newmagic + urlparts[5][32:] - return '/'.join(urlparts) + '?' 
+ url_query + @staticmethod + def _kvs_get_license_token(license): + license = license.replace('$', '') + license_values = [int(char) for char in license] - def _kvs_getlicensetoken(self, license): - modlicense = license.replace('$', '').replace('0', '1') - center = int(len(modlicense) / 2) + modlicense = license.replace('0', '1') + center = len(modlicense) // 2 fronthalf = int(modlicense[:center + 1]) backhalf = int(modlicense[center:]) + modlicense = str(4 * abs(fronthalf - backhalf))[:center + 1] + + return [ + (license_values[index + offset] + current) % 10 + for index, current in enumerate(map(int, modlicense)) + for offset in range(4) + ] + + def _extract_kvs(self, url, webpage, video_id): + flashvars = self._search_json( + r'(?s:]*>.*?var\s+flashvars\s*=)', + webpage, 'flashvars', video_id, transform_source=js_to_json) + + # extract the part after the last / as the display_id from the + # canonical URL. + display_id = self._search_regex( + r'(?:' + r'|)', + webpage, 'display_id', fatal=False) + title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') + + thumbnail = flashvars['preview_url'] + if thumbnail.startswith('//'): + protocol, _, _ = url.partition('/') + thumbnail = protocol + thumbnail + + url_keys = list(filter(re.compile(r'^video_(?:url|alt_url\d*)$').match, flashvars.keys())) + formats = [] + for key in url_keys: + if '/get_file/' not in flashvars[key]: + continue + format_id = flashvars.get(f'{key}_text', key) + formats.append({ + 'url': urljoin(url, self._kvs_get_real_url(flashvars[key], flashvars['license_code'])), + 'format_id': format_id, + 'ext': 'mp4', + **(parse_resolution(format_id) or parse_resolution(flashvars[key])), + 'http_headers': {'Referer': url}, + }) + if not formats[-1].get('height'): + formats[-1]['quality'] = 1 - modlicense = str(4 * abs(fronthalf - backhalf)) - retval = '' - for o in range(0, center + 1): - for i in range(1, 5): - retval += str((int(license[o + i]) + int(modlicense[o])) % 10) - 
return retval + return { + 'id': flashvars['video_id'], + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } def _real_extract(self, url): if url.startswith('//'): @@ -2580,6 +2618,17 @@ class GenericIE(InfoExtractor): self.report_detected('video.js embed') return [{'formats': formats, 'subtitles': subtitles}] + # Look for generic KVS player (before json-ld bc of some urls that break otherwise) + found = self._search_regex(( + r']+?\bsrc\s*=\s*(["\'])https?://(?:\S+?/)+kt_player\.js\?v=(?P\d+(?:\.\d+)+)\1[^>]*>', + r'kt_player\s*\(\s*(["\'])(?:(?!\1)[\w\W])+\1\s*,\s*(["\'])https?://(?:\S+?/)+kt_player\.swf\?v=(?P\d+(?:\.\d+)+)\2\s*,', + ), webpage, 'KVS player', group='ver', default=False) + if found: + self.report_detected('KWS Player') + if found.split('.')[0] not in ('4', '5', '6'): + self.report_warning(f'Untested major version ({found}) in player engine - download may fail.') + return [self._extract_kvs(url, webpage, video_id)] + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url') not in (url, None): @@ -2622,52 +2671,6 @@ class GenericIE(InfoExtractor): ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage)) if found: self.report_detected('JW Player embed') - if not found: - # Look for generic KVS player - found = re.search(r'', webpage) - flashvars = self._parse_json(flashvars.group(1), video_id, transform_source=js_to_json) - - # extract the part after the last / as the display_id from the - # canonical URL. 
- display_id = self._search_regex( - r'(?:' - r'|)', - webpage, 'display_id', fatal=False - ) - title = self._html_search_regex(r'<(?:h1|title)>(?:Video: )?(.+?)', webpage, 'title') - - thumbnail = flashvars['preview_url'] - if thumbnail.startswith('//'): - protocol, _, _ = url.partition('/') - thumbnail = protocol + thumbnail - - url_keys = list(filter(re.compile(r'video_url|video_alt_url\d*').fullmatch, flashvars.keys())) - formats = [] - for key in url_keys: - if '/get_file/' not in flashvars[key]: - continue - format_id = flashvars.get(f'{key}_text', key) - formats.append({ - 'url': self._kvs_getrealurl(flashvars[key], flashvars['license_code']), - 'format_id': format_id, - 'ext': 'mp4', - **(parse_resolution(format_id) or parse_resolution(flashvars[key])) - }) - if not formats[-1].get('height'): - formats[-1]['quality'] = 1 - - return [{ - 'id': flashvars['video_id'], - 'display_id': display_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - }] if not found: # Broaden the search a little bit found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)) diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index 2d9b9a742..d1fc058b9 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -1,71 +1,128 @@ +import re + from .common import InfoExtractor +from ..utils import ( + ExtractorError, + get_element_by_class, + int_or_none, + merge_dicts, + url_or_none, +) + + +class PeekVidsBaseIE(InfoExtractor): + def _real_extract(self, url): + domain, video_id = self._match_valid_url(url).group('domain', 'id') + webpage = self._download_webpage(url, video_id, expected_status=429) + if '>Rate Limit Exceeded' in webpage: + raise ExtractorError( + f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}', + video_id=video_id, expected=True) + + title = self._html_search_regex(r'(?s)]*>(.+?)

', webpage, 'title') + + display_id = video_id + video_id = self._search_regex(r'(?s)]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID') + srcs = self._download_json( + f'https://www.{domain}/v-alt/{video_id}', video_id, + note='Downloading list of source files') + + formats = [] + for k, v in srcs.items(): + f_url = url_or_none(v) + if not f_url: + continue + + height = self._search_regex(r'^data-src(\d{3,})$', k, 'height', default=None) + if not height: + continue + + formats.append({ + 'url': f_url, + 'format_id': height, + 'height': int_or_none(height), + }) + + if not formats: + formats = [{'url': url} for url in srcs.values()] + info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={}) + info.pop('url', None) -class PeekVidsIE(InfoExtractor): + # may not have found the thumbnail if it was in a list in the ld+json + info.setdefault('thumbnail', self._og_search_thumbnail(webpage)) + detail = (get_element_by_class('detail-video-block', webpage) + or get_element_by_class('detail-block', webpage) or '') + info['description'] = self._html_search_regex( + rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|]*>\s*{re.escape(name)}\s*:\s*(.+?)', + html, name, default='') + return list(filter(None, re.split(r'\s+', l))) + + return merge_dicts({ + 'id': video_id, + 'display_id': display_id, + 'age_limit': 18, + 'formats': formats, + 'categories': cat_tags('Categories', detail), + 'tags': cat_tags('Tags', detail), + 'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None), + }, info) + + +class PeekVidsIE(PeekVidsBaseIE): _VALID_URL = r'''(?x) - https?://(?:www\.)?peekvids\.com/ + https?://(?:www\.)?(?Ppeekvids\.com)/ (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=) (?P[^/?&#]*) ''' _TESTS = [{ 'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd', - 'md5': 'a00940646c428e232407e3e62f0e8ef5', + 'md5': '2ff6a357a9717dc9dc9894b51307e9a2', 
'info_dict': { - 'id': 'BSyLMbN0YCd', - 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub', + 'id': '1262717', + 'display_id': 'BSyLMbN0YCd', + 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp', 'ext': 'mp4', 'thumbnail': r're:^https?://.*\.jpg$', - 'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com', + 'description': 'md5:0a61df3620de26c0af8963b1a730cd69', 'timestamp': 1642579329, 'upload_date': '20220119', 'duration': 416, 'view_count': int, 'age_limit': 18, + 'uploader': 'SEXYhub.com', + 'categories': list, + 'tags': list, }, }] - _DOMAIN = 'www.peekvids.com' - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - short_video_id = self._html_search_regex(r'

', webpage, 'title') + + lecture_urls = [] + for lecture_url in re.findall(r'(?i)href="/w/(.+)(?[0-9]+) - (?:/(?P[\da-f]{10}))? - /?(?:[?&].*)?(?:[#].*)?$ - ''' + https?:// + (?: + (?: + www| + player + ) + \. + )? + vimeo\.com/ + (?: + (?Puser)| + (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) + (?:.*?/)?? + (?P + (?: + play_redirect_hls| + moogaloop\.swf)\?clip_id= + )? + (?:videos?/)? + ) + (?P[0-9]+) + (?(u) + /(?!videos|likes)[^/?#]+/?| + (?(q)|/(?P[\da-f]{10}))? + ) + (?:(?(q)[&]|(?(u)|/?)[?]).*?)?(?:[#].*)?$ + ''' IE_NAME = 'vimeo' _EMBED_REGEX = [ # iframe @@ -705,7 +711,12 @@ class VimeoIE(VimeoBaseInfoExtractor): 'params': { 'skip_download': True, }, - } + }, + { + # user playlist alias -> https://vimeo.com/258705797 + 'url': 'https://vimeo.com/user26785108/newspiritualguide', + 'only_matching': True, + }, # https://gettingthingsdone.com/workflowmap/ # vimeo embed with check-password page protected by Referer header ] diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 59eececb6..7af6c8f03 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -21,7 +21,7 @@ from ..utils import ( class XHamsterIE(InfoExtractor): - _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com)' + _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com|xhday\.com|xhvid\.com)' _VALID_URL = r'''(?x) https?:// (?:.+?\.)?%s/ @@ -120,6 +120,9 @@ class XHamsterIE(InfoExtractor): }, { 'url': 'https://xhday.com/videos/strapless-threesome-xhh7yVf', 'only_matching': True, + }, { + 'url': 'https://xhvid.com/videos/lk-mm-xhc6wn6', + 'only_matching': True, }] def _real_extract(self, url): @@ -422,6 +425,9 @@ class XHamsterUserIE(InfoExtractor): }, { 'url': 'https://xhday.com/users/mobhunter', 'only_matching': True, + }, { + 'url': 'https://xhvid.com/users/pelushe21', + 'only_matching': True, }] def _entries(self, user_id): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 
9eb9495a0..994239897 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3149,14 +3149,28 @@ def urlencode_postdata(*args, **kargs): return urllib.parse.urlencode(*args, **kargs).encode('ascii') +def update_url(url, *, query_update=None, **kwargs): + """Replace URL components specified by kwargs + @param url str or parse url tuple + @param query_update update query + @returns str + """ + if isinstance(url, str): + if not kwargs and not query_update: + return url + else: + url = urllib.parse.urlparse(url) + if query_update: + assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time' + kwargs['query'] = urllib.parse.urlencode({ + **urllib.parse.parse_qs(url.query), + **query_update + }, True) + return urllib.parse.urlunparse(url._replace(**kwargs)) + + def update_url_query(url, query): - if not query: - return url - parsed_url = urllib.parse.urlparse(url) - qs = urllib.parse.parse_qs(parsed_url.query) - qs.update(query) - return urllib.parse.urlunparse(parsed_url._replace( - query=urllib.parse.urlencode(qs, True))) + return update_url(url, query_update=query) def update_Request(req, url=None, data=None, headers=None, query=None): -- cgit v1.2.3 From a0a7c0154252900b7b154898744b698624d92b2a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 17 Feb 2023 17:22:03 +0530 Subject: Release 2023.02.17 --- CONTRIBUTORS | 24 ++++++ Changelog.md | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ Collaborators.md | 8 ++ supportedsites.md | 33 +++++-- 4 files changed, 312 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index e3b95e2f3..10fb5775b 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -381,3 +381,27 @@ gschizas JC-Chung mzhou OndrejBakan +ab4cbef +aionescu +amra +ByteDream +carusocr +chexxor +felixonmars +FrankZ85 +FriedrichRehren +gregsadetsky +LeoniePhiline +LowSuggestion912 +Matumo +OIRNOIR +OMEGARAZER +oxamun +pmitchell86 +qbnu +qulaz +rebane2001 +road-master +rohieb +sdht0 +seproDev diff --git 
a/Changelog.md b/Changelog.md index e4cc7fd30..36856e016 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,6 +10,259 @@ * Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master --> +# 2023.02.17 + +* Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) +* Fix `--concat-playlist` +* Imply `--no-progress` when `--print` +* Improve default subtitle language selection by [sdht0](https://github.com/sdht0) +* Make `title` completely non-fatal +* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan) +* Support module level `__bool__` and `property` +* [dependencies] Standardize `Cryptodome` imports +* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr) +* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912) +* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly) +* [jsinterp] Support `if` statements +* [plugins] Fix zip search paths +* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K) +* [utils] `traverse_obj`: Fix more bugs +* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K) +* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars) +* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) +* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu) +* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K) +* [compat_utils] Improve `passthrough_module` +* [compat_utils] Simplify `EnhancedModule` +* [build] Update pyinstaller +* [pyinst] Fix for 
pyinstaller 5.8 +* [devscripts] Provide `pyinstaller` hooks +* [devscripts/pyinstaller] Analyze sub-modules of `Cryptodome` +* [cleanup] Misc fixes and cleanup +* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246) +* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly) +* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) +* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra) +* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai) +* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu) +* [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) +* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) +* **[extractor/youtube] Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) +* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator) +* [extractor/youtube] Handle `consent.youtube` +* [extractor/youtube] Support `/live/` URL +* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb) +* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86) +* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore) +* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr) 
+* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz) +* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream) +* [extractor/crunchyroll] Better message for premium videos +* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K) +* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299) +* [extractor/embedly] Embedded links may be for other extractors +* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001) +* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj) +* [extractor/Hidive] Fix subtitles and age-restriction by [chexxor](https://github.com/chexxor) +* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars) +* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa) +* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly) +* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly) +* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger) +* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly) +* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan) +* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR) +* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev) +* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) +* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246) +* [extractor/radiko] Fix format sorting 
for Time Free by [road-master](https://github.com/road-master) +* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER) +* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan) +* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly) +* [extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren) +* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly) +* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) +* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai) +* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K) +* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai) +* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly) +* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev) +* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga) +* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev) +* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr) +* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev) +* [extractor/drtv] Fix bug in 
[ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly) + + +### 2023.02.17 + +#### Core changes +### Core changes +- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/9ebac35577e61c3d25fafc959655fa3ab04ca7ef) by [pukkandan](https://github.com/pukkandan) +- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/c154302c588c3d4362cec4fc5545e7e5d2bcf7a3) by [pukkandan](https://github.com/pukkandan) +- [Fix `--concat-playlist`](https://github.com/yt-dlp/yt-dlp/commit/59d7de0da545944c48a82fc2937b996d7cd8cc9c) by [pukkandan](https://github.com/pukkandan) +- [Imply `--no-progress` when `--print`](https://github.com/yt-dlp/yt-dlp/commit/5712943b764ba819ef479524c32700228603817a) by [pukkandan](https://github.com/pukkandan) +- [Improve default subtitle language selection](https://github.com/yt-dlp/yt-dlp/commit/376aa24b1541e2bfb23337c0ae9bafa5bb3787f1) ([#6240](https://github.com/yt-dlp/yt-dlp/issues/6240)) by [sdht0](https://github.com/sdht0) +- [Make `title` completely non-fatal](https://github.com/yt-dlp/yt-dlp/commit/7aefd19afed357c80743405ec2ace2148cba42e3) by [pukkandan](https://github.com/pukkandan) +- [Sanitize formats before sorting](https://github.com/yt-dlp/yt-dlp/commit/39f32f1715c0dffb7626dda7307db6388bb7abaa) by [pukkandan](https://github.com/pukkandan) +- [Support module level `__bool__` and `property`](https://github.com/yt-dlp/yt-dlp/commit/754c84e2e416cf6609dd0e4632b4985a08d34043) by [pukkandan](https://github.com/pukkandan) +- [Update to ytdl-commit-2dd6c6e](https://github.com/yt-dlp/yt-dlp/commit/48fde8ac4ccbaaea868f6378814dde395f649fbf) by [pukkandan](https://github.com/pukkandan) +- [extractor/douyutv]: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/f14c2333481c63c24017a41ded7d8f36726504b7) ([#6074](https://github.com/yt-dlp/yt-dlp/issues/6074)) by [hatienl0i261299](https://github.com/hatienl0i261299) +- compat_utils + 
- [Improve `passthrough_module`](https://github.com/yt-dlp/yt-dlp/commit/88426d9446758c707fb511408f2d6f56de952db4) by [pukkandan](https://github.com/pukkandan) + - [Simplify `EnhancedModule`](https://github.com/yt-dlp/yt-dlp/commit/768a00178109508893488e53a0e720b117fbccf6) by [pukkandan](https://github.com/pukkandan) +- dependencies + - [Standardize `Cryptodome` imports](https://github.com/yt-dlp/yt-dlp/commit/f6a765ceb59c55aea06921880c1c87d1ff36e5de) by [pukkandan](https://github.com/pukkandan) +- jsinterp + - [Support `if` statements](https://github.com/yt-dlp/yt-dlp/commit/8b008d62544b82e24a0ba36c30e8e51855d93419) by [pukkandan](https://github.com/pukkandan) +- plugins + - [Fix zip search paths](https://github.com/yt-dlp/yt-dlp/commit/88d8928bf7630801865cf8728ae5c77234324b7b) by [pukkandan](https://github.com/pukkandan) +- utils + - [Don't use Content-length with encoding](https://github.com/yt-dlp/yt-dlp/commit/65e5c021e7c5f23ecbc6a982b72a02ac6cd6900d) ([#6176](https://github.com/yt-dlp/yt-dlp/issues/6176)) by [felixonmars](https://github.com/felixonmars) + - [Fix `time_seconds` to use the provided TZ](https://github.com/yt-dlp/yt-dlp/commit/83c4970e52839ce8761ec61bd19d549aed7d7920) ([#6118](https://github.com/yt-dlp/yt-dlp/issues/6118)) by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) + - [Fix race condition in `make_dir`](https://github.com/yt-dlp/yt-dlp/commit/b25d6cb96337d479bdcb41768356da414c3aa835) ([#6089](https://github.com/yt-dlp/yt-dlp/issues/6089)) by [aionescu](https://github.com/aionescu) + - [Use local kernel32 for file locking on Windows](https://github.com/yt-dlp/yt-dlp/commit/37e325b92ff9d784715ac0e5d1f7d96bf5f45ad9) by [Grub4K](https://github.com/Grub4K) + - traverse_obj + - [Fix more bugs](https://github.com/yt-dlp/yt-dlp/commit/6839ae1f6dde4c0442619e351b3f0442312ab4f9) by [pukkandan](https://github.com/pukkandan) + - [Fix several behavioral 
problems](https://github.com/yt-dlp/yt-dlp/commit/b1bde57bef878478e3503ab07190fd207914ade9) by [Grub4K](https://github.com/Grub4K) + - [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/776995bc109c5cd1aa56b684fada2ce718a386ec) by [Grub4K](https://github.com/Grub4K) +### Extractor changes +- [Fix `_search_nuxt_data`](https://github.com/yt-dlp/yt-dlp/commit/b23167e7542c177f32b22b29857b637dc4aede69) ([#6062](https://github.com/yt-dlp/yt-dlp/issues/6062)) by [LowSuggestion912](https://github.com/LowSuggestion912) +- 91porn + - [Fix title and comment extraction](https://github.com/yt-dlp/yt-dlp/commit/c085cc2def9862ac8a7619ce8ea5dcc177325719) ([#5932](https://github.com/yt-dlp/yt-dlp/issues/5932)) by [pmitchell86](https://github.com/pmitchell86) +- abematv + - [Cache user token whenever appropriate](https://github.com/yt-dlp/yt-dlp/commit/a4f16832213d9e29beecf685d6cd09a2f0b48c87) ([#6216](https://github.com/yt-dlp/yt-dlp/issues/6216)) by [Lesmiscore](https://github.com/Lesmiscore) +- anchorfm + - [Add episode extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ad59ff2ded208bf33f6fe07299a3449eadccdc) ([#6092](https://github.com/yt-dlp/yt-dlp/issues/6092)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) +- bfmtv + - [Support `rmc` prefix](https://github.com/yt-dlp/yt-dlp/commit/20266508dd6247dd3cf0e97b9b9f14c3afc046db) ([#6025](https://github.com/yt-dlp/yt-dlp/issues/6025)) by [carusocr](https://github.com/carusocr) +- biliintl + - [Add intro and ending chapters](https://github.com/yt-dlp/yt-dlp/commit/0ba87dd279d3565ed93c559cf7880ad61eb83af8) ([#6018](https://github.com/yt-dlp/yt-dlp/issues/6018)) by [HobbyistDev](https://github.com/HobbyistDev) +- boxcast + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/9acca71237f42a4775008e51fe26e42f0a39c552) ([#5983](https://github.com/yt-dlp/yt-dlp/issues/5983)) by [HobbyistDev](https://github.com/HobbyistDev) +- clyp + - [Support 
`wav`](https://github.com/yt-dlp/yt-dlp/commit/cc13293c2819b5461be211a9729fd02bb1e2f476) ([#6102](https://github.com/yt-dlp/yt-dlp/issues/6102)) by [qulaz](https://github.com/qulaz) +- crunchyroll + - [Add intro chapter](https://github.com/yt-dlp/yt-dlp/commit/93abb7406b95793f6872d12979b91d5f336b4f43) ([#6023](https://github.com/yt-dlp/yt-dlp/issues/6023)) by [ByteDream](https://github.com/ByteDream) + - [Better message for premium videos](https://github.com/yt-dlp/yt-dlp/commit/44699d10dc8de9c6a338f4a8e5c63506ec4d2118) by [pukkandan](https://github.com/pukkandan) + - [Fix incorrect premium-only error](https://github.com/yt-dlp/yt-dlp/commit/c9d14bd22ab31e2a41f9f8061843668a06db583b) by [Grub4K](https://github.com/Grub4K) +- drtv + - [Fix bug in ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/7481998b169b2a52049fc33bff82034d6563ead4) ([#6034](https://github.com/yt-dlp/yt-dlp/issues/6034)) by [bashonly](https://github.com/bashonly) +- ebay + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/da880559a6ecbbf374cc9f3378e696b55b9599af) ([#6170](https://github.com/yt-dlp/yt-dlp/issues/6170)) by [JChris246](https://github.com/JChris246) +- embedly + - [Embedded links may be for other extractors](https://github.com/yt-dlp/yt-dlp/commit/87ebab0615b1bf9b14b478b055e7059d630b4833) by [pukkandan](https://github.com/pukkandan) +- freesound + - [Workaround invalid URL in webpage](https://github.com/yt-dlp/yt-dlp/commit/9cfdbcbf3f17be51f5b6bb9bb6d880b2f3d67362) ([#6147](https://github.com/yt-dlp/yt-dlp/issues/6147)) by [rebane2001](https://github.com/rebane2001) +- generic + - [Avoid catastrophic backtracking in KVS regex](https://github.com/yt-dlp/yt-dlp/commit/8aa0bd5d10627ece3c1815c01d02fb8bf22847a7) by [bashonly](https://github.com/bashonly) +- goplay + - [Use new API](https://github.com/yt-dlp/yt-dlp/commit/d27bde98832e3b7ffb39f3cf6346011b97bb3bc3) ([#6151](https://github.com/yt-dlp/yt-dlp/issues/6151)) by [jeroenj](https://github.com/jeroenj) +- hidive + - [Fix 
subtitles and age-restriction](https://github.com/yt-dlp/yt-dlp/commit/7708df8da05c94270b43e0630e4e20f6d2d62c55) ([#5828](https://github.com/yt-dlp/yt-dlp/issues/5828)) by [chexxor](https://github.com/chexxor) +- huya + - [Support HD streams](https://github.com/yt-dlp/yt-dlp/commit/fbbb5508ea98ed8709847f5ecced7d70ff05e0ee) ([#6172](https://github.com/yt-dlp/yt-dlp/issues/6172)) by [felixonmars](https://github.com/felixonmars) +- hypergryph + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/31c279a2a2c2ef402a9e6dad9992b310d16439a6) ([#6094](https://github.com/yt-dlp/yt-dlp/issues/6094)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) +- moviepilot + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c62e64cf0122e52fa2175dd1b004ca6b8e1d82af) ([#5954](https://github.com/yt-dlp/yt-dlp/issues/5954)) by [panatexxa](https://github.com/panatexxa) +- nbc + - [Fix XML parsing](https://github.com/yt-dlp/yt-dlp/commit/176a068cde4f2d9dfa0336168caead0b1edcb8ac) by [bashonly](https://github.com/bashonly) + - [Fix `NBC` and `NBCStations` extractors](https://github.com/yt-dlp/yt-dlp/commit/cb73b8460c3ce6d37ab651a4e44bb23b10056154) ([#6033](https://github.com/yt-dlp/yt-dlp/issues/6033)) by [bashonly](https://github.com/bashonly) +- nebula + - [Remove broken cookie support](https://github.com/yt-dlp/yt-dlp/commit/d50ea3ce5abc3b0defc0e5d1e22b22ce9b01b07b) ([#5979](https://github.com/yt-dlp/yt-dlp/issues/5979)) by [hheimbuerger](https://github.com/hheimbuerger) +- nfl + - [Add `NFLPlus` extractors](https://github.com/yt-dlp/yt-dlp/commit/8b37c58f8b5494504acdb5ebe3f8bbd26230f725) ([#6222](https://github.com/yt-dlp/yt-dlp/issues/6222)) by [bashonly](https://github.com/bashonly) +- niconico + - [Add support for like history](https://github.com/yt-dlp/yt-dlp/commit/3b161265add30613bde2e46fca214fe94d09e651) ([#5705](https://github.com/yt-dlp/yt-dlp/issues/5705)) by [Matumo](https://github.com/Matumo), 
[pukkandan](https://github.com/pukkandan) +- nitter + - [Update instance list](https://github.com/yt-dlp/yt-dlp/commit/a9189510baadf0dccd2d4d363bc6f3a441128bb0) ([#6236](https://github.com/yt-dlp/yt-dlp/issues/6236)) by [OIRNOIR](https://github.com/OIRNOIR) +- npo + - [Fix extractor and add HD support](https://github.com/yt-dlp/yt-dlp/commit/cc2389c8ac72a514d4e002a0f6ca5a7d65c7eff0) ([#6155](https://github.com/yt-dlp/yt-dlp/issues/6155)) by [seproDev](https://github.com/seproDev) +- nzonscreen + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d3bb187f01e1e30db05e639fc23a2e1935d777fe) ([#6208](https://github.com/yt-dlp/yt-dlp/issues/6208)) by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) +- odkmedia + - [Add `OnDemandChinaEpisodeIE`](https://github.com/yt-dlp/yt-dlp/commit/10fd9e6ee833c88edf6c633f864f42843a708d32) ([#6116](https://github.com/yt-dlp/yt-dlp/issues/6116)) by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) +- pornez + - [Handle relative URLs in iframe](https://github.com/yt-dlp/yt-dlp/commit/f7efe6dc958eb0689cb9534ff0b4e592040be8df) ([#6171](https://github.com/yt-dlp/yt-dlp/issues/6171)) by [JChris246](https://github.com/JChris246) +- radiko + - [Fix format sorting for Time Free](https://github.com/yt-dlp/yt-dlp/commit/203a06f8554df6db07d8f20f465ecbfe8a14e591) ([#6159](https://github.com/yt-dlp/yt-dlp/issues/6159)) by [road-master](https://github.com/road-master) +- rcs + - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c6b657867ad68af6b930ed0aa11ec5d93ee187b7) ([#5700](https://github.com/yt-dlp/yt-dlp/issues/5700)) by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) +- reddit + - [Support user posts](https://github.com/yt-dlp/yt-dlp/commit/c77df98b1a477a020a57141464d10c0f4d0fdbc9) ([#6173](https://github.com/yt-dlp/yt-dlp/issues/6173)) by [OMEGARAZER](https://github.com/OMEGARAZER) +- rozhlas + - [Add extractor 
RozhlasVltavaIE](https://github.com/yt-dlp/yt-dlp/commit/355d781bed497cbcb254bf2a2737b83fa51c84ea) ([#5951](https://github.com/yt-dlp/yt-dlp/issues/5951)) by [amra](https://github.com/amra) +- rumble + - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/acacb57c7e173b93c6e0f0c43e61b9b2912719d8) by [pukkandan](https://github.com/pukkandan) +- servus + - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/f40e32fb1ac67be5bdbc8e32a3c235abfc4be260) ([#6036](https://github.com/yt-dlp/yt-dlp/issues/6036)) by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- slideslive + - [Fix slides and chapters/duration](https://github.com/yt-dlp/yt-dlp/commit/5ab3534d44231f7711398bc3cfc520e2efd09f50) ([#6024](https://github.com/yt-dlp/yt-dlp/issues/6024)) by [bashonly](https://github.com/bashonly) +- sportdeutschland + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5e1a54f63e393c218a40949012ff0de0ce63cb15) ([#6041](https://github.com/yt-dlp/yt-dlp/issues/6041)) by [FriedrichRehren](https://github.com/FriedrichRehren) +- stripchat + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d5f919bad07017f4b39b55725491b1e9717d47a) ([#5985](https://github.com/yt-dlp/yt-dlp/issues/5985)) by [bashonly](https://github.com/bashonly), [JChris246](https://github.com/JChris246) +- tempo + - [Add IVXPlayer extractor](https://github.com/yt-dlp/yt-dlp/commit/30031be974d210f451100339699ef03b0ddb5f10) ([#5837](https://github.com/yt-dlp/yt-dlp/issues/5837)) by [HobbyistDev](https://github.com/HobbyistDev) +- tnaflix + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/989f47b6315541989bb507f26b431d9586430995) ([#6086](https://github.com/yt-dlp/yt-dlp/issues/6086)) by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) +- tvp + - [Support `stream.tvp.pl`](https://github.com/yt-dlp/yt-dlp/commit/a31d0fa6c315b1145d682361149003d98f1e3782) 
([#6139](https://github.com/yt-dlp/yt-dlp/issues/6139)) by [selfisekai](https://github.com/selfisekai) +- twitter + - [Fix `--no-playlist` and add media `view_count` when using GraphQL](https://github.com/yt-dlp/yt-dlp/commit/b6795fd310f1dd61dddc9fd08e52fe485bdc8a3e) ([#6211](https://github.com/yt-dlp/yt-dlp/issues/6211)) by [Grub4K](https://github.com/Grub4K) + - [Fix graphql extraction on some tweets](https://github.com/yt-dlp/yt-dlp/commit/7543c9c99bcb116b085fdb1f41b84a0ead04c05d) ([#6075](https://github.com/yt-dlp/yt-dlp/issues/6075)) by [selfisekai](https://github.com/selfisekai) +- txxx + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/389896df85ed14eaf74f72531da6c4491d6b73b0) ([#5240](https://github.com/yt-dlp/yt-dlp/issues/5240)) by [chio0hai](https://github.com/chio0hai) +- vimeo + - [Fix `playerConfig` extraction](https://github.com/yt-dlp/yt-dlp/commit/c0cd13fb1c71b842c3d272d0273c03542b467766) ([#6203](https://github.com/yt-dlp/yt-dlp/issues/6203)) by [bashonly](https://github.com/bashonly), [LeoniePhiline](https://github.com/LeoniePhiline) +- viu + - [Add `ViuOTTIndonesiaIE` extractor](https://github.com/yt-dlp/yt-dlp/commit/72671a212d7c939329cb5d34335fa089dd3acbd3) ([#6099](https://github.com/yt-dlp/yt-dlp/issues/6099)) by [HobbyistDev](https://github.com/HobbyistDev) +- vk + - [Fix playlists for new API](https://github.com/yt-dlp/yt-dlp/commit/a9c685453f7019bee94170f936619c6db76c964e) ([#6122](https://github.com/yt-dlp/yt-dlp/issues/6122)) by [the-marenga](https://github.com/the-marenga) +- vlive + - [Replace with `VLiveWebArchiveIE`](https://github.com/yt-dlp/yt-dlp/commit/b3eaab7ca2e118d4db73dcb44afd9c8717db8b67) ([#6196](https://github.com/yt-dlp/yt-dlp/issues/6196)) by [seproDev](https://github.com/seproDev) +- vocaroo + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e4a8b1769e19755acba6d8f212208359905a3159) ([#6117](https://github.com/yt-dlp/yt-dlp/issues/6117)) by [qbnu](https://github.com/qbnu), 
[SuperSonicHub1](https://github.com/SuperSonicHub1) +- wrestleuniverse + - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/e61acb40b2cb6ef45508d72235026d458c9d5dff) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- ximalaya + - [Update album `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/417cdaae08fc447c9d15c53a88e2e9a027cdbf0a) ([#6110](https://github.com/yt-dlp/yt-dlp/issues/6110)) by [carusocr](https://github.com/carusocr) +- yappy + - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/361630015535026712bdb67f804a15b65ff9ee7e) ([#6111](https://github.com/yt-dlp/yt-dlp/issues/6111)) by [HobbyistDev](https://github.com/HobbyistDev) +- youtube + - [Add hyperpipe instances](https://github.com/yt-dlp/yt-dlp/commit/78a78fa74dbc888d20f1b65e1382bf99131597d5) ([#6020](https://github.com/yt-dlp/yt-dlp/issues/6020)) by [Generator](https://github.com/Generator) + - [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/149eb0bbf34fa8fdf8d1e2aa28e17479d099e26b) by [bashonly](https://github.com/bashonly) + - [Handle `consent.youtube`](https://github.com/yt-dlp/yt-dlp/commit/b032ff0f032512bd6fc70c9c1994d906eacc06cb) by [pukkandan](https://github.com/pukkandan) + - [Support `/live/` URL](https://github.com/yt-dlp/yt-dlp/commit/dad2210c0cb9cf03702a9511817ee5ec646d7bc8) by [pukkandan](https://github.com/pukkandan) + - [Update invidious and piped instances](https://github.com/yt-dlp/yt-dlp/commit/05799a48c7dec12b34c8bf951c8d2eceedda59f8) ([#6030](https://github.com/yt-dlp/yt-dlp/issues/6030)) by [rohieb](https://github.com/rohieb) + - [`uploader_id` includes `@` with handle](https://github.com/yt-dlp/yt-dlp/commit/c61cf091a54d3aa3c611722035ccde5ecfe981bb) by [bashonly](https://github.com/bashonly) +- zdf + - [Use android API endpoint for UHD downloads](https://github.com/yt-dlp/yt-dlp/commit/0fe87a8730638490415d630f48e61d264d89c358) 
([#6150](https://github.com/yt-dlp/yt-dlp/issues/6150)) by [seproDev](https://github.com/seproDev) +### Downloader changes +- hls + - [Allow extractors to provide AES key](https://github.com/yt-dlp/yt-dlp/commit/7e68567e508168b345266c0c19812ad50a829eaa) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +### Postprocessor changes +- extractaudio + - [Handle outtmpl without ext](https://github.com/yt-dlp/yt-dlp/commit/f737fb16d8234408c85bc189ccc926fea000515b) ([#6005](https://github.com/yt-dlp/yt-dlp/issues/6005)) by [carusocr](https://github.com/carusocr) +- pyinst + - [Fix for pyinstaller 5.8](https://github.com/yt-dlp/yt-dlp/commit/2e269bd998c61efaf7500907d114a56e5e83e65e) by [pukkandan](https://github.com/pukkandan) +### Misc. changes +- build + - [Update pyinstaller](https://github.com/yt-dlp/yt-dlp/commit/365b9006051ac7d735c20bb63c4907b758233048) by [pukkandan](https://github.com/pukkandan) +- cleanup + - Miscellaneous: [76c9c52](https://github.com/yt-dlp/yt-dlp/commit/76c9c523071150053df7b56956646b680b6a6e05) by [pukkandan](https://github.com/pukkandan) +- devscripts + - [Provide pyinstaller hooks](https://github.com/yt-dlp/yt-dlp/commit/acb1042a9ffa8769fe691beac1011d6da1fcf321) by [pukkandan](https://github.com/pukkandan) +- pyinstaller + - [Analyze sub-modules of `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/b85faf6ffb700058e774e99c04304a7a9257cdd0) by [pukkandan](https://github.com/pukkandan) ### 2023.01.06 diff --git a/Collaborators.md b/Collaborators.md index fe2a7f4b4..83dfbe389 100644 --- a/Collaborators.md +++ b/Collaborators.md @@ -59,3 +59,11 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho * `--cookies-from-browser` support for Firefox containers * Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc * Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, 
TikTok, Twitter, Vimeo etc + + +## [Grub4K](https://github.com/Grub4K) + +[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) [![gh-sponsor](https://img.shields.io/badge/_-Github-red.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) + +* Rework internals like `traverse_obj`, various core refactors and bug fixes +* Helped fix crunchyroll, Twitter, wrestleuniverse, wistia, slideslive etc diff --git a/supportedsites.md b/supportedsites.md index 5cef7ac90..b545ec540 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -63,14 +63,15 @@ - **AluraCourse**: [aluracourse] - **Amara** - **AmazonMiniTV** - - **amazonminitv:season**: Amazon MiniTV Series, "minitv:season:" prefix - - **amazonminitv:series** + - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix + - **amazonminitv:series**: Amazon MiniTV Series, "minitv:series:" prefix - **AmazonReviews** - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** - **AmericasTestKitchenSeason** - **AmHistoryChannel** + - **AnchorFMEpisode** - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Angel** - **AnimalPlanet** @@ -177,6 +178,7 @@ - **BlackboardCollaborate** - **BleacherReport** - **BleacherReportCMS** + - **blerp** - **blogger.com** - **Bloomberg** - **BokeCC** @@ -184,6 +186,7 @@ - **BooyahClips** - **BostonGlobe** - **Box** + - **BoxCastVideo** - **Bpb**: Bundeszentrale für politische Bildung - **BR**: Bayerischer Rundfunk - **BravoTV** @@ -364,6 +367,7 @@ - **dw:article** - **EaglePlatform** - **EbaumsWorld** + - **Ebay** - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson @@ -595,6 +599,7 @@ - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV + - **IVXPlayer** - **Iwara** - **iwara:playlist** - **iwara:user** @@ -626,6 +631,7 @@ - **KickVOD** - **KinjaEmbed** -
**KinoPoisk** + - **Kommunetv** - **KompasVideo** - **KonserthusetPlay** - **Koo** @@ -773,6 +779,7 @@ - **Mofosex** - **MofosexEmbed** - **Mojvideo** + - **MonsterSirenHypergryphMusic** - **Morningstar**: morningstar.com - **Motherless** - **MotherlessGroup** @@ -878,6 +885,8 @@ - **NFHSNetwork** - **nfl.com** - **nfl.com:article** + - **nfl.com:​plus:episode** + - **nfl.com:​plus:replay** - **NhkForSchoolBangumi** - **NhkForSchoolProgramList** - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学) @@ -890,7 +899,7 @@ - **nickelodeonru** - **nicknight** - **niconico**: [niconico] ニコニコ動画 - - **niconico:history**: NicoNico user history. Requires cookies. + - **niconico:history**: NicoNico user history or likes. Requires cookies. - **niconico:playlist** - **niconico:series** - **niconico:tag**: NicoNico video tag URLs @@ -940,6 +949,7 @@ - **NYTimesArticle** - **NYTimesCooking** - **nzherald** + - **NZOnScreen** - **NZZ** - **ocw.mit.edu** - **OdaTV** @@ -949,6 +959,7 @@ - **OktoberfestTV** - **OlympicsReplay** - **on24**: ON24 + - **OnDemandChinaEpisode** - **OnDemandKorea** - **OneFootball** - **OnePlacePodcast** @@ -1063,7 +1074,10 @@ - **Pornotube** - **PornoVoisines** - **PornoXO** + - **PornTop** - **PornTube** + - **Pr0gramm** + - **Pr0grammStatic** - **PrankCast** - **PremiershipRugby** - **PressTV** @@ -1115,6 +1129,8 @@ - **RaiSudtirol** - **RayWenderlich** - **RayWenderlichCourse** + - **RbgTum** + - **RbgTumCourse** - **RBMARadio** - **RCS** - **RCSEmbeds** @@ -1149,6 +1165,7 @@ - **RoosterTeethSeries**: [roosterteeth] - **RottenTomatoes** - **Rozhlas** + - **RozhlasVltava** - **RTBF**: [rtbf] - **RTDocumentry** - **RTDocumentryPlaylist** @@ -1485,6 +1502,7 @@ - **twitter:card** - **twitter:shortener** - **twitter:spaces** + - **Txxx** - **udemy**: [udemy] - **udemy:course**: [udemy] - **UDNEmbed**: 聯合影音 @@ -1572,14 +1590,13 @@ - **Viu** - **viu:ott**: [viu] - **viu:playlist** + - 
**ViuOTTIndonesia** - **Vivo**: vivo.sx - **vk**: [vk] VK - **vk:uservideos**: [vk] VK - User's Videos - **vk:wallpost**: [vk] - - **vlive**: [vlive] - - **vlive:channel**: [vlive] - - **vlive:post**: [vlive] - **vm.tiktok** + - **Vocaroo** - **Vodlocker** - **VODPl** - **VODPlatform** @@ -1628,6 +1645,7 @@ - **wdr:mobile**: (**Currently broken**) - **WDRElefant** - **WDRPage** + - **web.archive:vlive**: web.archive.org saved vlive videos - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **Webcamerapl** - **Webcaster** @@ -1653,6 +1671,8 @@ - **WorldStarHipHop** - **wppilot** - **wppilot:channels** + - **WrestleUniversePPV** + - **WrestleUniverseVOD** - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** @@ -1689,6 +1709,7 @@ - **YandexVideo** - **YandexVideoPreview** - **YapFiles** + - **Yappy** - **YesJapan** - **yinyuetai:video**: 音悦Tai - **YleAreena** -- cgit v1.2.3 From 41bd0dc4d71919dceeb84a3aab9c9934d46eee9f Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 17 Feb 2023 12:31:30 +0000 Subject: [version] update Created by: pukkandan :ci skip all :ci run dl --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 8 ++++---- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/4_bug_report.yml | 8 ++++---- .github/ISSUE_TEMPLATE/5_feature_request.yml | 8 ++++---- .github/ISSUE_TEMPLATE/6_question.yml | 8 ++++---- yt_dlp/version.py | 4 ++-- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 2237665e3..e1103fb84 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or 
later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -62,7 +62,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -70,8 +70,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 0e2940d86..90d7294ac 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - 
label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -74,7 +74,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -82,8 +82,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 92501be2e..5b59852c7 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -18,7 +18,7 @@ body: options: - label: I'm requesting a site-specific feature required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -70,7 +70,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] 
Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -78,8 +78,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index bdfc0efb8..bd4695f87 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -18,7 +18,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details required: true @@ -55,7 +55,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] 
(win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -63,8 +63,8 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index c9e3aba38..8c7f315e9 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -20,7 +20,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. 
DO NOT post duplicates required: true @@ -51,7 +51,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -59,7 +59,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index fe6a4ee3f..4a1344628 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -26,7 +26,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2023.01.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2023.02.17** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. 
DO NOT post duplicates required: true @@ -57,7 +57,7 @@ body: [debug] Command-line config: ['-vU', 'test:youtube'] [debug] Portable config "yt-dlp.conf": ['-i'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version 2023.01.06 [9d339c4] (win32_exe) + [debug] yt-dlp version 2023.02.17 [9d339c4] (win32_exe) [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffprobe -bsfs @@ -65,7 +65,7 @@ body: [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Proxy map: {} [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest - Latest version: 2023.01.06, Current version: 2023.01.06 - yt-dlp is up to date (2023.01.06) + Latest version: 2023.02.17, Current version: 2023.02.17 + yt-dlp is up to date (2023.02.17) render: shell diff --git a/yt_dlp/version.py b/yt_dlp/version.py index f722ec665..3c92a85e1 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2023.01.06' +__version__ = '2023.02.17' -RELEASE_GIT_HEAD = '6becd2508' +RELEASE_GIT_HEAD = 'a0a7c0154' VARIANT = None -- cgit v1.2.3 From 17ca19ab60a6a13eb8a629c51442b5248b0d8394 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 17 Feb 2023 18:38:05 +0530 Subject: [cleanup] Fix `Changelog` --- Changelog.md | 165 +---------------------------------------------------------- 1 file changed, 1 insertion(+), 164 deletions(-) diff --git a/Changelog.md b/Changelog.md index 36856e016..8d3ac089c 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,7 +10,7 @@ * Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master --> -# 2023.02.17 +### 2023.02.17 * Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e) * Fix 
`--concat-playlist` @@ -101,169 +101,6 @@ * [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly) -### 2023.02.17 - -#### Core changes -### Core changes -- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/9ebac35577e61c3d25fafc959655fa3ab04ca7ef) by [pukkandan](https://github.com/pukkandan) -- [Bugfix for 39f32f1715c0dffb7626dda7307db6388bb7abaa](https://github.com/yt-dlp/yt-dlp/commit/c154302c588c3d4362cec4fc5545e7e5d2bcf7a3) by [pukkandan](https://github.com/pukkandan) -- [Fix `--concat-playlist`](https://github.com/yt-dlp/yt-dlp/commit/59d7de0da545944c48a82fc2937b996d7cd8cc9c) by [pukkandan](https://github.com/pukkandan) -- [Imply `--no-progress` when `--print`](https://github.com/yt-dlp/yt-dlp/commit/5712943b764ba819ef479524c32700228603817a) by [pukkandan](https://github.com/pukkandan) -- [Improve default subtitle language selection](https://github.com/yt-dlp/yt-dlp/commit/376aa24b1541e2bfb23337c0ae9bafa5bb3787f1) ([#6240](https://github.com/yt-dlp/yt-dlp/issues/6240)) by [sdht0](https://github.com/sdht0) -- [Make `title` completely non-fatal](https://github.com/yt-dlp/yt-dlp/commit/7aefd19afed357c80743405ec2ace2148cba42e3) by [pukkandan](https://github.com/pukkandan) -- [Sanitize formats before sorting](https://github.com/yt-dlp/yt-dlp/commit/39f32f1715c0dffb7626dda7307db6388bb7abaa) by [pukkandan](https://github.com/pukkandan) -- [Support module level `__bool__` and `property`](https://github.com/yt-dlp/yt-dlp/commit/754c84e2e416cf6609dd0e4632b4985a08d34043) by [pukkandan](https://github.com/pukkandan) -- [Update to ytdl-commit-2dd6c6e](https://github.com/yt-dlp/yt-dlp/commit/48fde8ac4ccbaaea868f6378814dde395f649fbf) by [pukkandan](https://github.com/pukkandan) -- [extractor/douyutv]: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/f14c2333481c63c24017a41ded7d8f36726504b7) ([#6074](https://github.com/yt-dlp/yt-dlp/issues/6074)) by 
[hatienl0i261299](https://github.com/hatienl0i261299) -- compat_utils - - [Improve `passthrough_module`](https://github.com/yt-dlp/yt-dlp/commit/88426d9446758c707fb511408f2d6f56de952db4) by [pukkandan](https://github.com/pukkandan) - - [Simplify `EnhancedModule`](https://github.com/yt-dlp/yt-dlp/commit/768a00178109508893488e53a0e720b117fbccf6) by [pukkandan](https://github.com/pukkandan) -- dependencies - - [Standardize `Cryptodome` imports](https://github.com/yt-dlp/yt-dlp/commit/f6a765ceb59c55aea06921880c1c87d1ff36e5de) by [pukkandan](https://github.com/pukkandan) -- jsinterp - - [Support `if` statements](https://github.com/yt-dlp/yt-dlp/commit/8b008d62544b82e24a0ba36c30e8e51855d93419) by [pukkandan](https://github.com/pukkandan) -- plugins - - [Fix zip search paths](https://github.com/yt-dlp/yt-dlp/commit/88d8928bf7630801865cf8728ae5c77234324b7b) by [pukkandan](https://github.com/pukkandan) -- utils - - [Don't use Content-length with encoding](https://github.com/yt-dlp/yt-dlp/commit/65e5c021e7c5f23ecbc6a982b72a02ac6cd6900d) ([#6176](https://github.com/yt-dlp/yt-dlp/issues/6176)) by [felixonmars](https://github.com/felixonmars) - - [Fix `time_seconds` to use the provided TZ](https://github.com/yt-dlp/yt-dlp/commit/83c4970e52839ce8761ec61bd19d549aed7d7920) ([#6118](https://github.com/yt-dlp/yt-dlp/issues/6118)) by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore) - - [Fix race condition in `make_dir`](https://github.com/yt-dlp/yt-dlp/commit/b25d6cb96337d479bdcb41768356da414c3aa835) ([#6089](https://github.com/yt-dlp/yt-dlp/issues/6089)) by [aionescu](https://github.com/aionescu) - - [Use local kernel32 for file locking on Windows](https://github.com/yt-dlp/yt-dlp/commit/37e325b92ff9d784715ac0e5d1f7d96bf5f45ad9) by [Grub4K](https://github.com/Grub4K) - - traverse_obj - - [Fix more bugs](https://github.com/yt-dlp/yt-dlp/commit/6839ae1f6dde4c0442619e351b3f0442312ab4f9) by [pukkandan](https://github.com/pukkandan) - - [Fix several 
behavioral problems](https://github.com/yt-dlp/yt-dlp/commit/b1bde57bef878478e3503ab07190fd207914ade9) by [Grub4K](https://github.com/Grub4K) - - [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/776995bc109c5cd1aa56b684fada2ce718a386ec) by [Grub4K](https://github.com/Grub4K) -### Extractor changes -- [Fix `_search_nuxt_data`](https://github.com/yt-dlp/yt-dlp/commit/b23167e7542c177f32b22b29857b637dc4aede69) ([#6062](https://github.com/yt-dlp/yt-dlp/issues/6062)) by [LowSuggestion912](https://github.com/LowSuggestion912) -- 91porn - - [Fix title and comment extraction](https://github.com/yt-dlp/yt-dlp/commit/c085cc2def9862ac8a7619ce8ea5dcc177325719) ([#5932](https://github.com/yt-dlp/yt-dlp/issues/5932)) by [pmitchell86](https://github.com/pmitchell86) -- abematv - - [Cache user token whenever appropriate](https://github.com/yt-dlp/yt-dlp/commit/a4f16832213d9e29beecf685d6cd09a2f0b48c87) ([#6216](https://github.com/yt-dlp/yt-dlp/issues/6216)) by [Lesmiscore](https://github.com/Lesmiscore) -- anchorfm - - [Add episode extractor](https://github.com/yt-dlp/yt-dlp/commit/a4ad59ff2ded208bf33f6fe07299a3449eadccdc) ([#6092](https://github.com/yt-dlp/yt-dlp/issues/6092)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) -- bfmtv - - [Support `rmc` prefix](https://github.com/yt-dlp/yt-dlp/commit/20266508dd6247dd3cf0e97b9b9f14c3afc046db) ([#6025](https://github.com/yt-dlp/yt-dlp/issues/6025)) by [carusocr](https://github.com/carusocr) -- biliintl - - [Add intro and ending chapters](https://github.com/yt-dlp/yt-dlp/commit/0ba87dd279d3565ed93c559cf7880ad61eb83af8) ([#6018](https://github.com/yt-dlp/yt-dlp/issues/6018)) by [HobbyistDev](https://github.com/HobbyistDev) -- boxcast - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/9acca71237f42a4775008e51fe26e42f0a39c552) ([#5983](https://github.com/yt-dlp/yt-dlp/issues/5983)) by [HobbyistDev](https://github.com/HobbyistDev) -- clyp - - [Support 
`wav`](https://github.com/yt-dlp/yt-dlp/commit/cc13293c2819b5461be211a9729fd02bb1e2f476) ([#6102](https://github.com/yt-dlp/yt-dlp/issues/6102)) by [qulaz](https://github.com/qulaz) -- crunchyroll - - [Add intro chapter](https://github.com/yt-dlp/yt-dlp/commit/93abb7406b95793f6872d12979b91d5f336b4f43) ([#6023](https://github.com/yt-dlp/yt-dlp/issues/6023)) by [ByteDream](https://github.com/ByteDream) - - [Better message for premium videos](https://github.com/yt-dlp/yt-dlp/commit/44699d10dc8de9c6a338f4a8e5c63506ec4d2118) by [pukkandan](https://github.com/pukkandan) - - [Fix incorrect premium-only error](https://github.com/yt-dlp/yt-dlp/commit/c9d14bd22ab31e2a41f9f8061843668a06db583b) by [Grub4K](https://github.com/Grub4K) -- drtv - - [Fix bug in ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/7481998b169b2a52049fc33bff82034d6563ead4) ([#6034](https://github.com/yt-dlp/yt-dlp/issues/6034)) by [bashonly](https://github.com/bashonly) -- ebay - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/da880559a6ecbbf374cc9f3378e696b55b9599af) ([#6170](https://github.com/yt-dlp/yt-dlp/issues/6170)) by [JChris246](https://github.com/JChris246) -- embedly - - [Embedded links may be for other extractors](https://github.com/yt-dlp/yt-dlp/commit/87ebab0615b1bf9b14b478b055e7059d630b4833) by [pukkandan](https://github.com/pukkandan) -- freesound - - [Workaround invalid URL in webpage](https://github.com/yt-dlp/yt-dlp/commit/9cfdbcbf3f17be51f5b6bb9bb6d880b2f3d67362) ([#6147](https://github.com/yt-dlp/yt-dlp/issues/6147)) by [rebane2001](https://github.com/rebane2001) -- generic - - [Avoid catastrophic backtracking in KVS regex](https://github.com/yt-dlp/yt-dlp/commit/8aa0bd5d10627ece3c1815c01d02fb8bf22847a7) by [bashonly](https://github.com/bashonly) -- goplay - - [Use new API](https://github.com/yt-dlp/yt-dlp/commit/d27bde98832e3b7ffb39f3cf6346011b97bb3bc3) ([#6151](https://github.com/yt-dlp/yt-dlp/issues/6151)) by [jeroenj](https://github.com/jeroenj) -- hidive - - [Fix 
subtitles and age-restriction](https://github.com/yt-dlp/yt-dlp/commit/7708df8da05c94270b43e0630e4e20f6d2d62c55) ([#5828](https://github.com/yt-dlp/yt-dlp/issues/5828)) by [chexxor](https://github.com/chexxor) -- huya - - [Support HD streams](https://github.com/yt-dlp/yt-dlp/commit/fbbb5508ea98ed8709847f5ecced7d70ff05e0ee) ([#6172](https://github.com/yt-dlp/yt-dlp/issues/6172)) by [felixonmars](https://github.com/felixonmars) -- hypergryph - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/31c279a2a2c2ef402a9e6dad9992b310d16439a6) ([#6094](https://github.com/yt-dlp/yt-dlp/issues/6094)) by [bashonly](https://github.com/bashonly), [HobbyistDev](https://github.com/HobbyistDev) -- moviepilot - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c62e64cf0122e52fa2175dd1b004ca6b8e1d82af) ([#5954](https://github.com/yt-dlp/yt-dlp/issues/5954)) by [panatexxa](https://github.com/panatexxa) -- nbc - - [Fix XML parsing](https://github.com/yt-dlp/yt-dlp/commit/176a068cde4f2d9dfa0336168caead0b1edcb8ac) by [bashonly](https://github.com/bashonly) - - [Fix `NBC` and `NBCStations` extractors](https://github.com/yt-dlp/yt-dlp/commit/cb73b8460c3ce6d37ab651a4e44bb23b10056154) ([#6033](https://github.com/yt-dlp/yt-dlp/issues/6033)) by [bashonly](https://github.com/bashonly) -- nebula - - [Remove broken cookie support](https://github.com/yt-dlp/yt-dlp/commit/d50ea3ce5abc3b0defc0e5d1e22b22ce9b01b07b) ([#5979](https://github.com/yt-dlp/yt-dlp/issues/5979)) by [hheimbuerger](https://github.com/hheimbuerger) -- nfl - - [Add `NFLPlus` extractors](https://github.com/yt-dlp/yt-dlp/commit/8b37c58f8b5494504acdb5ebe3f8bbd26230f725) ([#6222](https://github.com/yt-dlp/yt-dlp/issues/6222)) by [bashonly](https://github.com/bashonly) -- niconico - - [Add support for like history](https://github.com/yt-dlp/yt-dlp/commit/3b161265add30613bde2e46fca214fe94d09e651) ([#5705](https://github.com/yt-dlp/yt-dlp/issues/5705)) by [Matumo](https://github.com/Matumo), 
[pukkandan](https://github.com/pukkandan) -- nitter - - [Update instance list](https://github.com/yt-dlp/yt-dlp/commit/a9189510baadf0dccd2d4d363bc6f3a441128bb0) ([#6236](https://github.com/yt-dlp/yt-dlp/issues/6236)) by [OIRNOIR](https://github.com/OIRNOIR) -- npo - - [Fix extractor and add HD support](https://github.com/yt-dlp/yt-dlp/commit/cc2389c8ac72a514d4e002a0f6ca5a7d65c7eff0) ([#6155](https://github.com/yt-dlp/yt-dlp/issues/6155)) by [seproDev](https://github.com/seproDev) -- nzonscreen - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d3bb187f01e1e30db05e639fc23a2e1935d777fe) ([#6208](https://github.com/yt-dlp/yt-dlp/issues/6208)) by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan) -- odkmedia - - [Add `OnDemandChinaEpisodeIE`](https://github.com/yt-dlp/yt-dlp/commit/10fd9e6ee833c88edf6c633f864f42843a708d32) ([#6116](https://github.com/yt-dlp/yt-dlp/issues/6116)) by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan) -- pornez - - [Handle relative URLs in iframe](https://github.com/yt-dlp/yt-dlp/commit/f7efe6dc958eb0689cb9534ff0b4e592040be8df) ([#6171](https://github.com/yt-dlp/yt-dlp/issues/6171)) by [JChris246](https://github.com/JChris246) -- radiko - - [Fix format sorting for Time Free](https://github.com/yt-dlp/yt-dlp/commit/203a06f8554df6db07d8f20f465ecbfe8a14e591) ([#6159](https://github.com/yt-dlp/yt-dlp/issues/6159)) by [road-master](https://github.com/road-master) -- rcs - - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c6b657867ad68af6b930ed0aa11ec5d93ee187b7) ([#5700](https://github.com/yt-dlp/yt-dlp/issues/5700)) by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan) -- reddit - - [Support user posts](https://github.com/yt-dlp/yt-dlp/commit/c77df98b1a477a020a57141464d10c0f4d0fdbc9) ([#6173](https://github.com/yt-dlp/yt-dlp/issues/6173)) by [OMEGARAZER](https://github.com/OMEGARAZER) -- rozhlas - - [Add extractor 
RozhlasVltavaIE](https://github.com/yt-dlp/yt-dlp/commit/355d781bed497cbcb254bf2a2737b83fa51c84ea) ([#5951](https://github.com/yt-dlp/yt-dlp/issues/5951)) by [amra](https://github.com/amra) -- rumble - - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/acacb57c7e173b93c6e0f0c43e61b9b2912719d8) by [pukkandan](https://github.com/pukkandan) -- servus - - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/f40e32fb1ac67be5bdbc8e32a3c235abfc4be260) ([#6036](https://github.com/yt-dlp/yt-dlp/issues/6036)) by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier) -- slideslive - - [Fix slides and chapters/duration](https://github.com/yt-dlp/yt-dlp/commit/5ab3534d44231f7711398bc3cfc520e2efd09f50) ([#6024](https://github.com/yt-dlp/yt-dlp/issues/6024)) by [bashonly](https://github.com/bashonly) -- sportdeutschland - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5e1a54f63e393c218a40949012ff0de0ce63cb15) ([#6041](https://github.com/yt-dlp/yt-dlp/issues/6041)) by [FriedrichRehren](https://github.com/FriedrichRehren) -- stripchat - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d5f919bad07017f4b39b55725491b1e9717d47a) ([#5985](https://github.com/yt-dlp/yt-dlp/issues/5985)) by [bashonly](https://github.com/bashonly), [JChris246](https://github.com/JChris246) -- tempo - - [Add IVXPlayer extractor](https://github.com/yt-dlp/yt-dlp/commit/30031be974d210f451100339699ef03b0ddb5f10) ([#5837](https://github.com/yt-dlp/yt-dlp/issues/5837)) by [HobbyistDev](https://github.com/HobbyistDev) -- tnaflix - - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/989f47b6315541989bb507f26b431d9586430995) ([#6086](https://github.com/yt-dlp/yt-dlp/issues/6086)) by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun) -- tvp - - [Support `stream.tvp.pl`](https://github.com/yt-dlp/yt-dlp/commit/a31d0fa6c315b1145d682361149003d98f1e3782) 
([#6139](https://github.com/yt-dlp/yt-dlp/issues/6139)) by [selfisekai](https://github.com/selfisekai) -- twitter - - [Fix `--no-playlist` and add media `view_count` when using GraphQL](https://github.com/yt-dlp/yt-dlp/commit/b6795fd310f1dd61dddc9fd08e52fe485bdc8a3e) ([#6211](https://github.com/yt-dlp/yt-dlp/issues/6211)) by [Grub4K](https://github.com/Grub4K) - - [Fix graphql extraction on some tweets](https://github.com/yt-dlp/yt-dlp/commit/7543c9c99bcb116b085fdb1f41b84a0ead04c05d) ([#6075](https://github.com/yt-dlp/yt-dlp/issues/6075)) by [selfisekai](https://github.com/selfisekai) -- txxx - - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/389896df85ed14eaf74f72531da6c4491d6b73b0) ([#5240](https://github.com/yt-dlp/yt-dlp/issues/5240)) by [chio0hai](https://github.com/chio0hai) -- vimeo - - [Fix `playerConfig` extraction](https://github.com/yt-dlp/yt-dlp/commit/c0cd13fb1c71b842c3d272d0273c03542b467766) ([#6203](https://github.com/yt-dlp/yt-dlp/issues/6203)) by [bashonly](https://github.com/bashonly), [LeoniePhiline](https://github.com/LeoniePhiline) -- viu - - [Add `ViuOTTIndonesiaIE` extractor](https://github.com/yt-dlp/yt-dlp/commit/72671a212d7c939329cb5d34335fa089dd3acbd3) ([#6099](https://github.com/yt-dlp/yt-dlp/issues/6099)) by [HobbyistDev](https://github.com/HobbyistDev) -- vk - - [Fix playlists for new API](https://github.com/yt-dlp/yt-dlp/commit/a9c685453f7019bee94170f936619c6db76c964e) ([#6122](https://github.com/yt-dlp/yt-dlp/issues/6122)) by [the-marenga](https://github.com/the-marenga) -- vlive - - [Replace with `VLiveWebArchiveIE`](https://github.com/yt-dlp/yt-dlp/commit/b3eaab7ca2e118d4db73dcb44afd9c8717db8b67) ([#6196](https://github.com/yt-dlp/yt-dlp/issues/6196)) by [seproDev](https://github.com/seproDev) -- vocaroo - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e4a8b1769e19755acba6d8f212208359905a3159) ([#6117](https://github.com/yt-dlp/yt-dlp/issues/6117)) by [qbnu](https://github.com/qbnu), 
[SuperSonicHub1](https://github.com/SuperSonicHub1) -- wrestleuniverse - - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/e61acb40b2cb6ef45508d72235026d458c9d5dff) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) -- ximalaya - - [Update album `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/417cdaae08fc447c9d15c53a88e2e9a027cdbf0a) ([#6110](https://github.com/yt-dlp/yt-dlp/issues/6110)) by [carusocr](https://github.com/carusocr) -- yappy - - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/361630015535026712bdb67f804a15b65ff9ee7e) ([#6111](https://github.com/yt-dlp/yt-dlp/issues/6111)) by [HobbyistDev](https://github.com/HobbyistDev) -- youtube - - [Add hyperpipe instances](https://github.com/yt-dlp/yt-dlp/commit/78a78fa74dbc888d20f1b65e1382bf99131597d5) ([#6020](https://github.com/yt-dlp/yt-dlp/issues/6020)) by [Generator](https://github.com/Generator) - - [Fix `uploader_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/149eb0bbf34fa8fdf8d1e2aa28e17479d099e26b) by [bashonly](https://github.com/bashonly) - - [Handle `consent.youtube`](https://github.com/yt-dlp/yt-dlp/commit/b032ff0f032512bd6fc70c9c1994d906eacc06cb) by [pukkandan](https://github.com/pukkandan) - - [Support `/live/` URL](https://github.com/yt-dlp/yt-dlp/commit/dad2210c0cb9cf03702a9511817ee5ec646d7bc8) by [pukkandan](https://github.com/pukkandan) - - [Update invidious and piped instances](https://github.com/yt-dlp/yt-dlp/commit/05799a48c7dec12b34c8bf951c8d2eceedda59f8) ([#6030](https://github.com/yt-dlp/yt-dlp/issues/6030)) by [rohieb](https://github.com/rohieb) - - [`uploader_id` includes `@` with handle](https://github.com/yt-dlp/yt-dlp/commit/c61cf091a54d3aa3c611722035ccde5ecfe981bb) by [bashonly](https://github.com/bashonly) -- zdf - - [Use android API endpoint for UHD downloads](https://github.com/yt-dlp/yt-dlp/commit/0fe87a8730638490415d630f48e61d264d89c358) 
([#6150](https://github.com/yt-dlp/yt-dlp/issues/6150)) by [seproDev](https://github.com/seproDev) -### Downloader changes -- hls - - [Allow extractors to provide AES key](https://github.com/yt-dlp/yt-dlp/commit/7e68567e508168b345266c0c19812ad50a829eaa) ([#6158](https://github.com/yt-dlp/yt-dlp/issues/6158)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) -### Postprocessor changes -- extractaudio - - [Handle outtmpl without ext](https://github.com/yt-dlp/yt-dlp/commit/f737fb16d8234408c85bc189ccc926fea000515b) ([#6005](https://github.com/yt-dlp/yt-dlp/issues/6005)) by [carusocr](https://github.com/carusocr) -- pyinst - - [Fix for pyinstaller 5.8](https://github.com/yt-dlp/yt-dlp/commit/2e269bd998c61efaf7500907d114a56e5e83e65e) by [pukkandan](https://github.com/pukkandan) -### Misc. changes -- build - - [Update pyinstaller](https://github.com/yt-dlp/yt-dlp/commit/365b9006051ac7d735c20bb63c4907b758233048) by [pukkandan](https://github.com/pukkandan) -- cleanup - - Miscellaneous: [76c9c52](https://github.com/yt-dlp/yt-dlp/commit/76c9c523071150053df7b56956646b680b6a6e05) by [pukkandan](https://github.com/pukkandan) -- devscripts - - [Provide pyinstaller hooks](https://github.com/yt-dlp/yt-dlp/commit/acb1042a9ffa8769fe691beac1011d6da1fcf321) by [pukkandan](https://github.com/pukkandan) -- pyinstaller - - [Analyze sub-modules of `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/b85faf6ffb700058e774e99c04304a7a9257cdd0) by [pukkandan](https://github.com/pukkandan) - ### 2023.01.06 * Fix config locations by [Grub4k](https://github.com/Grub4k), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) -- cgit v1.2.3 From 18d295c9e0f95adc179eef345b7af64d6372db78 Mon Sep 17 00:00:00 2001 From: Zhong Lufan Date: Fri, 17 Feb 2023 21:11:16 +0800 Subject: [extractor/tencent] Add more formats and info (#5950) Authored by: Hill-98 --- yt_dlp/extractor/tencent.py | 88 ++++++++++++++++++++++++++++++++------------- 1 file 
changed, 63 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index 44cae0472..42a2175b0 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -8,6 +8,7 @@ from .common import InfoExtractor from ..aes import aes_cbc_encrypt_bytes from ..utils import ( ExtractorError, + float_or_none, determine_ext, int_or_none, js_to_json, @@ -19,6 +20,16 @@ from ..utils import ( class TencentBaseIE(InfoExtractor): """Subclasses must set _API_URL, _APP_VERSION, _PLATFORM, _HOST, _REFERER""" + def _check_api_response(self, api_response): + msg = api_response.get('msg') + if api_response.get('code') != '0.0' and msg is not None: + if msg in ( + '您所在区域暂无此内容版权(如设置VPN请关闭后重试)', + 'This content is not available in your area due to copyright restrictions. Please choose other videos.' + ): + self.raise_geo_restricted() + raise ExtractorError(f'Tencent said: {msg}') + def _get_ckey(self, video_id, url, guid): ua = self.get_param('http_headers')['User-Agent'] @@ -47,6 +58,11 @@ class TencentBaseIE(InfoExtractor): 'sphttps': '1', # Enable HTTPS 'otype': 'json', 'spwm': '1', + 'hevclv': '28', # Enable HEVC + 'drm': '40', # Enable DRM + # For HDR + 'spvideo': '4', + 'spsfrhdr': '100', # For SHD 'host': self._HOST, 'referer': self._REFERER, @@ -63,7 +79,6 @@ class TencentBaseIE(InfoExtractor): def _extract_video_formats_and_subtitles(self, api_response, video_id): video_response = api_response['vl']['vi'][0] - video_width, video_height = video_response.get('vw'), video_response.get('vh') formats, subtitles = [], {} for video_format in video_response['ul']['ui']: @@ -71,47 +86,61 @@ class TencentBaseIE(InfoExtractor): fmts, subs = self._extract_m3u8_formats_and_subtitles( video_format['url'] + traverse_obj(video_format, ('hls', 'pt'), default=''), video_id, 'mp4', fatal=False) - for f in fmts: - f.update({'width': video_width, 'height': video_height}) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) else: 
formats.append({ 'url': f'{video_format["url"]}{video_response["fn"]}?vkey={video_response["fvkey"]}', - 'width': video_width, - 'height': video_height, 'ext': 'mp4', }) + identifier = video_response.get('br') + format_response = traverse_obj( + api_response, ('fl', 'fi', lambda _, v: v['br'] == identifier), + expected_type=dict, get_all=False) or {} + common_info = { + 'width': video_response.get('vw'), + 'height': video_response.get('vh'), + 'abr': float_or_none(format_response.get('audiobandwidth'), scale=1000), + 'vbr': float_or_none(format_response.get('bandwidth'), scale=1000), + 'fps': format_response.get('vfps'), + 'format': format_response.get('sname'), + 'format_id': format_response.get('name'), + 'format_note': format_response.get('resolution'), + 'dynamic_range': {'hdr10': 'hdr10'}.get(format_response.get('name'), 'sdr'), + 'has_drm': format_response.get('drm', 0) != 0, + } + for f in formats: + f.update(common_info) + return formats, subtitles - def _extract_video_native_subtitles(self, api_response, subtitles_format): + def _extract_video_native_subtitles(self, api_response): subtitles = {} for subtitle in traverse_obj(api_response, ('sfl', 'fi')) or (): subtitles.setdefault(subtitle['lang'].lower(), []).append({ 'url': subtitle['url'], - 'ext': subtitles_format, + 'ext': 'srt' if subtitle.get('captionType') == 1 else 'vtt', 'protocol': 'm3u8_native' if determine_ext(subtitle['url']) == 'm3u8' else 'http', }) return subtitles def _extract_all_video_formats_and_subtitles(self, url, video_id, series_id): - formats, subtitles = [], {} - for video_format, subtitle_format, video_quality in ( - # '': 480p, 'shd': 720p, 'fhd': 1080p - ('mp4', 'srt', ''), ('hls', 'vtt', 'shd'), ('hls', 'vtt', 'fhd')): - api_response = self._get_video_api_response( - url, video_id, series_id, subtitle_format, video_format, video_quality) - - if api_response.get('em') != 0 and api_response.get('exem') != 0: - if '您所在区域暂无此内容版权' in api_response.get('msg'): - 
self.raise_geo_restricted() - raise ExtractorError(f'Tencent said: {api_response.get("msg")}') + api_responses = [self._get_video_api_response(url, video_id, series_id, 'srt', 'hls', 'hd')] + self._check_api_response(api_responses[0]) + qualities = traverse_obj(api_responses, (0, 'fl', 'fi', ..., 'name')) or ('shd', 'fhd') + for q in qualities: + if q not in ('ld', 'sd', 'hd'): + api_responses.append(self._get_video_api_response( + url, video_id, series_id, 'vtt', 'hls', q)) + self._check_api_response(api_responses[-1]) + formats, subtitles = [], {} + for api_response in api_responses: fmts, subs = self._extract_video_formats_and_subtitles(api_response, video_id) - native_subtitles = self._extract_video_native_subtitles(api_response, subtitle_format) + native_subtitles = self._extract_video_native_subtitles(api_response) formats.extend(fmts) self._merge_subtitles(subs, native_subtitles, target=subtitles) @@ -120,7 +149,7 @@ class TencentBaseIE(InfoExtractor): def _get_clean_title(self, title): return re.sub( - r'\s*[_\-]\s*(?:Watch online|腾讯视频|(?:高清)?1080P在线观看平台).*?$', + r'\s*[_\-]\s*(?:Watch online|Watch HD Video Online|WeTV|腾讯视频|(?:高清)?1080P在线观看平台).*?$', '', title or '').strip() or None @@ -147,27 +176,29 @@ class VQQVideoIE(VQQBaseIE): _TESTS = [{ 'url': 'https://v.qq.com/x/page/q326831cny0.html', - 'md5': '826ef93682df09e3deac4a6e6e8cdb6e', + 'md5': '84568b3722e15e9cd023b5594558c4a7', 'info_dict': { 'id': 'q326831cny0', 'ext': 'mp4', 'title': '我是选手:雷霆裂阵,终极时刻', 'description': 'md5:e7ed70be89244017dac2a835a10aeb1e', 'thumbnail': r're:^https?://[^?#]+q326831cny0', + 'format_id': r're:^shd', }, }, { 'url': 'https://v.qq.com/x/page/o3013za7cse.html', - 'md5': 'b91cbbeada22ef8cc4b06df53e36fa21', + 'md5': 'cc431c4f9114a55643893c2c8ebf5592', 'info_dict': { 'id': 'o3013za7cse', 'ext': 'mp4', 'title': '欧阳娜娜VLOG', 'description': 'md5:29fe847497a98e04a8c3826e499edd2e', 'thumbnail': r're:^https?://[^?#]+o3013za7cse', + 'format_id': r're:^shd', }, }, { 'url': 
'https://v.qq.com/x/cover/7ce5noezvafma27/a00269ix3l8.html', - 'md5': '71459c5375c617c265a22f083facce67', + 'md5': '87968df6238a65d2478f19c25adf850b', 'info_dict': { 'id': 'a00269ix3l8', 'ext': 'mp4', @@ -175,10 +206,11 @@ class VQQVideoIE(VQQBaseIE): 'description': 'md5:8cae3534327315b3872fbef5e51b5c5b', 'thumbnail': r're:^https?://[^?#]+7ce5noezvafma27', 'series': '鸡毛飞上天', + 'format_id': r're:^shd', }, }, { 'url': 'https://v.qq.com/x/cover/mzc00200p29k31e/s0043cwsgj0.html', - 'md5': '96b9fd4a189fdd4078c111f21d7ac1bc', + 'md5': 'fadd10bf88aec3420f06f19ee1d24c5b', 'info_dict': { 'id': 's0043cwsgj0', 'ext': 'mp4', @@ -186,6 +218,7 @@ class VQQVideoIE(VQQBaseIE): 'description': 'md5:1d8c3a0b8729ae3827fa5b2d3ebd5213', 'thumbnail': r're:^https?://[^?#]+s0043cwsgj0', 'series': '青年理工工作者生活研究所', + 'format_id': r're:^shd', }, }, { # Geo-restricted to China @@ -319,6 +352,7 @@ class WeTvEpisodeIE(WeTvBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 2835, + 'format_id': r're:^shd', }, }, { 'url': 'https://wetv.vip/en/play/u37kgfnfzs73kiu/p0039b9nvik', @@ -333,6 +367,7 @@ class WeTvEpisodeIE(WeTvBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 2454, + 'format_id': r're:^shd', }, }, { 'url': 'https://wetv.vip/en/play/lcxgwod5hapghvw-WeTV-PICK-A-BOO/i0042y00lxp-Zhao-Lusi-Describes-The-First-Experiences-She-Had-In-Who-Rules-The-World-%7C-WeTV-PICK-A-BOO', @@ -342,11 +377,12 @@ class WeTvEpisodeIE(WeTvBaseIE): 'ext': 'mp4', 'title': 'md5:f7a0857dbe5fbbe2e7ad630b92b54e6a', 'description': 'md5:76260cb9cdc0ef76826d7ca9d92fadfa', - 'thumbnail': r're:^https?://[^?#]+lcxgwod5hapghvw', + 'thumbnail': r're:^https?://[^?#]+i0042y00lxp', 'series': 'WeTV PICK-A-BOO', 'episode': 'Episode 0', 'episode_number': 0, 'duration': 442, + 'format_id': r're:^shd', }, }] @@ -406,6 +442,7 @@ class IflixEpisodeIE(IflixBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 2639, + 'format_id': r're:^shd', }, }, { 'url': 
'https://www.iflix.com/en/play/fvvrcc3ra9lbtt1-Take-My-Brother-Away/i0029sd3gm1-EP1%EF%BC%9ATake-My-Brother-Away', @@ -420,6 +457,7 @@ class IflixEpisodeIE(IflixBaseIE): 'episode': 'Episode 1', 'episode_number': 1, 'duration': 228, + 'format_id': r're:^shd', }, }] -- cgit v1.2.3 From da8e2912b165005f76779a115a071cd6132ceedf Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Thu, 23 Feb 2023 04:18:45 +0100 Subject: [utils] `Popen`: Shim undocumented `text_mode` property Fixes #6317 Authored by: Grub4K --- yt_dlp/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 994239897..4fe718bf0 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -879,6 +879,7 @@ class Popen(subprocess.Popen): env = os.environ.copy() self._fix_pyinstaller_ld_path(env) + self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines') if text is True: kwargs['universal_newlines'] = True # For 3.6 compatibility kwargs.setdefault('encoding', 'utf-8') @@ -900,7 +901,7 @@ class Popen(subprocess.Popen): @classmethod def run(cls, *args, timeout=None, **kwargs): with cls(*args, **kwargs) as proc: - default = '' if proc.text_mode else b'' + default = '' if proc.__text_mode else b'' stdout, stderr = proc.communicate_or_kill(timeout=timeout) return stdout or default, stderr or default, proc.returncode -- cgit v1.2.3 From cc09083636ce21e58ff74f45eac2dbda507462b0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Feb 2023 10:39:43 +0530 Subject: [utils] `LenientJSONDecoder`: Parse unclosed objects --- yt_dlp/utils.py | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 4fe718bf0..9ff096433 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -593,21 +593,43 @@ def clean_html(html): class LenientJSONDecoder(json.JSONDecoder): - def __init__(self, *args, transform_source=None, ignore_extra=False, 
**kwargs): + # TODO: Write tests + def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs): self.transform_source, self.ignore_extra = transform_source, ignore_extra + self._close_attempts = 2 * close_objects super().__init__(*args, **kwargs) + @staticmethod + def _close_object(err): + doc = err.doc[:err.pos] + # We need to add comma first to get the correct error message + if err.msg.startswith('Expecting \',\''): + return doc + ',' + elif not doc.endswith(','): + return + + if err.msg.startswith('Expecting property name'): + return doc[:-1] + '}' + elif err.msg.startswith('Expecting value'): + return doc[:-1] + ']' + def decode(self, s): if self.transform_source: s = self.transform_source(s) - try: - if self.ignore_extra: - return self.raw_decode(s.lstrip())[0] - return super().decode(s) - except json.JSONDecodeError as e: - if e.pos is not None: + for attempt in range(self._close_attempts + 1): + try: + if self.ignore_extra: + return self.raw_decode(s.lstrip())[0] + return super().decode(s) + except json.JSONDecodeError as e: + if e.pos is None: + raise + elif attempt < self._close_attempts: + s = self._close_object(e) + if s is not None: + continue raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos) - raise + assert False, 'Too many attempts to decode JSON' def sanitize_open(filename, open_mode): -- cgit v1.2.3 From 43a3eaf96393b712d60cbcf5c6cb1e90ed7f42f5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 26 Feb 2023 10:16:30 +0530 Subject: [extractor] Fix DRM detection in m3u8 Fixes https://github.com/ytdl-org/youtube-dl/issues/31693#issuecomment-1445202857 --- yt_dlp/extractor/common.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ebacc87bc..86bef173f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2063,6 +2063,7 @@ class InfoExtractor: 'protocol': entry_protocol, 'preference': preference, 'quality': quality, 
+ 'has_drm': has_drm, 'vcodec': 'none' if media_type == 'AUDIO' else None, } for idx in _extract_m3u8_playlist_indices(manifest_url)) @@ -2122,6 +2123,7 @@ class InfoExtractor: 'protocol': entry_protocol, 'preference': preference, 'quality': quality, + 'has_drm': has_drm, } resolution = last_stream_inf.get('RESOLUTION') if resolution: -- cgit v1.2.3 From 8e9fe43cd393e69fa49b3d842aa3180c1d105b8f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 26 Feb 2023 10:27:04 +0530 Subject: [extractor/generic] Handle basic-auth when checking redirects Closes #6352 --- yt_dlp/extractor/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 55e55d524..d76ef3e31 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -15,6 +15,7 @@ from ..utils import ( UnsupportedError, determine_ext, dict_get, + extract_basic_auth, format_field, int_or_none, is_html, @@ -2372,9 +2373,8 @@ class GenericIE(InfoExtractor): **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl() - if new_url == urllib.parse.urlparse(url)._replace(scheme='https').geturl(): - url = new_url - elif url != new_url: + url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl() + if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url) if force_videoid: new_url = smuggle_url(new_url, {'force_videoid': force_videoid}) -- cgit v1.2.3 From 4d248e29d20d983ededab0b03d4fe69dff9eb4ed Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:09:20 +0530 Subject: [extractor/GoogleDrive] Fix some audio Only those with source url, but no confirmation page --- yt_dlp/extractor/googledrive.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py index e027ea7c4..9e2ccde00 100644 --- a/yt_dlp/extractor/googledrive.py +++ 
b/yt_dlp/extractor/googledrive.py @@ -3,8 +3,8 @@ import re from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ( - determine_ext, ExtractorError, + determine_ext, get_element_by_class, int_or_none, lowercase_escape, @@ -163,15 +163,13 @@ class GoogleDriveIE(InfoExtractor): video_id = self._match_id(url) video_info = compat_parse_qs(self._download_webpage( 'https://drive.google.com/get_video_info', - video_id, query={'docid': video_id})) + video_id, 'Downloading video webpage', query={'docid': video_id})) def get_value(key): return try_get(video_info, lambda x: x[key][0]) reason = get_value('reason') title = get_value('title') - if not title and reason: - raise ExtractorError(reason, expected=True) formats = [] fmt_stream_map = (get_value('fmt_stream_map') or '').split(',') @@ -216,6 +214,11 @@ class GoogleDriveIE(InfoExtractor): urlh = request_source_file(source_url, 'source') if urlh: def add_source_format(urlh): + nonlocal title + if not title: + title = self._search_regex( + r'\bfilename="([^"]+)"', urlh.headers.get('Content-Disposition'), + 'title', default=None) formats.append({ # Use redirect URLs as download URLs in order to calculate # correct cookies in _calc_cookies. 
@@ -251,7 +254,10 @@ class GoogleDriveIE(InfoExtractor): or 'unable to extract confirmation code') if not formats and reason: - self.raise_no_formats(reason, expected=True) + if title: + self.raise_no_formats(reason, expected=True) + else: + raise ExtractorError(reason, expected=True) hl = get_value('hl') subtitles_id = None -- cgit v1.2.3 From 5038f6d713303e0967d002216e7a88652401c22a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:03:44 +0530 Subject: [extractor/youtube] Construct dash formats with `range` query Closes #6369 --- yt_dlp/extractor/youtube.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index be82bc689..0227a1f83 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3776,10 +3776,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if no_video: dct['abr'] = tbr if no_audio or no_video: - dct['downloader_options'] = { - # Youtube throttles chunks >~10M - 'http_chunk_size': 10485760, - } + CHUNK_SIZE = 10 << 20 + dct.update({ + 'request_data': b'x', + 'protocol': 'http_dash_segments', + 'fragments': [{ + 'url': update_url_query(dct['url'], { + 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, dct["filesize"])}' + }) + } for range_start in range(0, dct['filesize'], CHUNK_SIZE)] + } if dct['filesize'] else { + 'downloader_options': {'http_chunk_size': CHUNK_SIZE} # No longer useful? 
+ }) + if dct.get('ext'): dct['container'] = dct['ext'] + '_dash' -- cgit v1.2.3 From b059188383eee4fa336ef728dda3ff4bb7335625 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 22:32:20 +0530 Subject: [plugins] Don't look in `.egg` directories Closes #6306 --- yt_dlp/plugins.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py index 6eecdb4d0..6422c7a51 100644 --- a/yt_dlp/plugins.py +++ b/yt_dlp/plugins.py @@ -88,7 +88,7 @@ class PluginFinder(importlib.abc.MetaPathFinder): candidate = path / parts if candidate.is_dir(): yield candidate - elif path.suffix in ('.zip', '.egg', '.whl'): + elif path.suffix in ('.zip', '.egg', '.whl') and path.is_file(): if parts in dirs_in_zip(path): yield candidate -- cgit v1.2.3 From 65f6e807804d2af5e00f2aecd72bfc43af19324a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:10:54 +0530 Subject: [dependencies] Simplify `Cryptodome` Closes #6292, closes #6272, closes #6338 --- test/test_aes.py | 4 ++-- yt_dlp/__pyinstaller/hook-yt_dlp.py | 28 +--------------------- yt_dlp/aes.py | 6 ++--- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/compat/compat_utils.py | 2 +- yt_dlp/dependencies/Cryptodome.py | 48 ++++++++++++++++++++++++------------- yt_dlp/dependencies/__init__.py | 2 +- yt_dlp/downloader/hls.py | 2 +- yt_dlp/extractor/bilibili.py | 6 ++--- yt_dlp/extractor/ivi.py | 8 +++---- yt_dlp/extractor/wrestleuniverse.py | 6 ++--- 11 files changed, 51 insertions(+), 63 deletions(-) diff --git a/test/test_aes.py b/test/test_aes.py index 18f15fecb..a26abfd7d 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -48,7 +48,7 @@ class TestAES(unittest.TestCase): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if Cryptodome: + if Cryptodome.AES: decrypted = 
aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) @@ -78,7 +78,7 @@ class TestAES(unittest.TestCase): decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if Cryptodome: + if Cryptodome.AES: decrypted = aes_gcm_decrypt_and_verify_bytes( data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) diff --git a/yt_dlp/__pyinstaller/hook-yt_dlp.py b/yt_dlp/__pyinstaller/hook-yt_dlp.py index 057cfef2f..63dcdffe0 100644 --- a/yt_dlp/__pyinstaller/hook-yt_dlp.py +++ b/yt_dlp/__pyinstaller/hook-yt_dlp.py @@ -1,30 +1,8 @@ -import ast -import os import sys -from pathlib import Path from PyInstaller.utils.hooks import collect_submodules -def find_attribute_accesses(node, name, path=()): - if isinstance(node, ast.Attribute): - path = [*path, node.attr] - if isinstance(node.value, ast.Name) and node.value.id == name: - yield path[::-1] - for child in ast.iter_child_nodes(node): - yield from find_attribute_accesses(child, name, path) - - -def collect_used_submodules(name, level): - for dirpath, _, filenames in os.walk(Path(__file__).parent.parent): - for filename in filenames: - if not filename.endswith('.py'): - continue - with open(Path(dirpath) / filename, encoding='utf8') as f: - for submodule in find_attribute_accesses(ast.parse(f.read()), name): - yield '.'.join(submodule[:level]) - - def pycryptodome_module(): try: import Cryptodome # noqa: F401 @@ -41,12 +19,8 @@ def pycryptodome_module(): def get_hidden_imports(): yield 'yt_dlp.compat._legacy' + yield pycryptodome_module() yield from collect_submodules('websockets') - - crypto = pycryptodome_module() - for sm in set(collect_used_submodules('Cryptodome', 2)): - yield f'{crypto}.{sm}' - # 
These are auto-detected, but explicitly add them just in case yield from ('mutagen', 'brotli', 'certifi') diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index deff0a2b3..b3a383cd9 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -5,14 +5,14 @@ from .compat import compat_ord from .dependencies import Cryptodome from .utils import bytes_to_intlist, intlist_to_bytes -if Cryptodome: +if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ - return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_CBC, iv).decrypt(data) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ - return Cryptodome.Cipher.AES.new(key, Cryptodome.Cipher.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 84d749209..83bf869a8 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -32,9 +32,9 @@ from re import match as compat_Match # noqa: F401 from . 
import compat_expanduser, compat_HTMLParseError, compat_realpath from .compat_utils import passthrough_module -from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 +from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index 8956b3bf1..3ca46d270 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -48,7 +48,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la """Passthrough parent module into a child module, creating the parent if necessary""" def __getattr__(attr): if _is_package(parent): - with contextlib.suppress(ImportError): + with contextlib.suppress(ModuleNotFoundError): return importlib.import_module(f'.{attr}', parent.__name__) ret = from_child(attr) diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py index 2adc51374..a50bce4d4 100644 --- a/yt_dlp/dependencies/Cryptodome.py +++ b/yt_dlp/dependencies/Cryptodome.py @@ -1,8 +1,5 @@ import types -from ..compat import functools -from ..compat.compat_utils import passthrough_module - try: import Cryptodome as _parent except ImportError: @@ -12,19 +9,36 @@ except ImportError: _parent = types.ModuleType('no_Cryptodome') __bool__ = lambda: False -passthrough_module(__name__, _parent, (..., '__version__')) -del passthrough_module +__version__ = '' +AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None +try: + if _parent.__name__ == 'Cryptodome': + from Cryptodome import __version__ + from Cryptodome.Cipher import AES + from Cryptodome.Cipher import PKCS1_v1_5 + from Cryptodome.Cipher import Blowfish + from Cryptodome.Cipher import PKCS1_OAEP + from Cryptodome.Hash 
import SHA1 + from Cryptodome.Hash import CMAC + from Cryptodome.PublicKey import RSA + elif _parent.__name__ == 'Crypto': + from Crypto import __version__ + from Crypto.Cipher import AES + from Crypto.Cipher import PKCS1_v1_5 + from Crypto.Cipher import Blowfish + from Crypto.Cipher import PKCS1_OAEP + from Crypto.Hash import SHA1 + from Crypto.Hash import CMAC + from Crypto.PublicKey import RSA +except ImportError: + __version__ = f'broken {__version__}'.strip() -@property -@functools.cache -def _yt_dlp__identifier(): - if _parent.__name__ == 'Crypto': - from Crypto.Cipher import AES - try: - # In pycrypto, mode defaults to ECB. See: - # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode - AES.new(b'abcdefghijklmnop') - except TypeError: - return 'pycrypto' - return _parent.__name__ +_yt_dlp__identifier = _parent.__name__ +if AES and _yt_dlp__identifier == 'Crypto': + try: + # In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + AES.new(b'abcdefghijklmnop') + except TypeError: + _yt_dlp__identifier = 'pycrypto' diff --git a/yt_dlp/dependencies/__init__.py b/yt_dlp/dependencies/__init__.py index c2214e6db..6e7d29c5c 100644 --- a/yt_dlp/dependencies/__init__.py +++ b/yt_dlp/dependencies/__init__.py @@ -73,7 +73,7 @@ available_dependencies = {k: v for k, v in all_dependencies.items() if v} # Deprecated -Cryptodome_AES = Cryptodome.Cipher.AES if Cryptodome else None +Cryptodome_AES = Cryptodome.AES __all__ = [ diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 29d6f6241..f2868dc52 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -70,7 +70,7 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: has_ffmpeg = FFmpegFD.available() - no_crypto = not Cryptodome and 
'#EXT-X-KEY:METHOD=AES-128' in s + no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s if no_crypto and has_ffmpeg: can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' elif no_crypto: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index f4180633a..2252840b3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -894,15 +894,15 @@ class BiliIntlBaseIE(InfoExtractor): } def _perform_login(self, username, password): - if not Cryptodome: + if not Cryptodome.RSA: raise ExtractorError('pycryptodomex not found. Please install', expected=True) key_data = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None, note='Downloading login key', errnote='Unable to download login key')['data'] - public_key = Cryptodome.PublicKey.RSA.importKey(key_data['key']) - password_hash = Cryptodome.Cipher.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + public_key = Cryptodome.RSA.importKey(key_data['key']) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) login_post = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ 'username': username, diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 96220bea9..fa5ceec95 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -91,7 +91,7 @@ class IviIE(InfoExtractor): for site in (353, 183): content_data = (data % site).encode() if site == 353: - if not Cryptodome: + if not Cryptodome.CMAC: continue timestamp = (self._download_json( @@ -105,8 +105,8 @@ class IviIE(InfoExtractor): query = { 'ts': timestamp, - 'sign': Cryptodome.Hash.CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, - Cryptodome.Cipher.Blowfish).hexdigest(), + 'sign': Cryptodome.CMAC.new(self._LIGHT_KEY, 
timestamp.encode() + content_data, + Cryptodome.Blowfish).hexdigest(), } else: query = {} @@ -126,7 +126,7 @@ class IviIE(InfoExtractor): extractor_msg = 'Video %s does not exist' elif site == 353: continue - elif not Cryptodome: + elif not Cryptodome.CMAC: raise ExtractorError('pycryptodomex not found. Please install', expected=True) elif message: extractor_msg += ': ' + message diff --git a/yt_dlp/extractor/wrestleuniverse.py b/yt_dlp/extractor/wrestleuniverse.py index 78e7c83ab..5c6dec2c4 100644 --- a/yt_dlp/extractor/wrestleuniverse.py +++ b/yt_dlp/extractor/wrestleuniverse.py @@ -50,10 +50,10 @@ class WrestleUniverseBaseIE(InfoExtractor): data=data, headers=headers, query=query, fatal=fatal) def _call_encrypted_api(self, video_id, param='', msg='API', data={}, query={}, fatal=True): - if not Cryptodome: + if not Cryptodome.RSA: raise ExtractorError('pycryptodomex not found. Please install', expected=True) - private_key = Cryptodome.PublicKey.RSA.generate(2048) - cipher = Cryptodome.Cipher.PKCS1_OAEP.new(private_key, hashAlgo=Cryptodome.Hash.SHA1) + private_key = Cryptodome.RSA.generate(2048) + cipher = Cryptodome.PKCS1_OAEP.new(private_key, hashAlgo=Cryptodome.SHA1) def decrypt(data): if not data: -- cgit v1.2.3 From f34804b2f920f62a6e893a14a9e2a2144b14dd23 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:34:43 +0530 Subject: [extractor/youtube] Fix 5038f6d713303e0967d002216e7a88652401c22a * [fragment] Fix `request_data` * [youtube] Don't use POST for now. 
It may be easier to break in future Authored by: bashonly, coletdjnz --- yt_dlp/downloader/fragment.py | 3 ++- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/youtube.py | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 039cb1492..377f138b7 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -466,7 +466,8 @@ class FragmentFD(FileDownloader): for retry in RetryManager(self.params.get('fragment_retries'), error_callback): try: ctx['fragment_count'] = fragment.get('fragment_count') - if not self._download_fragment(ctx, fragment['url'], info_dict, headers): + if not self._download_fragment( + ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): return except (urllib.error.HTTPError, http.client.IncompleteRead) as err: retry.error = err diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 86bef173f..98efe0e9d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -132,6 +132,7 @@ class InfoExtractor: is parsed from a string (in case of fragmented media) for MSS - URL of the ISM manifest. 
+ * request_data Data to send in POST request to the URL * manifest_url The URL of the manifest file in case of fragmented media: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 0227a1f83..f5ffce775 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3778,7 +3778,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if no_audio or no_video: CHUNK_SIZE = 10 << 20 dct.update({ - 'request_data': b'x', 'protocol': 'http_dash_segments', 'fragments': [{ 'url': update_url_query(dct['url'], { -- cgit v1.2.3 From 31e183557fcd1b937582f9429f29207c1261f501 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:50:34 +0530 Subject: [extractor/youtube] Extract channel `view_count` when `/about` tab is passed --- yt_dlp/extractor/youtube.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f5ffce775..d1696349a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4905,6 +4905,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): info['view_count'] = self._get_count(playlist_stats, 1) if info['view_count'] is None: # 0 is allowed info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText') + if info['view_count'] is None: + info['view_count'] = self._get_count(data, ( + 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer', + 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText')) info['playlist_count'] = self._get_count(playlist_stats, 0) if info['playlist_count'] is None: # 0 is allowed @@ -6124,6 +6128,23 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): } }], 'params': {'extract_flat': True}, + }, { + 'url': 'https://www.youtube.com/@3blue1brown/about', + 'info_dict': { + 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'tags': ['Mathematics'], + 'title': '3Blue1Brown - About', + 
'uploader_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', + 'channel_follower_count': int, + 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'channel': '3Blue1Brown', + 'uploader': '3Blue1Brown', + 'view_count': int, + 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', + 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + }, + 'playlist_count': 0, }] @classmethod -- cgit v1.2.3 From 5b28cef72db3b531680d89c121631c73ae05354f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 28 Feb 2023 23:31:02 +0530 Subject: [cleanup] Misc --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 2 + .github/ISSUE_TEMPLATE/2_site_support_request.yml | 2 + .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 + .github/ISSUE_TEMPLATE/4_bug_report.yml | 2 + .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 + .github/ISSUE_TEMPLATE/6_question.yml | 2 + CONTRIBUTING.md | 2 +- Changelog.md | 4 +- README.md | 1 + devscripts/make_issue_template.py | 2 + supportedsites.md | 366 +++++++++++----------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/dependencies/Cryptodome.py | 18 +- yt_dlp/downloader/fragment.py | 2 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/youtube.py | 4 +- 16 files changed, 212 insertions(+), 203 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index e1103fb84..48e8890c5 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -50,6 +50,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 
90d7294ac..d43d62f03 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -62,6 +62,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 5b59852c7..352b47242 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -58,6 +58,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index bd4695f87..7588b8ed8 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -43,6 +43,8 @@ body: options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 8c7f315e9..fdda50b7b 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -40,6 +40,8 @@ body: label: Provide verbose output that clearly demonstrates the 
problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 4a1344628..56ce74654 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -46,6 +46,8 @@ body: label: Provide verbose output that clearly demonstrates the problem options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below - type: textarea id: log diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 551db674e..ae2c45423 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -127,7 +127,7 @@ While these steps won't necessarily ensure that no misuse of the account takes p ### Is the website primarily used for piracy? -We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that is primarily used for infringing copyright. Additionally, it has been decided to not to support porn sites that specialize in deep fake. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management). +We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) to not support services that is primarily used for infringing copyright. Additionally, it has been decided to not to support porn sites that specialize in fakes. 
We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management). diff --git a/Changelog.md b/Changelog.md index 8d3ac089c..24bc8a2e2 100644 --- a/Changelog.md +++ b/Changelog.md @@ -50,8 +50,8 @@ * [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai) * [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu) * [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) -* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) -* **[extractor/youtube] Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) +* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf) +* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly) * [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator) * [extractor/youtube] Handle `consent.youtube` * [extractor/youtube] Support `/live/` URL diff --git a/README.md b/README.md index 9b91775bc..3d3db933a 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ Features marked with a **\*** have been back-ported to youtube-dl Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: +* yt-dlp supports only [Python 3.7+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. 
See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 1ee00f2b8..39b95c8da 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -24,6 +24,8 @@ VERBOSE_TMPL = ''' options: - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU `) required: true + - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead" + required: false - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below required: true - type: textarea diff --git a/supportedsites.md b/supportedsites.md index b545ec540..d7ac6dce5 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -28,14 +28,14 @@ - **abcnews:video** - **abcotvs**: ABC Owned Television Stations - **abcotvs:clips** - - **AbemaTV**: [abematv] + - **AbemaTV**: [*abematv*](## "netrc machine") - **AbemaTVTitle** - **AcademicEarth:Course** - **acast** - **acast:channel** - **AcFunBangumi** - **AcFunVideo** - - **ADN**: [animationdigitalnetwork] Animation Digital Network + - **ADN**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network - **AdobeConnect** - **adobetv** - **adobetv:channel** @@ -47,8 +47,8 @@ - **aenetworks:collection** - **aenetworks:show** - **AeonCo** - - **afreecatv**: [afreecatv] afreecatv.com - - **afreecatv:live**: [afreecatv] afreecatv.com + - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com + - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com - **afreecatv:user** - **AirMozilla** - **AirTV** @@ -59,8 +59,8 @@ - **AlphaPorno** - **Alsace20TV** - **Alsace20TVEmbed** - - **Alura**: [alura] - - **AluraCourse**: [aluracourse] + - **Alura**: [*alura*](## "netrc machine") + - 
**AluraCourse**: [*aluracourse*](## "netrc machine") - **Amara** - **AmazonMiniTV** - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix @@ -100,7 +100,7 @@ - **ArteTVPlaylist** - **AsianCrush** - **AsianCrushPlaylist** - - **AtresPlayer**: [atresplayer] + - **AtresPlayer**: [*atresplayer*](## "netrc machine") - **AtScaleConfEvent** - **ATTTechChannel** - **ATVAt** @@ -128,15 +128,15 @@ - **Bandcamp:user** - **Bandcamp:weekly** - **BannedVideo** - - **bbc**: [bbc] BBC - - **bbc.co.uk**: [bbc] BBC iPlayer + - **bbc**: [*bbc*](## "netrc machine") BBC + - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer - **bbc.co.uk:article**: BBC articles - **bbc.co.uk:​iplayer:episodes** - **bbc.co.uk:​iplayer:group** - **bbc.co.uk:playlist** - - **BBVTV**: [bbvtv] - - **BBVTVLive**: [bbvtv] - - **BBVTVRecordings**: [bbvtv] + - **BBVTV**: [*bbvtv*](## "netrc machine") + - **BBVTVLive**: [*bbvtv*](## "netrc machine") + - **BBVTVRecordings**: [*bbvtv*](## "netrc machine") - **BeatBumpPlaylist** - **BeatBumpVideo** - **Beatport** @@ -165,8 +165,8 @@ - **BilibiliSpaceAudio** - **BilibiliSpacePlaylist** - **BilibiliSpaceVideo** - - **BiliIntl**: [biliintl] - - **biliIntl:series**: [biliintl] + - **BiliIntl**: [*biliintl*](## "netrc machine") + - **biliIntl:series**: [*biliintl*](## "netrc machine") - **BiliLive** - **BioBioChileTV** - **Biography** @@ -232,7 +232,7 @@ - **cbssports:embed** - **CCMA** - **CCTV**: 央视网 - - **CDA**: [cdapl] + - **CDA**: [*cdapl*](## "netrc machine") - **Cellebrite** - **CeskaTelevize** - **CGTN** @@ -286,8 +286,8 @@ - **CrooksAndLiars** - **CrowdBunker** - **CrowdBunkerChannel** - - **crunchyroll**: [crunchyroll] - - **crunchyroll:playlist**: [crunchyroll] + - **crunchyroll**: [*crunchyroll*](## "netrc machine") + - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine") - **CSpan**: C-SPAN - **CSpanCongress** - **CtsNews**: 華視新聞 @@ -295,18 +295,18 @@ - **CTVNews** - **cu.ntv.co.jp**: Nippon Television Network - 
**CultureUnplugged** - - **curiositystream**: [curiositystream] - - **curiositystream:collections**: [curiositystream] - - **curiositystream:series**: [curiositystream] + - **curiositystream**: [*curiositystream*](## "netrc machine") + - **curiositystream:collections**: [*curiositystream*](## "netrc machine") + - **curiositystream:series**: [*curiositystream*](## "netrc machine") - **CWTV** - - **Cybrary**: [cybrary] - - **CybraryCourse**: [cybrary] + - **Cybrary**: [*cybrary*](## "netrc machine") + - **CybraryCourse**: [*cybrary*](## "netrc machine") - **Daftsex** - **DagelijkseKost**: dagelijksekost.een.be - **DailyMail** - - **dailymotion**: [dailymotion] - - **dailymotion:playlist**: [dailymotion] - - **dailymotion:user**: [dailymotion] + - **dailymotion**: [*dailymotion*](## "netrc machine") + - **dailymotion:playlist**: [*dailymotion*](## "netrc machine") + - **dailymotion:user**: [*dailymotion*](## "netrc machine") - **DailyWire** - **DailyWirePodcast** - **damtomo:record** @@ -328,7 +328,7 @@ - **DeuxMNews** - **DHM**: Filmarchiv - Deutsches Historisches Museum - **Digg** - - **DigitalConcertHall**: [digitalconcerthall] DigitalConcertHall extractor + - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor - **DigitallySpeaking** - **Digiteka** - **Discovery** @@ -351,7 +351,7 @@ - **DRBonanza** - **Drooble** - **Dropbox** - - **Dropout**: [dropout] + - **Dropout**: [*dropout*](## "netrc machine") - **DropoutSeason** - **DrTuber** - **drtv** @@ -373,9 +373,9 @@ - **egghead:lesson**: egghead.io lesson - **ehftv** - **eHow** - - **EinsUndEinsTV**: [1und1tv] - - **EinsUndEinsTVLive**: [1und1tv] - - **EinsUndEinsTVRecordings**: [1und1tv] + - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") + - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") + - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - **Einthusan** - **eitb.tv** - **EllenTube** @@ -390,7 +390,7 @@ - **EpiconSeries** - **Epoch** - 
**Eporner** - - **EroProfile**: [eroprofile] + - **EroProfile**: [*eroprofile*](## "netrc machine") - **EroProfile:album** - **ertflix**: ERTFLIX videos - **ertflix:codename**: ERTFLIX videos by codename @@ -405,20 +405,20 @@ - **EuropeanTour** - **Eurosport** - **EUScreen** - - **EWETV**: [ewetv] - - **EWETVLive**: [ewetv] - - **EWETVRecordings**: [ewetv] + - **EWETV**: [*ewetv*](## "netrc machine") + - **EWETVLive**: [*ewetv*](## "netrc machine") + - **EWETVRecordings**: [*ewetv*](## "netrc machine") - **ExpoTV** - **Expressen** - **ExtremeTube** - **EyedoTV** - - **facebook**: [facebook] + - **facebook**: [*facebook*](## "netrc machine") - **facebook:reel** - **FacebookPluginsVideo** - - **fancode:live**: [fancode] - - **fancode:vod**: [fancode] + - **fancode:live**: [*fancode*](## "netrc machine") + - **fancode:vod**: [*fancode*](## "netrc machine") - **faz.net** - - **fc2**: [fc2] + - **fc2**: [*fc2*](## "netrc machine") - **fc2:embed** - **fc2:live** - **Fczenit** @@ -452,20 +452,20 @@ - **freespeech.org** - **freetv:series** - **FreeTvMovies** - - **FrontendMasters**: [frontendmasters] - - **FrontendMastersCourse**: [frontendmasters] - - **FrontendMastersLesson**: [frontendmasters] + - **FrontendMasters**: [*frontendmasters*](## "netrc machine") + - **FrontendMastersCourse**: [*frontendmasters*](## "netrc machine") + - **FrontendMastersLesson**: [*frontendmasters*](## "netrc machine") - **FujiTVFODPlus7** - - **Funimation**: [funimation] - - **funimation:page**: [funimation] - - **funimation:show**: [funimation] + - **Funimation**: [*funimation*](## "netrc machine") + - **funimation:page**: [*funimation*](## "netrc machine") + - **funimation:show**: [*funimation*](## "netrc machine") - **Funk** - **Fusion** - **Fux** - **FuyinTV** - **Gab** - **GabTV** - - **Gaia**: [gaia] + - **Gaia**: [*gaia*](## "netrc machine") - **GameInformer** - **GameJolt** - **GameJoltCommunity** @@ -477,9 +477,9 @@ - **GameStar** - **Gaskrank** - **Gazeta** - - **GDCVault**: 
[gdcvault] + - **GDCVault**: [*gdcvault*](## "netrc machine") - **GediDigital** - - **gem.cbc.ca**: [cbcgem] + - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") - **gem.cbc.ca:live** - **gem.cbc.ca:playlist** - **Genius** @@ -489,11 +489,11 @@ - **Gfycat** - **GiantBomb** - **Giga** - - **GlattvisionTV**: [glattvisiontv] - - **GlattvisionTVLive**: [glattvisiontv] - - **GlattvisionTVRecordings**: [glattvisiontv] + - **GlattvisionTV**: [*glattvisiontv*](## "netrc machine") + - **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine") + - **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine") - **Glide**: Glide mobile video messages (glide.me) - - **Globo**: [globo] + - **Globo**: [*globo*](## "netrc machine") - **GloboArticle** - **glomex**: Glomex videos - **glomex:embed**: Glomex embedded videos @@ -507,7 +507,7 @@ - **google:​podcasts:feed** - **GoogleDrive** - **GoogleDrive:Folder** - - **GoPlay**: [goplay] + - **GoPlay**: [*goplay*](## "netrc machine") - **GoPro** - **Goshgay** - **GoToStage** @@ -527,7 +527,7 @@ - **hgtv.com:show** - **HGTVDe** - **HGTVUsa** - - **HiDive**: [hidive] + - **HiDive**: [*hidive*](## "netrc machine") - **HistoricFilms** - **history:player** - **history:topic**: History.com Topic @@ -544,8 +544,8 @@ - **Howcast** - **HowStuffWorks** - **hrfernsehen** - - **HRTi**: [hrti] - - **HRTiPlaylist**: [hrti] + - **HRTi**: [*hrti*](## "netrc machine") + - **HRTiPlaylist**: [*hrti*](## "netrc machine") - **HSEProduct** - **HSEShow** - **html5** @@ -575,19 +575,19 @@ - **Inc** - **IndavideoEmbed** - **InfoQ** - - **Instagram**: [instagram] - - **instagram:story**: [instagram] - - **instagram:tag**: [instagram] Instagram hashtag search URLs - - **instagram:user**: [instagram] Instagram user profile + - **Instagram**: [*instagram*](## "netrc machine") + - **instagram:story**: [*instagram*](## "netrc machine") + - **instagram:tag**: [*instagram*](## "netrc machine") Instagram hashtag search URLs + - **instagram:user**: 
[*instagram*](## "netrc machine") Instagram user profile - **InstagramIOS**: IOS instagram:// URL - **Internazionale** - **InternetVideoArchive** - **InvestigationDiscovery** - - **IPrima**: [iprima] + - **IPrima**: [*iprima*](## "netrc machine") - **IPrimaCNN** - **iq.com**: International version of iQiyi - **iq.com:album** - - **iqiyi**: [iqiyi] 爱奇艺 + - **iqiyi**: [*iqiyi*](## "netrc machine") 爱奇艺 - **IslamChannel** - **IslamChannelSeries** - **IsraelNationalNews** @@ -660,9 +660,9 @@ - **LcpPlay** - **Le**: 乐视网 - **Lecture2Go** - - **Lecturio**: [lecturio] - - **LecturioCourse**: [lecturio] - - **LecturioDeCourse**: [lecturio] + - **Lecturio**: [*lecturio*](## "netrc machine") + - **LecturioCourse**: [*lecturio*](## "netrc machine") + - **LecturioDeCourse**: [*lecturio*](## "netrc machine") - **LEGO** - **Lemonde** - **Lenta** @@ -678,10 +678,10 @@ - **limelight:channel_list** - **LineLive** - **LineLiveChannel** - - **LinkedIn**: [linkedin] - - **linkedin:learning**: [linkedin] - - **linkedin:​learning:course**: [linkedin] - - **LinuxAcademy**: [linuxacademy] + - **LinkedIn**: [*linkedin*](## "netrc machine") + - **linkedin:learning**: [*linkedin*](## "netrc machine") + - **linkedin:​learning:course**: [*linkedin*](## "netrc machine") + - **LinuxAcademy**: [*linuxacademy*](## "netrc machine") - **Liputan6** - **ListenNotes** - **LiTV** @@ -696,8 +696,8 @@ - **LoveHomePorn** - **LRTStream** - **LRTVOD** - - **lynda**: [lynda] lynda.com videos - - **lynda:course**: [lynda] lynda.com online courses + - **lynda**: [*lynda*](## "netrc machine") lynda.com videos + - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses - **m6** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru @@ -767,13 +767,13 @@ - **mixcloud:user** - **MLB** - **MLBArticle** - - **MLBTV**: [mlb] + - **MLBTV**: [*mlb*](## "netrc machine") - **MLBVideo** - **MLSSoccer** - **Mnet** - - **MNetTV**: [mnettv] - - **MNetTVLive**: [mnettv] - - **MNetTVRecordings**: [mnettv] + - 
**MNetTV**: [*mnettv*](## "netrc machine") + - **MNetTVLive**: [*mnettv*](## "netrc machine") + - **MNetTVRecordings**: [*mnettv*](## "netrc machine") - **MochaVideo** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - **Mofosex** @@ -852,9 +852,9 @@ - **ndr:embed** - **ndr:​embed:base** - **NDTV** - - **Nebula**: [watchnebula] - - **nebula:channel**: [watchnebula] - - **nebula:subscriptions**: [watchnebula] + - **Nebula**: [*watchnebula*](## "netrc machine") + - **nebula:channel**: [*watchnebula*](## "netrc machine") + - **nebula:subscriptions**: [*watchnebula*](## "netrc machine") - **NerdCubedFeed** - **netease:album**: 网易云音乐 - 专辑 - **netease:djradio**: 网易云音乐 - 电台 @@ -863,9 +863,9 @@ - **netease:program**: 网易云音乐 - 电台节目 - **netease:singer**: 网易云音乐 - 歌手 - **netease:song**: 网易云音乐 - - **NetPlusTV**: [netplus] - - **NetPlusTVLive**: [netplus] - - **NetPlusTVRecordings**: [netplus] + - **NetPlusTV**: [*netplus*](## "netrc machine") + - **NetPlusTVLive**: [*netplus*](## "netrc machine") + - **NetPlusTVRecordings**: [*netplus*](## "netrc machine") - **Netverse** - **NetversePlaylist** - **NetverseSearch**: "netsearch:" prefix @@ -898,7 +898,7 @@ - **nickelodeon:br** - **nickelodeonru** - **nicknight** - - **niconico**: [niconico] ニコニコ動画 + - **niconico**: [*niconico*](## "netrc machine") ニコニコ動画 - **niconico:history**: NicoNico user history or likes. Requires cookies. 
- **niconico:playlist** - **niconico:series** @@ -911,7 +911,7 @@ - **Nitter** - **njoy**: N-JOY - **njoy:embed** - - **NJPWWorld**: [njpwworld] 新日本プロレスワールド + - **NJPWWorld**: [*njpwworld*](## "netrc machine") 新日本プロレスワールド - **NobelPrize** - **NoicePodcast** - **NonkTube** @@ -980,11 +980,11 @@ - **orf:iptv**: iptv.ORF.at - **orf:radio** - **orf:tvthek**: ORF TVthek - - **OsnatelTV**: [osnateltv] - - **OsnatelTVLive**: [osnateltv] - - **OsnatelTVRecordings**: [osnateltv] + - **OsnatelTV**: [*osnateltv*](## "netrc machine") + - **OsnatelTVLive**: [*osnateltv*](## "netrc machine") + - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine") - **OutsideTV** - - **PacktPub**: [packtpub] + - **PacktPub**: [*packtpub*](## "netrc machine") - **PacktPubCourse** - **PalcoMP3:artist** - **PalcoMP3:song** @@ -1007,7 +1007,7 @@ - **peer.tv** - **PeerTube** - **PeerTube:Playlist** - - **peloton**: [peloton] + - **peloton**: [*peloton*](## "netrc machine") - **peloton:live**: Peloton Live - **People** - **PerformGroup** @@ -1016,7 +1016,7 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** - - **Piapro**: [piapro] + - **Piapro**: [*piapro*](## "netrc machine") - **Picarto** - **PicartoVod** - **Piksel** @@ -1027,11 +1027,11 @@ - **pixiv:​sketch:user** - **Pladform** - **PlanetMarathi** - - **Platzi**: [platzi] - - **PlatziCourse**: [platzi] + - **Platzi**: [*platzi*](## "netrc machine") + - **PlatziCourse**: [*platzi*](## "netrc machine") - **play.fm** - **player.sky.it** - - **PlayPlusTV**: [playplustv] + - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlayStuff** - **PlaysTV** - **PlaySuisse** @@ -1039,7 +1039,7 @@ - **Playvid** - **PlayVids** - **Playwire** - - **pluralsight**: [pluralsight] + - **pluralsight**: [*pluralsight*](## "netrc machine") - **pluralsight:course** - **PlutoTV** - **PodbayFM** @@ -1048,8 +1048,8 @@ - **podomatic** - **Pokemon** - **PokemonWatch** - - **PokerGo**: [pokergo] - - **PokerGoCollection**: 
[pokergo] + - **PokerGo**: [*pokergo*](## "netrc machine") + - **PokerGoCollection**: [*pokergo*](## "netrc machine") - **PolsatGo** - **PolskieRadio** - **polskieradio:audition** @@ -1066,11 +1066,11 @@ - **Pornez** - **PornFlip** - **PornHd** - - **PornHub**: [pornhub] PornHub and Thumbzilla - - **PornHubPagedVideoList**: [pornhub] - - **PornHubPlaylist**: [pornhub] - - **PornHubUser**: [pornhub] - - **PornHubUserVideosUpload**: [pornhub] + - **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla + - **PornHubPagedVideoList**: [*pornhub*](## "netrc machine") + - **PornHubPlaylist**: [*pornhub*](## "netrc machine") + - **PornHubUser**: [*pornhub*](## "netrc machine") + - **PornHubUserVideosUpload**: [*pornhub*](## "netrc machine") - **Pornotube** - **PornoVoisines** - **PornoXO** @@ -1098,9 +1098,9 @@ - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 - - **QuantumTV**: [quantumtv] - - **QuantumTVLive**: [quantumtv] - - **QuantumTVRecordings**: [quantumtv] + - **QuantumTV**: [*quantumtv*](## "netrc machine") + - **QuantumTVLive**: [*quantumtv*](## "netrc machine") + - **QuantumTVRecordings**: [*quantumtv*](## "netrc machine") - **Qub** - **R7** - **R7Article** @@ -1157,16 +1157,16 @@ - **RICE** - **RMCDecouverte** - **RockstarGames** - - **Rokfin**: [rokfin] + - **Rokfin**: [*rokfin*](## "netrc machine") - **rokfin:channel**: Rokfin Channels - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix - **rokfin:stack**: Rokfin Stacks - - **RoosterTeeth**: [roosterteeth] - - **RoosterTeethSeries**: [roosterteeth] + - **RoosterTeeth**: [*roosterteeth*](## "netrc machine") + - **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine") - **RottenTomatoes** - **Rozhlas** - **RozhlasVltava** - - **RTBF**: [rtbf] + - **RTBF**: [*rtbf*](## "netrc machine") - **RTDocumentry** - **RTDocumentryPlaylist** - **rte**: Raidió Teilifís Éireann TV @@ -1208,16 +1208,16 @@ - **Ruutu** - **Ruv** - **ruv.is:spila** - - 
**safari**: [safari] safaribooksonline.com online video - - **safari:api**: [safari] - - **safari:course**: [safari] safaribooksonline.com online courses + - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video + - **safari:api**: [*safari*](## "netrc machine") + - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses - **Saitosan** - - **SAKTV**: [saktv] - - **SAKTVLive**: [saktv] - - **SAKTVRecordings**: [saktv] - - **SaltTV**: [salttv] - - **SaltTVLive**: [salttv] - - **SaltTVRecordings**: [salttv] + - **SAKTV**: [*saktv*](## "netrc machine") + - **SAKTVLive**: [*saktv*](## "netrc machine") + - **SAKTVRecordings**: [*saktv*](## "netrc machine") + - **SaltTV**: [*salttv*](## "netrc machine") + - **SaltTVLive**: [*salttv*](## "netrc machine") + - **SaltTVRecordings**: [*salttv*](## "netrc machine") - **SampleFocus** - **Sangiin**: 参議院インターネット審議中継 (archive) - **Sapo**: SAPO Vídeos @@ -1233,8 +1233,8 @@ - **ScrippsNetworks** - **scrippsnetworks:watch** - **Scrolller** - - **SCTE**: [scte] - - **SCTECourse**: [scte] + - **SCTE**: [*scte*](## "netrc machine") + - **SCTECourse**: [*scte*](## "netrc machine") - **Seeker** - **SenateGov** - **SenateISVP** @@ -1243,7 +1243,7 @@ - **Sexu** - **SeznamZpravy** - **SeznamZpravyArticle** - - **Shahid**: [shahid] + - **Shahid**: [*shahid*](## "netrc machine") - **ShahidShow** - **Shared**: shared.sx - **ShareVideosEmbed** @@ -1273,16 +1273,16 @@ - **Smotrim** - **Snotr** - **Sohu** - - **SonyLIV**: [sonyliv] + - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** - - **soundcloud**: [soundcloud] - - **soundcloud:playlist**: [soundcloud] - - **soundcloud:related**: [soundcloud] - - **soundcloud:search**: [soundcloud] Soundcloud search; "scsearch:" prefix - - **soundcloud:set**: [soundcloud] - - **soundcloud:trackstation**: [soundcloud] - - **soundcloud:user**: [soundcloud] - - **soundcloud:​user:permalink**: [soundcloud] + - **soundcloud**: [*soundcloud*](## 
"netrc machine") + - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") + - **soundcloud:related**: [*soundcloud*](## "netrc machine") + - **soundcloud:search**: [*soundcloud*](## "netrc machine") Soundcloud search; "scsearch:" prefix + - **soundcloud:set**: [*soundcloud*](## "netrc machine") + - **soundcloud:trackstation**: [*soundcloud*](## "netrc machine") + - **soundcloud:user**: [*soundcloud*](## "netrc machine") + - **soundcloud:​user:permalink**: [*soundcloud*](## "netrc machine") - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** @@ -1349,13 +1349,13 @@ - **Tass** - **TBS** - **TDSLifeway** - - **Teachable**: [teachable] - - **TeachableCourse**: [teachable] + - **Teachable**: [*teachable*](## "netrc machine") + - **TeachableCourse**: [*teachable*](## "netrc machine") - **teachertube**: teachertube.com videos - **teachertube:​user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamTreeHouse**: [teamtreehouse] + - **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine") - **TechTalks** - **techtv.mit.edu** - **TedEmbed** @@ -1378,8 +1378,8 @@ - **TeleTask** - **Telewebion** - **Tempo** - - **TennisTV**: [tennistv] - - **TenPlay**: [10play] + - **TennisTV**: [*tennistv*](## "netrc machine") + - **TenPlay**: [*10play*](## "netrc machine") - **TF1** - **TFO** - **TheHoleTv** @@ -1417,13 +1417,13 @@ - **tokfm:audition** - **tokfm:podcast** - **ToonGoggles** - - **tou.tv**: [toutv] + - **tou.tv**: [*toutv*](## "netrc machine") - **Toypics**: Toypics video - **ToypicsUser**: Toypics user profile - **TrailerAddict**: (**Currently broken**) - **TravelChannel** - - **Triller**: [triller] - - **TrillerUser**: [triller] + - **Triller**: [*triller*](## "netrc machine") + - **TrillerUser**: [*triller*](## "netrc machine") - **Trilulilu** - **Trovo** - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix @@ -1435,11 +1435,11 @@ - **Truth** - **TruTV** - **Tube8** - - 
**TubeTuGraz**: [tubetugraz] tube.tugraz.at - - **TubeTuGrazSeries**: [tubetugraz] - - **TubiTv**: [tubitv] + - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at + - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine") + - **TubiTv**: [*tubitv*](## "netrc machine") - **TubiTvShow** - - **Tumblr**: [tumblr] + - **Tumblr**: [*tumblr*](## "netrc machine") - **tunein:clip** - **tunein:program** - **tunein:station** @@ -1489,13 +1489,13 @@ - **TwitCasting** - **TwitCastingLive** - **TwitCastingUser** - - **twitch:clips**: [twitch] - - **twitch:stream**: [twitch] - - **twitch:vod**: [twitch] - - **TwitchCollection**: [twitch] - - **TwitchVideos**: [twitch] - - **TwitchVideosClips**: [twitch] - - **TwitchVideosCollections**: [twitch] + - **twitch:clips**: [*twitch*](## "netrc machine") + - **twitch:stream**: [*twitch*](## "netrc machine") + - **twitch:vod**: [*twitch*](## "netrc machine") + - **TwitchCollection**: [*twitch*](## "netrc machine") + - **TwitchVideos**: [*twitch*](## "netrc machine") + - **TwitchVideosClips**: [*twitch*](## "netrc machine") + - **TwitchVideosCollections**: [*twitch*](## "netrc machine") - **twitter** - **twitter:amplify** - **twitter:broadcast** @@ -1503,11 +1503,11 @@ - **twitter:shortener** - **twitter:spaces** - **Txxx** - - **udemy**: [udemy] - - **udemy:course**: [udemy] + - **udemy**: [*udemy*](## "netrc machine") + - **udemy:course**: [*udemy*](## "netrc machine") - **UDNEmbed**: 聯合影音 - - **UFCArabia**: [ufcarabia] - - **UFCTV**: [ufctv] + - **UFCArabia**: [*ufcarabia*](## "netrc machine") + - **UFCTV**: [*ufctv*](## "netrc machine") - **ukcolumn** - **UKTVPlay** - **umg:de**: Universal Music Deutschland @@ -1537,7 +1537,7 @@ - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - - **vhx:embed**: [vimeo] + - **vhx:embed**: [*vimeo*](## "netrc machine") - **Viafree** - **vice** - **vice:article** @@ -1560,25 +1560,25 @@ - **videomore:season** - **videomore:video** - **VideoPress** 
- - **Vidio**: [vidio] - - **VidioLive**: [vidio] - - **VidioPremier**: [vidio] + - **Vidio**: [*vidio*](## "netrc machine") + - **VidioLive**: [*vidio*](## "netrc machine") + - **VidioPremier**: [*vidio*](## "netrc machine") - **VidLii** - **viewlift** - **viewlift:embed** - **Viidea** - - **viki**: [viki] - - **viki:channel**: [viki] - - **vimeo**: [vimeo] - - **vimeo:album**: [vimeo] - - **vimeo:channel**: [vimeo] - - **vimeo:group**: [vimeo] - - **vimeo:likes**: [vimeo] Vimeo user likes - - **vimeo:ondemand**: [vimeo] - - **vimeo:pro**: [vimeo] - - **vimeo:review**: [vimeo] Review pages on vimeo - - **vimeo:user**: [vimeo] - - **vimeo:watchlater**: [vimeo] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) + - **viki**: [*viki*](## "netrc machine") + - **viki:channel**: [*viki*](## "netrc machine") + - **vimeo**: [*vimeo*](## "netrc machine") + - **vimeo:album**: [*vimeo*](## "netrc machine") + - **vimeo:channel**: [*vimeo*](## "netrc machine") + - **vimeo:group**: [*vimeo*](## "netrc machine") + - **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes + - **vimeo:ondemand**: [*vimeo*](## "netrc machine") + - **vimeo:pro**: [*vimeo*](## "netrc machine") + - **vimeo:review**: [*vimeo*](## "netrc machine") Review pages on vimeo + - **vimeo:user**: [*vimeo*](## "netrc machine") + - **vimeo:watchlater**: [*vimeo*](## "netrc machine") Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) - **Vimm:recording** - **Vimm:stream** - **ViMP** @@ -1588,13 +1588,13 @@ - **vine:user** - **Viqeo** - **Viu** - - **viu:ott**: [viu] + - **viu:ott**: [*viu*](## "netrc machine") - **viu:playlist** - **ViuOTTIndonesia** - **Vivo**: vivo.sx - - **vk**: [vk] VK - - **vk:uservideos**: [vk] VK - User's Videos - - **vk:wallpost**: [vk] + - **vk**: [*vk*](## "netrc machine") VK + - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos + - **vk:wallpost**: [*vk*](## "netrc machine") - **vm.tiktok** - **Vocaroo** - 
**Vodlocker** @@ -1613,14 +1613,14 @@ - **vqq:video** - **Vrak** - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - - **VrtNU**: [vrtnu] VrtNU.be - - **vrv**: [vrv] + - **VrtNU**: [*vrtnu*](## "netrc machine") VrtNU.be + - **vrv**: [*vrv*](## "netrc machine") - **vrv:series** - **VShare** - **VTM** - - **VTXTV**: [vtxtv] - - **VTXTVLive**: [vtxtv] - - **VTXTVRecordings**: [vtxtv] + - **VTXTV**: [*vtxtv*](## "netrc machine") + - **VTXTVLive**: [*vtxtv*](## "netrc machine") + - **VTXTVRecordings**: [*vtxtv*](## "netrc machine") - **VuClip** - **Vupload** - **VVVVID** @@ -1629,9 +1629,9 @@ - **Vzaar** - **Wakanim** - **Walla** - - **WalyTV**: [walytv] - - **WalyTVLive**: [walytv] - - **WalyTVRecordings**: [walytv] + - **WalyTV**: [*walytv*](## "netrc machine") + - **WalyTVLive**: [*walytv*](## "netrc machine") + - **WalyTVRecordings**: [*walytv*](## "netrc machine") - **wasdtv:clip** - **wasdtv:record** - **wasdtv:stream** @@ -1743,13 +1743,13 @@ - **YoutubeLivestreamEmbed**: YouTube livestream embeds - **YoutubeYtBe**: youtu.be - **Zapiks** - - **Zattoo**: [zattoo] - - **ZattooLive**: [zattoo] - - **ZattooMovies**: [zattoo] - - **ZattooRecordings**: [zattoo] + - **Zattoo**: [*zattoo*](## "netrc machine") + - **ZattooLive**: [*zattoo*](## "netrc machine") + - **ZattooMovies**: [*zattoo*](## "netrc machine") + - **ZattooRecordings**: [*zattoo*](## "netrc machine") - **ZDF** - **ZDFChannel** - - **Zee5**: [zee5] + - **Zee5**: [*zee5*](## "netrc machine") - **zee5:series** - **ZeeNews** - **ZenYandex** diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d6c5ce769..00846cd7e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -614,7 +614,7 @@ class YoutubeDL: '\n You will no longer receive updates on this version') if current_version < MIN_SUPPORTED: msg = 'Python version %d.%d is no longer supported' - self.deprecation_warning( + self.deprecated_feature( f'{msg}! 
Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED)) if self.params.get('allow_unplayable_formats'): diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py index a50bce4d4..74ab6575c 100644 --- a/yt_dlp/dependencies/Cryptodome.py +++ b/yt_dlp/dependencies/Cryptodome.py @@ -14,22 +14,14 @@ AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None try: if _parent.__name__ == 'Cryptodome': from Cryptodome import __version__ - from Cryptodome.Cipher import AES - from Cryptodome.Cipher import PKCS1_v1_5 - from Cryptodome.Cipher import Blowfish - from Cryptodome.Cipher import PKCS1_OAEP - from Cryptodome.Hash import SHA1 - from Cryptodome.Hash import CMAC + from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 + from Cryptodome.Hash import CMAC, SHA1 from Cryptodome.PublicKey import RSA elif _parent.__name__ == 'Crypto': from Crypto import __version__ - from Crypto.Cipher import AES - from Crypto.Cipher import PKCS1_v1_5 - from Crypto.Cipher import Blowfish - from Crypto.Cipher import PKCS1_OAEP - from Crypto.Hash import SHA1 - from Crypto.Hash import CMAC - from Crypto.PublicKey import RSA + from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 + from Crypto.Hash import CMAC, SHA1 # noqa: F401 + from Crypto.PublicKey import RSA # noqa: F401 except ImportError: __version__ = f'broken {__version__}'.strip() diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 377f138b7..3dc638f52 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -497,7 +497,7 @@ class FragmentFD(FileDownloader): download_fragment(fragment, ctx_copy) return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') - self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + self.report_warning('The download speed shown is only of one thread. 
This is a known issue') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: try: for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 98efe0e9d..8ad63b411 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3527,7 +3527,7 @@ class InfoExtractor: desc = '' if cls._NETRC_MACHINE: if markdown: - desc += f' [{cls._NETRC_MACHINE}]' + desc += f' [*{cls._NETRC_MACHINE}*](## "netrc machine")' else: desc += f' [{cls._NETRC_MACHINE}]' if cls.IE_DESC is False: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d1696349a..44e932293 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -956,7 +956,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): @staticmethod def is_music_url(url): - return re.match(r'https?://music\.youtube\.com/', url) is not None + return re.match(r'(https?://)?music\.youtube\.com/', url) is not None def _extract_video(self, renderer): video_id = renderer.get('videoId') @@ -6211,6 +6211,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): original_tab_id, display_id = tab[1:], f'{item_id}{tab}' if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts: url = f'{pre}/videos{post}' + if smuggled_data.get('is_music_url'): + self.report_warning(f'YouTube Music is not directly supported. 
Redirecting to {url}') # Handle both video/playlist URLs qs = parse_qs(url) -- cgit v1.2.3 From 7f51861b1820c37b157a239b1fe30628d907c034 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Wed, 1 Mar 2023 07:56:53 +0000 Subject: [extractor/youtube] Detect and break on looping comments (#6301) Fixes https://github.com/yt-dlp/yt-dlp/issues/6290 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 44e932293..b02e0153a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3341,6 +3341,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): comment = self._extract_comment(comment_renderer, parent) if not comment: continue + # Sometimes YouTube may break and give us infinite looping comments. + # See: https://github.com/yt-dlp/yt-dlp/issues/6290 + if comment['id'] in tracker['seen_comment_ids']: + self.report_warning('Detected YouTube comments looping. 
Stopping comment extraction as we probably cannot get any more.') + yield + else: + tracker['seen_comment_ids'].add(comment['id']) tracker['running_total'] += 1 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1 @@ -3365,7 +3372,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): est_total=0, current_page_thread=0, total_parent_comments=0, - total_reply_comments=0) + total_reply_comments=0, + seen_comment_ids=set()) # TODO: Deprecated # YouTube comments have a max depth of 2 -- cgit v1.2.3 From b38cae49e6f4849c8ee2a774bdc3c1c647ae5f0e Mon Sep 17 00:00:00 2001 From: bashonly Date: Wed, 1 Mar 2023 06:38:02 -0600 Subject: [extractor/generic] Detect manifest links via extension Authored by: bashonly --- yt_dlp/extractor/generic.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d76ef3e31..49aa5a1f5 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2393,14 +2393,15 @@ class GenericIE(InfoExtractor): self.report_detected('direct video link') headers = smuggled_data.get('http_headers', {}) format_id = str(m.group('format_id')) + ext = determine_ext(url) subtitles = {} - if format_id.endswith('mpegurl'): + if format_id.endswith('mpegurl') or ext == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers) info_dict.update(self._fragment_query(url)) - elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): + elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd': formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers) info_dict.update(self._fragment_query(url)) - elif format_id == 'f4m': + elif format_id == 'f4m' or ext == 'f4m': formats = self._extract_f4m_formats(url, video_id, headers=headers) else: formats = [{ -- cgit v1.2.3 From 9fddc12ab022a31754e0eaa358fc4e1dfa974587 Mon Sep 17 00:00:00 2001 From: std-move 
<26625259+std-move@users.noreply.github.com> Date: Thu, 2 Mar 2023 19:33:33 +0100 Subject: [extractor/iprima] Fix extractor (#6291) Authored by: std-move Closes #6187 --- yt_dlp/extractor/iprima.py | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 181820542..e58e9c2ee 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -7,7 +7,8 @@ from ..utils import ( js_to_json, urlencode_postdata, ExtractorError, - parse_qs + parse_qs, + traverse_obj ) @@ -15,8 +16,7 @@ class IPrimaIE(InfoExtractor): _VALID_URL = r'https?://(?!cnn)(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P[^/?#&]+)' _GEO_BYPASS = False _NETRC_MACHINE = 'iprima' - _LOGIN_URL = 'https://auth.iprima.cz/oauth2/login' - _TOKEN_URL = 'https://auth.iprima.cz/oauth2/token' + _AUTH_ROOT = 'https://auth.iprima.cz' access_token = None _TESTS = [{ @@ -67,7 +67,7 @@ class IPrimaIE(InfoExtractor): return login_page = self._download_webpage( - self._LOGIN_URL, None, note='Downloading login page', + f'{self._AUTH_ROOT}/oauth2/login', None, note='Downloading login page', errnote='Downloading login page failed') login_form = self._hidden_inputs(login_page) @@ -76,11 +76,20 @@ class IPrimaIE(InfoExtractor): '_email': username, '_password': password}) - _, login_handle = self._download_webpage_handle( - self._LOGIN_URL, None, data=urlencode_postdata(login_form), + profile_select_html, login_handle = self._download_webpage_handle( + f'{self._AUTH_ROOT}/oauth2/login', None, data=urlencode_postdata(login_form), note='Logging in') - code = parse_qs(login_handle.geturl()).get('code')[0] + # a profile may need to be selected first, even when there is only a single one + if '/profile-select' in login_handle.geturl(): + profile_id = self._search_regex( + r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id') + + login_handle = self._request_webpage( + 
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None, + query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile') + + code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0)) if not code: raise ExtractorError('Login failed', expected=True) @@ -89,10 +98,10 @@ class IPrimaIE(InfoExtractor): 'client_id': 'prima_sso', 'grant_type': 'authorization_code', 'code': code, - 'redirect_uri': 'https://auth.iprima.cz/sso/auth-check'} + 'redirect_uri': f'{self._AUTH_ROOT}/sso/auth-check'} token_data = self._download_json( - self._TOKEN_URL, None, + f'{self._AUTH_ROOT}/oauth2/token', None, note='Downloading token', errnote='Downloading token failed', data=urlencode_postdata(token_request_data)) @@ -115,14 +124,22 @@ class IPrimaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_search_meta( + title = self._html_extract_title(webpage) or self._html_search_meta( ['og:title', 'twitter:title'], webpage, 'title', default=None) video_id = self._search_regex(( r'productId\s*=\s*([\'"])(?Pp\d+)\1', - r'pproduct_id\s*=\s*([\'"])(?Pp\d+)\1'), - webpage, 'real id', group='id') + r'pproduct_id\s*=\s*([\'"])(?Pp\d+)\1', + ), webpage, 'real id', group='id', default=None) + + if not video_id: + nuxt_data = self._search_nuxt_data(webpage, video_id, traverse='data') + video_id = traverse_obj( + nuxt_data, (..., 'content', 'additionals', 'videoPlayId', {str}), get_all=False) + + if not video_id: + self.raise_no_formats('Unable to extract video ID from webpage') metadata = self._download_json( f'https://api.play-backend.iprima.cz/api/v1//products/id-{video_id}/play', -- cgit v1.2.3 From 77d6d136468d0c23c8e79bc937898747804f585a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 3 Mar 2023 03:34:56 -0600 Subject: [extractor/ntvru] Extract HLS and DASH formats (#6403) Closes #5915 Authored by: bashonly --- yt_dlp/extractor/ntvru.py | 13 +++++++++++++ 1 file changed, 13 
insertions(+) diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py index 8d5877daa..91b7724eb 100644 --- a/yt_dlp/extractor/ntvru.py +++ b/yt_dlp/extractor/ntvru.py @@ -21,6 +21,7 @@ class NTVRuIE(InfoExtractor): 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', 'thumbnail': r're:^http://.*\.jpg', 'duration': 136, + 'view_count': int, }, }, { 'url': 'http://www.ntv.ru/video/novosti/750370/', @@ -32,6 +33,7 @@ class NTVRuIE(InfoExtractor): 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', 'thumbnail': r're:^http://.*\.jpg', 'duration': 172, + 'view_count': int, }, }, { 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', @@ -43,6 +45,7 @@ class NTVRuIE(InfoExtractor): 'description': '«Сегодня». 21 марта 2014 года. 16:00', 'thumbnail': r're:^http://.*\.jpg', 'duration': 1496, + 'view_count': int, }, }, { 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/', @@ -54,6 +57,7 @@ class NTVRuIE(InfoExtractor): 'description': 'Остросюжетный фильм «Кома»', 'thumbnail': r're:^http://.*\.jpg', 'duration': 5592, + 'view_count': int, }, }, { 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', @@ -65,6 +69,7 @@ class NTVRuIE(InfoExtractor): 'description': '«Дело врачей»: «Деревце жизни»', 'thumbnail': r're:^http://.*\.jpg', 'duration': 2590, + 'view_count': int, }, }, { # Schemeless file URL @@ -115,6 +120,14 @@ class NTVRuIE(InfoExtractor): 'url': file_, 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)), }) + hls_manifest = xpath_text(video, './playback/hls') + if hls_manifest: + formats.extend(self._extract_m3u8_formats( + hls_manifest, video_id, m3u8_id='hls', fatal=False)) + dash_manifest = xpath_text(video, './playback/dash') + if dash_manifest: + formats.extend(self._extract_mpd_formats( + dash_manifest, video_id, mpd_id='dash', fatal=False)) return { 'id': xpath_text(video, './id'), -- cgit v1.2.3 From 
2d5a8c5db2bd4ff1c2e45e00cd890a10f8ffca9e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 3 Mar 2023 03:37:23 -0600 Subject: [extractor/mediastream] Improve WinSports support (#6401) Closes #6360 Authored by: bashonly --- yt_dlp/extractor/mediastream.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index 4d3949527..e8d427a31 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -1,7 +1,13 @@ import re from .common import InfoExtractor -from ..utils import clean_html, get_element_html_by_class +from ..utils import ( + remove_end, + str_or_none, + strip_or_none, + traverse_obj, + urljoin, +) class MediaStreamIE(InfoExtractor): @@ -117,39 +123,56 @@ class MediaStreamIE(InfoExtractor): class WinSportsVideoIE(InfoExtractor): - _VALID_URL = r'https?://www\.winsports\.co/videos/(?P[\w-]+)-(?P\d+)' + _VALID_URL = r'https?://www\.winsports\.co/videos/(?P[\w-]+)' _TESTS = [{ 'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536', 'info_dict': { 'id': '62dc8357162c4b0821fcfb3c', - 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco', + 'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536', 'title': '¡Siempre Castellanos! 
Gran atajada del portero \'cardenal\' para evitar la caída de su arco', 'description': 'md5:eb811b2b2882bdc59431732c06b905f2', 'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c', 'ext': 'mp4', }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548', 'info_dict': { 'id': '62dcb875ef12a5526790b552', - 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional', + 'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548', 'title': 'Observa aquí los goles del empate entre Tolima y Nacional', 'description': 'md5:b19402ba6e46558b93fd24b873eea9c9', 'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552', 'ext': 'mp4', }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.winsports.co/videos/equidad-vuelve-defender-su-arco-de-remates-de-junior', + 'info_dict': { + 'id': '63fa7eca72f1741ad3a4d515', + 'display_id': 'equidad-vuelve-defender-su-arco-de-remates-de-junior', + 'title': '⚽ Equidad vuelve a defender su arco de remates de Junior', + 'description': 'Remate de Sierra', + 'thumbnail': r're:^https?://[^?#]+63fa7eca72f1741ad3a4d515', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): - display_id, video_id = self._match_valid_url(url).group('display_id', 'id') + display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - + json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}) media_setting_json = self._search_json( r']+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id) - mediastream_id = media_setting_json['settings']['mediastream_formatter'][video_id]['mediastream_id'] + mediastream_id = traverse_obj( + media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}), + get_all=False) or json_ld.get('url') + if not mediastream_id: + 
self.raise_no_formats('No MediaStream embed found in webpage') return self.url_result( - f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id, url_transparent=True, - display_id=display_id, video_title=clean_html(get_element_html_by_class('title-news', webpage))) + urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True, + display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports'))) -- cgit v1.2.3 From 40d77d89027cd0e0ce31d22aec81db3e1d433900 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 3 Mar 2023 03:42:54 -0600 Subject: [extractor/yle_areena] Extract non-Kaltura videos (#6402) Closes #6066 Authored by: bashonly --- yt_dlp/extractor/yle_areena.py | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/yle_areena.py b/yt_dlp/extractor/yle_areena.py index 98d3b1949..c5b45f0cb 100644 --- a/yt_dlp/extractor/yle_areena.py +++ b/yt_dlp/extractor/yle_areena.py @@ -61,7 +61,22 @@ class YleAreenaIE(InfoExtractor): 'age_limit': 0, 'webpage_url': 'https://areena.yle.fi/1-2158940' } - } + }, + { + 'url': 'https://areena.yle.fi/1-64829589', + 'info_dict': { + 'id': '1-64829589', + 'ext': 'mp4', + 'title': 'HKO & Mälkki & Tanner', + 'description': 'md5:b4f1b1af2c6569b33f75179a86eea156', + 'series': 'Helsingin kaupunginorkesterin konsertteja', + 'thumbnail': r're:^https?://.+\.jpg$', + 'release_date': '20230120', + }, + 'params': { + 'skip_download': 'm3u8', + }, + }, ] def _real_extract(self, url): @@ -91,12 +106,22 @@ class YleAreenaIE(InfoExtractor): 'name': sub.get('kind'), }) + kaltura_id = traverse_obj(video_data, ('data', 'ongoing_ondemand', 'kaltura', 'id'), expected_type=str) + if kaltura_id: + info_dict = { + '_type': 'url_transparent', + 'url': smuggle_url(f'kaltura:1955031:{kaltura_id}', {'source_url': url}), + 'ie_key': KalturaIE.ie_key(), + } + else: 
+ info_dict = { + 'id': video_id, + 'formats': self._extract_m3u8_formats( + video_data['data']['ongoing_ondemand']['manifest_url'], video_id, 'mp4', m3u8_id='hls'), + } + return { - '_type': 'url_transparent', - 'url': smuggle_url( - f'kaltura:1955031:{video_data["data"]["ongoing_ondemand"]["kaltura"]["id"]}', - {'source_url': url}), - 'ie_key': KalturaIE.ie_key(), + **info_dict, 'title': (traverse_obj(video_data, ('data', 'ongoing_ondemand', 'title', 'fin'), expected_type=str) or episode or info.get('title')), 'description': description, -- cgit v1.2.3 From 9acf1ee25f7ad3920ede574a9de95b8c18626af4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 3 Mar 2023 16:48:54 +0530 Subject: [jsinterp] Handle `Date` at epoch 0 Closes #6400 --- test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 3203538bb..336e80291 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -66,6 +66,10 @@ _SIG_TESTS = [ ] _NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js', + 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg', + ), ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index c2d056aa1..31ab204d7 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -355,11 +355,11 @@ class JSInterpreter: obj = expr[4:] if obj.startswith('Date('): left, right = self._separate_at_paren(obj[4:]) - expr = unified_timestamp( + date = unified_timestamp( self.interpret_expression(left, local_vars, allow_recursion), False) - if not expr: + if date is None: raise self.Exception(f'Failed to parse date {left!r}', expr) - expr = self._dump(int(expr * 1000), local_vars) + right + expr = self._dump(int(date * 1000), local_vars) + right else: raise self.Exception(f'Unsupported 
object {obj}', expr) -- cgit v1.2.3 From d400e261cf029a3f20d364113b14de973be75404 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 3 Mar 2023 22:31:41 +0530 Subject: [devscripts] Script to generate changelog (#6220) Authored by: Grub4K --- README.md | 7 +- devscripts/changelog_override.json | 1 + devscripts/changelog_override.schema.json | 96 ++++++ devscripts/make_changelog.py | 491 ++++++++++++++++++++++++++++++ 4 files changed, 593 insertions(+), 2 deletions(-) create mode 100644 devscripts/changelog_override.json create mode 100644 devscripts/changelog_override.schema.json create mode 100644 devscripts/make_changelog.py diff --git a/README.md b/README.md index 3d3db933a..ddd71eeeb 100644 --- a/README.md +++ b/README.md @@ -311,10 +311,13 @@ If you wish to build it anyway, install Python and py2exe, and then simply run ` ### Related scripts -* **`devscripts/update-version.py [revision]`** - Update the version number based on current date -* **`devscripts/set-variant.py variant [-M update_message]`** - Set the build variant of the executable +* **`devscripts/update-version.py`** - Update the version number based on current date. +* **`devscripts/set-variant.py`** - Set the build variant of the executable. +* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. +Note: See their `--help` for more info. 
+ You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release # USAGE AND OPTIONS diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/devscripts/changelog_override.json @@ -0,0 +1 @@ +{} diff --git a/devscripts/changelog_override.schema.json b/devscripts/changelog_override.schema.json new file mode 100644 index 000000000..9bd747b70 --- /dev/null +++ b/devscripts/changelog_override.schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft/2020-12/schema", + "type": "array", + "uniqueItems": true, + "items": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "action": { + "enum": [ + "add" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "short" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "remove" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + } + }, + "required": [ + "action", + "hash" + ] + }, + { + "type": "object", + "properties": { + "action": { + "enum": [ + "change" + ] + }, + "when": { + "type": "string", + "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" + }, + "hash": { + "type": "string", + "pattern": "^[0-9a-f]{40}$" + }, + "short": { + "type": "string" + }, + "authors": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "action", + "hash", + "short", + "authors" + ] + } + ] + } +} diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py new file mode 100644 
index 000000000..b66181b53 --- /dev/null +++ b/devscripts/make_changelog.py @@ -0,0 +1,491 @@ +from __future__ import annotations + +import enum +import itertools +import json +import logging +import re +import subprocess +import sys +from collections import defaultdict +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path + +BASE_URL = 'https://github.com' +LOCATION_PATH = Path(__file__).parent + +logger = logging.getLogger(__name__) + + +class CommitGroup(enum.Enum): + UPSTREAM = None + PRIORITY = 'Important' + CORE = 'Core' + EXTRACTOR = 'Extractor' + DOWNLOADER = 'Downloader' + POSTPROCESSOR = 'Postprocessor' + MISC = 'Misc.' + + @classmethod + @lru_cache + def commit_lookup(cls): + return { + name: group + for group, names in { + cls.PRIORITY: {''}, + cls.UPSTREAM: {'upstream'}, + cls.CORE: { + 'aes', + 'cache', + 'compat_utils', + 'compat', + 'cookies', + 'core', + 'dependencies', + 'jsinterp', + 'outtmpl', + 'plugins', + 'update', + 'utils', + }, + cls.MISC: { + 'build', + 'cleanup', + 'devscripts', + 'docs', + 'misc', + 'test', + }, + cls.EXTRACTOR: {'extractor', 'extractors'}, + cls.DOWNLOADER: {'downloader'}, + cls.POSTPROCESSOR: {'postprocessor'}, + }.items() + for name in names + } + + @classmethod + def get(cls, value): + result = cls.commit_lookup().get(value) + if result: + logger.debug(f'Mapped {value!r} => {result.name}') + return result + + +@dataclass +class Commit: + hash: str | None + short: str + authors: list[str] + + def __str__(self): + result = f'{self.short!r}' + + if self.hash: + result += f' ({self.hash[:7]})' + + if self.authors: + authors = ', '.join(self.authors) + result += f' by {authors}' + + return result + + +@dataclass +class CommitInfo: + details: str | None + sub_details: tuple[str, ...] 
+ message: str + issues: list[str] + commit: Commit + fixes: list[Commit] + + def key(self): + return ((self.details or '').lower(), self.sub_details, self.message) + + +class Changelog: + MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE) + + def __init__(self, groups, repo): + self._groups = groups + self._repo = repo + + def __str__(self): + return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ') + + def _format_groups(self, groups): + for item in CommitGroup: + group = groups[item] + if group: + yield self.format_module(item.value, group) + + def format_module(self, name, group): + result = f'\n#### {name} changes\n' if name else '\n' + return result + '\n'.join(self._format_group(group)) + + def _format_group(self, group): + sorted_group = sorted(group, key=CommitInfo.key) + detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower()) + for details, items in detail_groups: + if not details: + indent = '' + else: + yield f'- {details}' + indent = '\t' + + if details == 'cleanup': + items, cleanup_misc_items = self._filter_cleanup_misc_items(items) + + sub_detail_groups = itertools.groupby(items, lambda item: item.sub_details) + for sub_details, entries in sub_detail_groups: + if not sub_details: + for entry in entries: + yield f'{indent}- {self.format_single_change(entry)}' + continue + + prefix = f'{indent}- {", ".join(sub_details)}' + entries = list(entries) + if len(entries) == 1: + yield f'{prefix}: {self.format_single_change(entries[0])}' + continue + + yield prefix + for entry in entries: + yield f'{indent}\t- {self.format_single_change(entry)}' + + if details == 'cleanup' and cleanup_misc_items: + yield from self._format_cleanup_misc_sub_group(cleanup_misc_items) + + def _filter_cleanup_misc_items(self, items): + cleanup_misc_items = defaultdict(list) + non_misc_items = [] + for item in items: + if self.MISC_RE.search(item.message): + 
cleanup_misc_items[tuple(item.commit.authors)].append(item) + else: + non_misc_items.append(item) + + return non_misc_items, cleanup_misc_items + + def _format_cleanup_misc_sub_group(self, group): + prefix = '\t- Miscellaneous' + if len(group) == 1: + yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}' + return + + yield prefix + for message in self._format_cleanup_misc_items(group): + yield f'\t\t- {message}' + + def _format_cleanup_misc_items(self, group): + for authors, infos in group.items(): + message = ', '.join( + self._format_message_link(None, info.commit.hash) + for info in sorted(infos, key=lambda item: item.commit.hash or '')) + yield f'{message} by {self._format_authors(authors)}' + + def format_single_change(self, info): + message = self._format_message_link(info.message, info.commit.hash) + if info.issues: + message = f'{message} ({self._format_issues(info.issues)})' + + if info.commit.authors: + message = f'{message} by {self._format_authors(info.commit.authors)}' + + if info.fixes: + fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes) + + authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold) + if authors != info.commit.authors: + fix_message = f'{fix_message} by {self._format_authors(authors)}' + + message = f'{message} (With fixes in {fix_message})' + + return message + + def _format_message_link(self, message, hash): + assert message or hash, 'Improperly defined commit message or override' + message = message if message else hash[:7] + return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + + def _format_issues(self, issues): + return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) + + @staticmethod + def _format_authors(authors): + return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors) + + @property + def repo_url(self): + return f'{BASE_URL}/{self._repo}' + + +class CommitRange: + 
COMMAND = 'git' + COMMIT_SEPARATOR = '-----' + + AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) + MESSAGE_RE = re.compile(r''' + (?:\[ + (?P[^\]\/:,]+) + (?:/(?P
[^\]:,]+))? + (?:[:,](?P[^\]]+))? + \]\ )? + (?:`?(?P[^:`]+)`?: )? + (?P.+?) + (?:\ \((?P\#\d+(?:,\ \#\d+)*)\))? + ''', re.VERBOSE | re.DOTALL) + EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) + FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+for)?|Revert)\s+([\da-f]{40})') + UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') + + def __init__(self, start, end, default_author=None) -> None: + self._start = start + self._end = end + self._commits, self._fixes = self._get_commits_and_fixes(default_author) + self._commits_added = [] + + @classmethod + def from_single(cls, commitish='HEAD', default_author=None): + start_commitish = cls.get_prev_tag(commitish) + end_commitish = cls.get_next_tag(commitish) + if start_commitish == end_commitish: + start_commitish = cls.get_prev_tag(f'{commitish}~') + logger.info(f'Determined range from {commitish!r}: {start_commitish}..{end_commitish}') + return cls(start_commitish, end_commitish, default_author) + + @classmethod + def get_prev_tag(cls, commitish): + command = [cls.COMMAND, 'describe', '--tags', '--abbrev=0', '--exclude=*[^0-9.]*', commitish] + return subprocess.check_output(command, text=True).strip() + + @classmethod + def get_next_tag(cls, commitish): + result = subprocess.run( + [cls.COMMAND, 'describe', '--contains', '--abbrev=0', commitish], + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True) + if result.returncode: + return 'HEAD' + + return result.stdout.partition('~')[0].strip() + + def __iter__(self): + return iter(itertools.chain(self._commits.values(), self._commits_added)) + + def __len__(self): + return len(self._commits) + len(self._commits_added) + + def __contains__(self, commit): + if isinstance(commit, Commit): + if not commit.hash: + return False + commit = commit.hash + + return commit in self._commits + + def _is_ancestor(self, commitish): + return bool(subprocess.call( + [self.COMMAND, 'merge-base', '--is-ancestor', commitish, self._start])) 
+ + def _get_commits_and_fixes(self, default_author): + result = subprocess.check_output([ + self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', + f'{self._start}..{self._end}'], text=True) + + commits = {} + fixes = defaultdict(list) + lines = iter(result.splitlines(False)) + for line in lines: + commit_hash = line + short = next(lines) + skip = short.startswith('Release ') or short == '[version] update' + + authors = [default_author] if default_author else [] + for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): + match = self.AUTHOR_INDICATOR_RE.match(line) + if match: + authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + + commit = Commit(commit_hash, short, authors) + if skip: + logger.debug(f'Skipped commit: {commit}') + continue + + fix_match = self.FIXES_RE.search(commit.short) + if fix_match: + commitish = fix_match.group(1) + fixes[commitish].append(commit) + + commits[commit.hash] = commit + + for commitish, fix_commits in fixes.items(): + if commitish in commits: + hashes = ', '.join(commit.hash[:7] for commit in fix_commits) + logger.info(f'Found fix(es) for {commitish[:7]}: {hashes}') + for fix_commit in fix_commits: + del commits[fix_commit.hash] + else: + logger.debug(f'Commit with fixes not in changes: {commitish[:7]}') + + return commits, fixes + + def apply_overrides(self, overrides): + for override in overrides: + when = override.get('when') + if when and when not in self and when != self._start: + logger.debug(f'Ignored {when!r}, not in commits {self._start!r}') + continue + + override_hash = override.get('hash') + if override['action'] == 'add': + commit = Commit(override.get('hash'), override['short'], override.get('authors') or []) + logger.info(f'ADD {commit}') + self._commits_added.append(commit) + + elif override['action'] == 'remove': + if override_hash in self._commits: + logger.info(f'REMOVE {self._commits[override_hash]}') + del self._commits[override_hash] + + elif 
override['action'] == 'change': + if override_hash not in self._commits: + continue + commit = Commit(override_hash, override['short'], override['authors']) + logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') + self._commits[commit.hash] = commit + + self._commits = {key: value for key, value in reversed(self._commits.items())} + + def groups(self): + groups = defaultdict(list) + for commit in self: + upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short) + if upstream_re: + commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}' + + match = self.MESSAGE_RE.fullmatch(commit.short) + if not match: + logger.error(f'Error parsing short commit message: {commit.short!r}') + continue + + prefix, details, sub_details, sub_details_alt, message, issues = match.groups() + group = None + if prefix: + if prefix == 'priority': + prefix, _, details = (details or '').partition('/') + logger.debug(f'Priority: {message!r}') + group = CommitGroup.PRIORITY + + if not details and prefix: + if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'): + logger.debug(f'Replaced details with {prefix!r}') + details = prefix or None + + if details == 'common': + details = None + + if details: + details = details.strip() + + else: + group = CommitGroup.CORE + + sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.lower().replace(':', ',') + sub_details = tuple(filter(None, map(str.strip, sub_details.split(',')))) + + issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else [] + + if not group: + group = CommitGroup.get(prefix.lower()) + if not group: + if self.EXTRACTOR_INDICATOR_RE.search(commit.short): + group = CommitGroup.EXTRACTOR + else: + group = CommitGroup.POSTPROCESSOR + logger.warning(f'Failed to map {commit.short!r}, selected {group.name}') + + commit_info = CommitInfo( + details, sub_details, message.strip(), + issues, commit, self._fixes[commit.hash]) + logger.debug(f'Resolved 
{commit.short!r} to {commit_info!r}') + groups[group].append(commit_info) + + return groups + + +def get_new_contributors(contributors_path, commits): + contributors = set() + if contributors_path.exists(): + with contributors_path.open() as file: + for line in filter(None, map(str.strip, file)): + author, _, _ = line.partition(' (') + authors = author.split('/') + contributors.update(map(str.casefold, authors)) + + new_contributors = set() + for commit in commits: + for author in commit.authors: + author_folded = author.casefold() + if author_folded not in contributors: + contributors.add(author_folded) + new_contributors.add(author) + + return sorted(new_contributors, key=str.casefold) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser( + description='Create a changelog markdown from a git commit range') + parser.add_argument( + 'commitish', default='HEAD', nargs='?', + help='The commitish to create the range from (default: %(default)s)') + parser.add_argument( + '-v', '--verbosity', action='count', default=0, + help='increase verbosity (can be used twice)') + parser.add_argument( + '-c', '--contributors', action='store_true', + help='update CONTRIBUTORS file (default: %(default)s)') + parser.add_argument( + '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS', + help='path to the CONTRIBUTORS file') + parser.add_argument( + '--no-override', action='store_true', + help='skip override json in commit generation (default: %(default)s)') + parser.add_argument( + '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json', + help='path to the changelog_override.json file') + parser.add_argument( + '--default-author', default='pukkandan', + help='the author to use without a author indicator (default: %(default)s)') + parser.add_argument( + '--repo', default='yt-dlp/yt-dlp', + help='the github repository to use for the operations (default: %(default)s)') + args = parser.parse_args() + + 
logging.basicConfig( + datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}', + level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr) + + commits = CommitRange.from_single(args.commitish, args.default_author) + + if not args.no_override: + if args.override_path.exists(): + with args.override_path.open() as file: + overrides = json.load(file) + commits.apply_overrides(overrides) + else: + logger.warning(f'File {args.override_path.as_posix()} does not exist') + + logger.info(f'Loaded {len(commits)} commits') + + new_contributors = get_new_contributors(args.contributors_path, commits) + if new_contributors: + if args.contributors: + with args.contributors_path.open('a') as file: + file.writelines(f'{contributor}\n' for contributor in new_contributors) + logger.info(f'New contributors: {", ".join(new_contributors)}') + + print(Changelog(commits.groups(), args.repo)) -- cgit v1.2.3 From 29cb20bd563c02671b31dd840139e93dd37150a1 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 3 Mar 2023 22:33:12 +0530 Subject: [build] Automated builds and nightly releases (#6220) Closes #1839 Authored by: Grub4K, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- .github/workflows/build.yml | 555 +++++++++++++++------------------- .github/workflows/publish.yml | 80 +++++ .github/workflows/release-nightly.yml | 49 +++ .github/workflows/release.yml | 125 ++++++++ Changelog.md | 8 +- README.md | 8 +- devscripts/make_readme.py | 22 +- devscripts/update-version.py | 46 ++- yt_dlp/YoutubeDL.py | 6 +- 9 files changed, 559 insertions(+), 340 deletions(-) create mode 100644 .github/workflows/publish.yml create mode 100644 .github/workflows/release-nightly.yml create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6041376a4..2183903ea 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,393 +1,338 @@ -name: Build -on: 
workflow_dispatch +name: Build Artifacts +on: + workflow_call: + inputs: + version: + required: true + type: string + channel: + required: false + default: stable + type: string + unix: + default: true + type: boolean + linux_arm: + default: true + type: boolean + macos: + default: true + type: boolean + macos_legacy: + default: true + type: boolean + windows: + default: true + type: boolean + windows32: + default: true + type: boolean + meta_files: + default: true + type: boolean + + workflow_dispatch: + inputs: + version: + description: Version tag (YYYY.MM.DD[.REV]) + required: true + type: string + channel: + description: Update channel (stable/nightly) + required: true + default: stable + type: string + unix: + description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip + default: true + type: boolean + linux_arm: + description: yt-dlp_linux_aarch64, yt-dlp_linux_armv7l + default: true + type: boolean + macos: + description: yt-dlp_macos, yt-dlp_macos.zip + default: true + type: boolean + macos_legacy: + description: yt-dlp_macos_legacy + default: true + type: boolean + windows: + description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip + default: true + type: boolean + windows32: + description: yt-dlp_x86.exe + default: true + type: boolean + meta_files: + description: SHA2-256SUMS, SHA2-512SUMS, _update_spec + default: true + type: boolean + permissions: contents: read jobs: - prepare: - permissions: - contents: write # for push_release - runs-on: ubuntu-latest - outputs: - version_suffix: ${{ steps.version_suffix.outputs.version_suffix }} - ytdlp_version: ${{ steps.bump_version.outputs.ytdlp_version }} - head_sha: ${{ steps.push_release.outputs.head_sha }} - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Set version suffix - id: version_suffix - env: - PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }} - if: "env.PUSH_VERSION_COMMIT == ''" - run: echo 
"version_suffix=$(date -u +"%H%M%S")" >> "$GITHUB_OUTPUT" - - name: Bump version - id: bump_version - run: | - python devscripts/update-version.py ${{ steps.version_suffix.outputs.version_suffix }} - make issuetemplates - - - name: Push to release - id: push_release - run: | - git config --global user.name github-actions - git config --global user.email github-actions@example.com - git add -u - git commit -m "[version] update" -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" - git push origin --force ${{ github.event.ref }}:release - echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" - - name: Update master - env: - PUSH_VERSION_COMMIT: ${{ secrets.PUSH_VERSION_COMMIT }} - if: "env.PUSH_VERSION_COMMIT != ''" - run: git push origin ${{ github.event.ref }} - - - build_unix: - needs: prepare + unix: + if: inputs.unix runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - uses: conda-incubator/setup-miniconda@v2 - with: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + - uses: conda-incubator/setup-miniconda@v2 + with: miniforge-variant: Mambaforge use-mamba: true channels: conda-forge auto-update-conda: true - activate-environment: '' + activate-environment: "" auto-activate-base: false - - name: Install Requirements - run: | + - name: Install Requirements + run: | sudo apt-get -y install zip pandoc man sed - python -m pip install -U pip setuptools wheel twine + python -m pip install -U pip setuptools wheel python -m pip install -U Pyinstaller -r requirements.txt reqs=$(mktemp) echo -e 'python=3.10.*\npyinstaller' >$reqs sed 's/^brotli.*/brotli-python/' >$reqs mamba create -n build --file $reqs - - name: Prepare - run: | - python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python devscripts/update-version.py -c ${{ inputs.channel }} ${{ 
inputs.version }} python devscripts/make_lazy_extractors.py - - name: Build Unix platform-independent binary - run: | + - name: Build Unix platform-independent binary + run: | make all tar - - name: Build Unix standalone binary - shell: bash -l {0} - run: | + - name: Build Unix standalone binary + shell: bash -l {0} + run: | unset LD_LIBRARY_PATH # Harmful; set by setup-python conda activate build python pyinst.py --onedir (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .) python pyinst.py - - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - yt-dlp - yt-dlp.tar.gz - dist/yt-dlp_linux - dist/yt-dlp_linux.zip - - - name: Build and publish on PyPi - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - if: "env.TWINE_PASSWORD != ''" - run: | - rm -rf dist/* - python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" - python setup.py sdist bdist_wheel - twine upload dist/* - - - name: Install SSH private key for Homebrew - env: - BREW_TOKEN: ${{ secrets.BREW_TOKEN }} - if: "env.BREW_TOKEN != ''" - uses: yt-dlp/ssh-agent@v0.5.3 - with: - ssh-private-key: ${{ env.BREW_TOKEN }} - - name: Update Homebrew Formulae - env: - BREW_TOKEN: ${{ secrets.BREW_TOKEN }} - if: "env.BREW_TOKEN != ''" - run: | - git clone git@github.com:yt-dlp/homebrew-taps taps/ - python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.ytdlp_version }}" - git -C taps/ config user.name github-actions - git -C taps/ config user.email github-actions@example.com - git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.ytdlp_version }}' - git -C taps/ push - - - build_linux_arm: + mv ./dist/yt-dlp_linux ./yt-dlp_linux + mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + yt-dlp + yt-dlp.tar.gz + yt-dlp_linux + yt-dlp_linux.zip + + linux_arm: + if: inputs.linux_arm 
permissions: - packages: write # for Creating cache + contents: read + packages: write # for creating cache runs-on: ubuntu-latest - needs: prepare strategy: matrix: architecture: - - armv7 - - aarch64 + - armv7 + - aarch64 steps: - - uses: actions/checkout@v3 - with: - path: ./repo - - name: Virtualized Install, Prepare & Build - uses: yt-dlp/run-on-arch-action@v2 - with: - githubToken: ${{ github.token }} # To cache image - arch: ${{ matrix.architecture }} - distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS - dockerRunArgs: --volume "${PWD}/repo:/repo" - install: | # Installing Python 3.10 from the Deadsnakes repo raises errors - apt update - apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip - python3.8 -m pip install -U pip setuptools wheel - # Cannot access requirements.txt from the repo directory at this stage - python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi - - run: | - cd repo - python3.8 -m pip install -U Pyinstaller -r requirements.txt # Cached version may be out of date - python3.8 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} - python3.8 devscripts/make_lazy_extractors.py - python3.8 pyinst.py - - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | # run-on-arch-action designates armv7l as armv7 - repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} - - - build_macos: + - uses: actions/checkout@v3 + with: + path: ./repo + - name: Virtualized Install, Prepare & Build + uses: yt-dlp/run-on-arch-action@v2 + with: + # Ref: https://github.com/uraimo/run-on-arch-action/issues/55 + env: | + GITHUB_WORKFLOW: build + githubToken: ${{ github.token }} # To cache image + arch: ${{ matrix.architecture }} + distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + dockerRunArgs: --volume "${PWD}/repo:/repo" + install: | # Installing Python 
3.10 from the Deadsnakes repo raises errors + apt update + apt -y install zlib1g-dev python3.8 python3.8-dev python3.8-distutils python3-pip + python3.8 -m pip install -U pip setuptools wheel + # Cannot access requirements.txt from the repo directory at this stage + python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi + + run: | + cd repo + python3.8 -m pip install -U Pyinstaller -r requirements.txt # Cached version may be out of date + python3.8 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} + python3.8 devscripts/make_lazy_extractors.py + python3.8 pyinst.py + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | # run-on-arch-action designates armv7l as armv7 + repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }} + + macos: + if: inputs.macos runs-on: macos-11 - needs: prepare steps: - - uses: actions/checkout@v3 - # NB: In order to create a universal2 application, the version of python3 in /usr/bin has to be used - - name: Install Requirements - run: | + - uses: actions/checkout@v3 + # NB: In order to create a universal2 application, the version of python3 in /usr/bin has to be used + - name: Install Requirements + run: | brew install coreutils /usr/bin/python3 -m pip install -U --user pip Pyinstaller -r requirements.txt - - name: Prepare - run: | - /usr/bin/python3 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + /usr/bin/python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} /usr/bin/python3 devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) 
/usr/bin/python3 pyinst.py --target-architecture universal2 - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp_macos - dist/yt-dlp_macos.zip + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp_macos + dist/yt-dlp_macos.zip - - build_macos_legacy: + macos_legacy: + if: inputs.macos_legacy runs-on: macos-latest - needs: prepare steps: - - uses: actions/checkout@v3 - - name: Install Python - # We need the official Python, because the GA ones only support newer macOS versions - env: - PYTHON_VERSION: 3.10.5 - MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools - run: | + - uses: actions/checkout@v3 + - name: Install Python + # We need the official Python, because the GA ones only support newer macOS versions + env: + PYTHON_VERSION: 3.10.5 + MACOSX_DEPLOYMENT_TARGET: 10.9 # Used up by the Python build tools + run: | # Hack to get the latest patch version. Uncomment if needed #brew install python@3.10 #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg" sudo installer -pkg python.pkg -target / python3 --version - - name: Install Requirements - run: | + - name: Install Requirements + run: | brew install coreutils python3 -m pip install -U --user pip Pyinstaller -r requirements.txt - - name: Prepare - run: | - python3 devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python3 devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python3 devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | python3 pyinst.py mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp_macos_legacy - + - name: Upload artifacts + uses: 
actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp_macos_legacy - build_windows: + windows: + if: inputs.windows runs-on: windows-latest - needs: prepare steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: # 3.8 is used for Win7 support - python-version: '3.8' - - name: Install Requirements - run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: # 3.8 is used for Win7 support + python-version: "3.8" + - name: Install Requirements + run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python -m pip install -U pip setuptools wheel py2exe pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt - - name: Prepare - run: | - python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | python setup.py py2exe Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe python pyinst.py python pyinst.py --onedir Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp.exe - dist/yt-dlp_min.exe - dist/yt-dlp_win.zip - + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp.exe + dist/yt-dlp_min.exe + dist/yt-dlp_win.zip - build_windows32: + windows32: + if: inputs.windows32 runs-on: windows-latest - needs: prepare steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: # 3.7 is used for Vista support. 
See https://github.com/yt-dlp/yt-dlp/issues/390 - python-version: '3.7' - architecture: 'x86' - - name: Install Requirements - run: | + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: # 3.7 is used for Vista support. See https://github.com/yt-dlp/yt-dlp/issues/390 + python-version: "3.7" + architecture: "x86" + - name: Install Requirements + run: | python -m pip install -U pip setuptools wheel pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" -r requirements.txt - - name: Prepare - run: | - python devscripts/update-version.py ${{ needs.prepare.outputs.version_suffix }} + - name: Prepare + run: | + python devscripts/update-version.py -c ${{ inputs.channel }} ${{ inputs.version }} python devscripts/make_lazy_extractors.py - - name: Build - run: | + - name: Build + run: | python pyinst.py - - name: Upload artifacts - uses: actions/upload-artifact@v3 - with: - path: | - dist/yt-dlp_x86.exe - - - publish_release: - permissions: - contents: write # for action-gh-release + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + dist/yt-dlp_x86.exe + + meta_files: + if: inputs.meta_files && always() + needs: + - unix + - linux_arm + - macos + - macos_legacy + - windows + - windows32 runs-on: ubuntu-latest - needs: [prepare, build_unix, build_linux_arm, build_windows, build_windows32, build_macos, build_macos_legacy] - steps: - - uses: actions/checkout@v3 - - uses: actions/download-artifact@v3 - - - name: Get Changelog - run: | - changelog=$(grep -oPz '(?s)(?<=### ${{ needs.prepare.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)' Changelog.md) || true - echo "changelog<> $GITHUB_ENV - echo "$changelog" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - name: Make Update spec - run: | - echo "# This file is used for regulating self-update" >> _update_spec - echo "lock 2022.07.18 .+ Python 3.6" >> _update_spec - - name: Make SHA2-SUMS files - run: | - sha256sum artifact/yt-dlp | awk 
'{print $1 " yt-dlp"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp.tar.gz | awk '{print $1 " yt-dlp.tar.gz"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp.exe | awk '{print $1 " yt-dlp.exe"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_win.zip | awk '{print $1 " yt-dlp_win.zip"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_min.exe | awk '{print $1 " yt-dlp_min.exe"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_x86.exe | awk '{print $1 " yt-dlp_x86.exe"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " yt-dlp_linux_armv7l"}' >> SHA2-256SUMS - sha256sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-256SUMS - sha256sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-256SUMS - sha256sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-256SUMS - sha512sum artifact/yt-dlp | awk '{print $1 " yt-dlp"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp.tar.gz | awk '{print $1 " yt-dlp.tar.gz"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp.exe | awk '{print $1 " yt-dlp.exe"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_win.zip | awk '{print $1 " yt-dlp_win.zip"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_min.exe | awk '{print $1 " yt-dlp_min.exe"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_x86.exe | awk '{print $1 " yt-dlp_x86.exe"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_macos | awk '{print $1 " yt-dlp_macos"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_macos.zip | awk '{print $1 " yt-dlp_macos.zip"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_macos_legacy | awk '{print $1 " yt-dlp_macos_legacy"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_linux_armv7l | awk '{print $1 " 
yt-dlp_linux_armv7l"}' >> SHA2-512SUMS - sha512sum artifact/yt-dlp_linux_aarch64 | awk '{print $1 " yt-dlp_linux_aarch64"}' >> SHA2-512SUMS - sha512sum artifact/dist/yt-dlp_linux | awk '{print $1 " yt-dlp_linux"}' >> SHA2-512SUMS - sha512sum artifact/dist/yt-dlp_linux.zip | awk '{print $1 " yt-dlp_linux.zip"}' >> SHA2-512SUMS - - - name: Publish Release - uses: yt-dlp/action-gh-release@v1 - with: - tag_name: ${{ needs.prepare.outputs.ytdlp_version }} - name: yt-dlp ${{ needs.prepare.outputs.ytdlp_version }} - target_commitish: ${{ needs.prepare.outputs.head_sha }} - body: | - #### [A description of the various files]((https://github.com/yt-dlp/yt-dlp#release-files)) are in the README + - uses: actions/download-artifact@v3 - --- -

Changelog

-

- - ${{ env.changelog }} + - name: Make SHA2-SUMS files + run: | + cd ./artifact/ + sha256sum * > ../SHA2-256SUMS + sha512sum * > ../SHA2-512SUMS -

-
- files: | - SHA2-256SUMS - SHA2-512SUMS - artifact/yt-dlp - artifact/yt-dlp.tar.gz - artifact/yt-dlp.exe - artifact/yt-dlp_win.zip - artifact/yt-dlp_min.exe - artifact/yt-dlp_x86.exe - artifact/yt-dlp_macos - artifact/yt-dlp_macos.zip - artifact/yt-dlp_macos_legacy - artifact/yt-dlp_linux_armv7l - artifact/yt-dlp_linux_aarch64 - artifact/dist/yt-dlp_linux - artifact/dist/yt-dlp_linux.zip - _update_spec + - name: Make Update spec + run: | + cat >> _update_spec << EOF + # This file is used for regulating self-update + lock 2022.08.18.36 .+ Python 3.6 + EOF + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: | + SHA*SUMS* + _update_spec diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 000000000..42e66a29c --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,80 @@ +name: Publish +on: + workflow_call: + inputs: + nightly: + default: false + required: false + type: boolean + version: + required: true + type: string + target_commitish: + required: true + type: string + secrets: + ARCHIVE_REPO_TOKEN: + required: false + +permissions: + contents: write + +jobs: + publish: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - uses: actions/download-artifact@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Generate release notes + run: | + cat >> ./RELEASE_NOTES << EOF + #### A description of the various files are in the [README](https://github.com/yt-dlp/yt-dlp#release-files) + --- +

Changelog

+ $(python ./devscripts/make_changelog.py -vv) +
+ EOF + echo "**This is an automated nightly pre-release build**" >> ./PRERELEASE_NOTES + cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES + echo "Generated from: https://github.com/${{ github.repository }}/commit/${{ inputs.target_commitish }}" >> ./ARCHIVE_NOTES + cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES + + - name: Archive nightly release + env: + GH_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} + GH_REPO: ${{ vars.ARCHIVE_REPO }} + if: | + inputs.nightly && env.GH_TOKEN != '' && env.GH_REPO != '' + run: | + gh release create \ + --notes-file ARCHIVE_NOTES \ + --title "Build ${{ inputs.version }}" \ + ${{ inputs.version }} \ + artifact/* + + - name: Prune old nightly release + if: inputs.nightly + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release delete --yes --cleanup-tag "nightly" || true + git tag --delete "nightly" || true + sleep 5 # Enough time to cover deletion race condition + + - name: Publish release${{ inputs.nightly && ' (nightly)' || '' }} + env: + GH_TOKEN: ${{ github.token }} + run: | + gh release create \ + --notes-file ${{ inputs.nightly && 'PRE' || '' }}RELEASE_NOTES \ + --target ${{ inputs.target_commitish }} \ + --title "yt-dlp ${{ inputs.nightly && 'nightly ' || '' }}${{ inputs.version }}" \ + ${{ inputs.nightly && '--prerelease "nightly"' || inputs.version }} \ + artifact/* diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml new file mode 100644 index 000000000..ec079b8d0 --- /dev/null +++ b/.github/workflows/release-nightly.yml @@ -0,0 +1,49 @@ +name: Release (nightly) +on: + push: + branches: + - master + paths: + - "**.py" + - "!yt_dlp/version.py" +concurrency: + group: release-nightly + cancel-in-progress: true +permissions: + contents: read + +jobs: + prepare: + if: vars.BUILD_NIGHTLY != '' + runs-on: ubuntu-latest + outputs: + version: ${{ steps.get_version.outputs.version }} + + steps: + - uses: actions/checkout@v3 + - name: Get version + id: get_version + run: | + python devscripts/update-version.py 
"$(date -u +"%H%M%S")" | grep -Po "version=\d+(\.\d+){3}" >> "$GITHUB_OUTPUT" + + build: + needs: prepare + uses: ./.github/workflows/build.yml + with: + version: ${{ needs.prepare.outputs.version }} + channel: nightly + permissions: + contents: read + packages: write # For package cache + + publish: + needs: [prepare, build] + uses: ./.github/workflows/publish.yml + secrets: + ARCHIVE_REPO_TOKEN: ${{ secrets.ARCHIVE_REPO_TOKEN }} + permissions: + contents: write + with: + nightly: true + version: ${{ needs.prepare.outputs.version }} + target_commitish: ${{ github.sha }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..c97cd1f4a --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,125 @@ +name: Release +on: workflow_dispatch +permissions: + contents: read + +jobs: + prepare: + permissions: + contents: write + runs-on: ubuntu-latest + outputs: + version: ${{ steps.update_version.outputs.version }} + head_sha: ${{ steps.push_release.outputs.head_sha }} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Update version + id: update_version + run: | + python devscripts/update-version.py ${{ vars.PUSH_VERSION_COMMIT == '' && '"$(date -u +"%H%M%S")"' || '' }} | \ + grep -Po "version=\d+\.\d+\.\d+(\.\d+)?" 
>> "$GITHUB_OUTPUT" + + - name: Update documentation + run: | + make doc + sed '/### /Q' Changelog.md >> ./CHANGELOG + echo '### ${{ steps.update_version.outputs.version }}' >> ./CHANGELOG + python ./devscripts/make_changelog.py -vv -c >> ./CHANGELOG + echo >> ./CHANGELOG + grep -Poz '(?s)### \d+\.\d+\.\d+.+' 'Changelog.md' | head -n -1 >> ./CHANGELOG + cat ./CHANGELOG > Changelog.md + + - name: Push to release + id: push_release + run: | + git config --global user.name github-actions + git config --global user.email github-actions@example.com + git add -u + git commit -m "Release ${{ steps.update_version.outputs.version }}" \ + -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" + git push origin --force ${{ github.event.ref }}:release + echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + + - name: Update master + if: vars.PUSH_VERSION_COMMIT != '' + run: git push origin ${{ github.event.ref }} + + publish_pypi_homebrew: + needs: prepare + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install Requirements + run: | + python -m pip install -U pip setuptools wheel twine + python -m pip install -U -r requirements.txt + + - name: Prepare + run: | + python devscripts/update-version.py ${{ needs.prepare.outputs.version }} + python devscripts/make_lazy_extractors.py + + - name: Build and publish on PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + if: env.TWINE_PASSWORD != '' + run: | + rm -rf dist/* + python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" + python setup.py sdist bdist_wheel + twine upload dist/* + + - name: Checkout Homebrew repository + env: + BREW_TOKEN: ${{ secrets.BREW_TOKEN }} + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' + uses: actions/checkout@v3 + with: + repository: 
yt-dlp/homebrew-taps + path: taps + ssh-key: ${{ secrets.BREW_TOKEN }} + + - name: Update Homebrew Formulae + env: + BREW_TOKEN: ${{ secrets.BREW_TOKEN }} + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + if: env.BREW_TOKEN != '' && env.PYPI_TOKEN != '' + run: | + python devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ needs.prepare.outputs.version }}" + git -C taps/ config user.name github-actions + git -C taps/ config user.email github-actions@example.com + git -C taps/ commit -am 'yt-dlp: ${{ needs.prepare.outputs.version }}' + git -C taps/ push + + build: + needs: prepare + uses: ./.github/workflows/build.yml + with: + version: ${{ needs.prepare.outputs.version }} + permissions: + contents: read + packages: write # For package cache + + publish: + needs: [prepare, build] + uses: ./.github/workflows/publish.yml + permissions: + contents: write + with: + version: ${{ needs.prepare.outputs.version }} + target_commitish: ${{ needs.prepare.outputs.head_sha }} diff --git a/Changelog.md b/Changelog.md index 24bc8a2e2..60bd99f72 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,13 +1,7 @@ # Changelog ### 2023.02.17 diff --git a/README.md b/README.md index ddd71eeeb..e6e95b147 100644 --- a/README.md +++ b/README.md @@ -318,7 +318,8 @@ If you wish to build it anyway, install Python and py2exe, and then simply run ` Note: See their `--help` for more info. -You can also fork the project on GitHub and run your fork's [build workflow](.github/workflows/build.yml) to automatically build a full release +### Forking the project +If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases. 
# USAGE AND OPTIONS @@ -460,9 +461,8 @@ You can also fork the project on GitHub and run your fork's [build workflow](.gi --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. - E.g. "--date today-2weeks" downloads - only videos uploaded on the same day two - weeks ago + E.g. "--date today-2weeks" downloads only + videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before this date. The date formats accepted is the same as --date diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index fad993a19..2270b31d3 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -45,33 +45,43 @@ switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group()) delim = f'\n{" " * switch_col_width}' PATCHES = ( - ( # Standardize update message + ( # Standardize `--update` message r'(?m)^( -U, --update\s+).+(\n \s.+)*$', r'\1Update this program to the latest version', ), - ( # Headings + ( # Headings r'(?m)^ (\w.+\n)( (?=\w))?', r'## \1' ), - ( # Do not split URLs + ( # Fixup `--date` formatting + rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', + (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}' + f'E.g. "--date today-2weeks" downloads only{delim}' + 'videos uploaded on the same day two weeks ago'), + ), + ( # Do not split URLs rf'({delim[:-1]})? (?P