From da1ffde15de28bf0565d1bd0c02d3f17edcdfff7 Mon Sep 17 00:00:00 2001 From: panatexxa <91012623+panatexxa@users.noreply.github.com> Date: Thu, 7 Apr 2022 04:26:12 +0200 Subject: [Moviepilot] Add extractor (#3282) Authored by: panatexxa --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/moviepilot.py | 115 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 yt_dlp/extractor/moviepilot.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 457f4c2aa..bd27e14b2 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -915,6 +915,7 @@ from .motherless import ( ) from .motorsport import MotorsportIE from .movieclips import MovieClipsIE +from .moviepilot import MoviepilotIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py new file mode 100644 index 000000000..4605d3481 --- /dev/null +++ b/yt_dlp/extractor/moviepilot.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .dailymotion import DailymotionIE +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + try_get, +) + +import re + + +class MoviepilotIE(InfoExtractor): + _IE_NAME = 'moviepilot' + _IE_DESC = 'Moviepilot trailer' + _VALID_URL = r'https?://(?:www\.)?moviepilot\.de/movies/(?P[^/]+)' + + _TESTS = [{ + 'url': 'https://www.moviepilot.de/movies/interstellar-2/', + 'info_dict': { + 'id': 'x7xdut5', + 'display_id': 'interstellar-2', + 'ext': 'mp4', + 'title': 'Interstellar', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaXev1VvzitVZMFsR/x720', + 'timestamp': 1400491705, + 'description': 'md5:7dfc5c1758e7322a7346934f1f0c489c', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'uploader_id': 'x6nd9k', + 'upload_date': '20140519', + 'duration': 140, + 'age_limit': 0, + 'tags': ['Alle Trailer', 'Movie', 'Third Party'], + }, + }, { + 'url': 'https://www.moviepilot.de/movies/interstellar-2/trailer', + 'only_matching': True, + }, { + 'url': 'https://www.moviepilot.de/movies/interstellar-2/kinoprogramm/berlin', + 'only_matching': True, + }, { + 'url': 'https://www.moviepilot.de/movies/queen-slim/trailer', + 'info_dict': { + 'id': 'x7xj6o7', + 'display_id': 'queen-slim', + 'title': 'Queen & Slim', + 'ext': 'mp4', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SbUM71WtomSjVmI_q/x720', + 'timestamp': 1571838685, + 'description': 'md5:73058bcd030aa12d991e4280d65fbebe', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'uploader_id': 'x6nd9k', + 'upload_date': '20191023', + 'duration': 138, + 'age_limit': 0, + 'tags': ['Movie', 'Verleih', 'Neue Trailer'], + }, + }, { + 'url': 'https://www.moviepilot.de/movies/der-geiger-von-florenz/trailer', + 'info_dict': { + 'id': 'der-geiger-von-florenz', + 'title': 'Der Geiger von Florenz', + 'ext': 'mp4', + }, + 'skip': 'No trailer for this movie.', + }, { + 'url': 'https://www.moviepilot.de/movies/muellers-buero/', + 'info_dict': { + 'id': 'x7xcw1i', + 'display_id': 'muellers-buero', + 'title': 'Müllers Büro', + 'ext': 'mp4', + 'description': 'md5:57501251c05cdc61ca314b7633e0312e', + 'timestamp': 1287584475, + 'age_limit': 0, + 'duration': 82, + 'upload_date': '20101020', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1WfAm1d6maq_/x720', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'tags': ['Alle Trailer', 'Movie', 'Verleih'], + 'uploader_id': 'x6nd9k', 
+ }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(f'https://www.moviepilot.de/movies/{video_id}/trailer', video_id) + + duration = try_get( + re.match(r'P(?P\d+)H(?P\d+)M(?P\d+)S', + self._html_search_meta('duration', webpage, fatal=False) or ''), + lambda mobj: sum(float(x) * y for x, y in zip(mobj.groups(), (3600, 60, 1)))) + # _html_search_meta is not used since we don't want name=description to match + description = self._html_search_regex( + ']+itemprop="description"[^>]+content="([^>"]+)"', webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'ie_key': DailymotionIE.ie_key(), + 'display_id': video_id, + 'title': self._og_search_title(webpage), + 'url': self._html_search_meta('embedURL', webpage), + 'thumbnail': self._html_search_meta('thumbnailURL', webpage), + 'description': description, + 'duration': duration, + 'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage), delimiter=' ') + } -- cgit v1.2.3 From 06b1628d3ed446d25ddbd4030fb92d8d90431c7e Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 7 Apr 2022 13:42:01 +0900 Subject: [twitcasting] Don't return multi_video for archive with single hls manifest (#3319) Authored by: Lesmiscore --- yt_dlp/extractor/twitcasting.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index af911de98..7f3fa0735 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -183,6 +183,14 @@ class TwitCastingIE(InfoExtractor): infodict = { 'formats': formats } + elif len(m3u8_urls) == 1: + formats = self._extract_m3u8_formats( + m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS) + self._sort_formats(formats) + infodict = { + # No problem here since there's only one manifest + 'formats': formats, + } else: infodict = { '_type': 'multi_video', -- cgit v1.2.3 From fcfa8853e41ca04714a7aa28a783e2804c184375 Mon Sep 17 00:00:00 2001 From: Justin Keogh Date: Thu, 7 Apr 2022 05:58:56 +0000 Subject: [utils] locked_file: Do not truncate files before locking (#2994) Authored by: jakeogh, pukkandan --- yt_dlp/utils.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 87dd04e23..66c3da4c8 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2222,10 +2222,23 @@ class locked_file(object): locked = False def __init__(self, filename, mode, block=True, encoding=None): - assert mode in {'r', 'rb', 'a', 'ab', 'w', 'wb'} - self.f = open(filename, mode, encoding=encoding) - self.mode = mode - self.block = block + if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}: + raise NotImplementedError(mode) + self.mode, self.block = mode, block + + writable = any(f in mode for f in 'wax+') + readable = any(f in mode for f in 'r+') + flags = functools.reduce(operator.ior, ( + getattr(os, 'O_CLOEXEC', 0), # UNIX only + getattr(os, 'O_BINARY', 0), # Windows only + getattr(os, 'O_NOINHERIT', 0), # Windows only + os.O_CREAT if writable else 0, # O_TRUNC only after locking + os.O_APPEND if 'a' in mode else 0, + os.O_EXCL if 'x' in mode else 0, + os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY, + )) + + self.f = os.fdopen(os.open(filename, flags), mode, encoding=encoding) def __enter__(self): exclusive = 'r' not in self.mode @@ -2235,6 +2248,8 @@ class locked_file(object): except IOError: self.f.close() raise + if 'w' in self.mode: + self.f.truncate() 
return self def unlock(self): -- cgit v1.2.3 From b63837bce0b104b1f72f2ebb6c0d05080cf2a607 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 12:00:58 +0530 Subject: [utils] locked_file: Fix non-blocking non-exclusive lock --- yt_dlp/utils.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 66c3da4c8..02b5ae2ee 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2190,18 +2190,15 @@ else: import fcntl def _lock_file(f, exclusive, block): + flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH + if not block: + flags |= fcntl.LOCK_NB try: - fcntl.flock(f, - fcntl.LOCK_SH if not exclusive - else fcntl.LOCK_EX if block - else fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.flock(f, flags) except BlockingIOError: raise except OSError: # AOSP does not have flock() - fcntl.lockf(f, - fcntl.LOCK_SH if not exclusive - else fcntl.LOCK_EX if block - else fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.lockf(f, flags) def _unlock_file(f): try: -- cgit v1.2.3 From b506289fe205cc2f3488f72c826034465cef2d0c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 11:30:46 +0530 Subject: [test] Add `test_locked_file` --- test/test_utils.py | 31 +++++++++++++++++++++++++++++++ yt_dlp/utils.py | 5 +++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 31f168998..1f826c2f2 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -56,6 +56,7 @@ from yt_dlp.utils import ( is_html, js_to_json, limit_length, + locked_file, merge_dicts, mimetype2ext, month_by_name, @@ -1795,6 +1796,36 @@ Line 1 self.assertEqual(Config.hide_login_info(['--username=foo']), ['--username=PRIVATE']) + def test_locked_file(self): + TEXT = 'test_locked_file\n' + FILE = 'test_locked_file.ytdl' + MODES = 'war' # Order is important + + try: + for lock_mode in MODES: + with locked_file(FILE, lock_mode, False) as f: + if lock_mode == 'r': + self.assertEqual(f.read(), TEXT * 2, 'Wrong file content') + else: + f.write(TEXT) + for test_mode in MODES: + testing_write = test_mode != 'r' + try: + with locked_file(FILE, test_mode, False): + pass + except (BlockingIOError, PermissionError): + if not testing_write: # FIXME + print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})') + continue + self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') + else: + self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') + finally: + try: + os.remove(FILE) + except Exception: + pass + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 02b5ae2ee..84b2603df 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -684,8 +684,9 @@ def sanitize_open(filename, open_mode): try: try: if sys.platform == 'win32': - # FIXME: Windows only has mandatory locking which also locks the file from being read. - # So for now, don't lock the file on windows. Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 + # FIXME: An exclusive lock also locks the file from being read. + # Since windows locks are mandatory, don't lock the file on windows (for now). 
+ # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 raise LockingUnsupportedError() stream = locked_file(filename, open_mode, block=False).__enter__() except LockingUnsupportedError: -- cgit v1.2.3 From 870efdee28860d7f6473c52bf7bb1bafb71aaeec Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Thu, 7 Apr 2022 16:19:36 +0900 Subject: [TVer] Fix extractor (#3268) Authored by: Lesmiscore --- yt_dlp/extractor/tver.py | 111 +++++++++++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 9ff3136e2..f23af1f14 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,77 +1,94 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, - remove_start, smuggle_url, + str_or_none, traverse_obj, ) class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?Pcorner|episode|feature|lp|tokyo2020/video)/(?P[fc]?\d+)' - # videos are only available for 7 days + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?Plp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'https://tver.jp/corner/f0062178', - 'only_matching': True, + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/episodes/ephss8yveb', + 'info_dict': { + 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', + 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', + }, + 'add_ie': ['BrightcoveNew'], }, { - 'url': 'https://tver.jp/feature/f0062413', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/episode/79622438', - 'only_matching': True, - }, { - # subtitle = ' ' - 'url': 'https://tver.jp/corner/f0068870', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/lp/f0009694', - 'only_matching': True, + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', + 'info_dict': { + # sorry but this is "correct" + 'title': '4月11日(月)23時06分 ~ 放送予定', + 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + }, + 'add_ie': ['BrightcoveNew'], }, { - 'url': 'https://tver.jp/lp/c0000239', + 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, }, { - 'url': 'https://tver.jp/tokyo2020/video/6264525510001', + 'url': 'https://tver.jp/lp/f0033031', 'only_matching': True, }] - _TOKEN = None BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' + _PLATFORM_UID = None + _PLATFORM_TOKEN = None def _real_initialize(self): - self._TOKEN = self._download_json( - 'https://tver.jp/api/access_token.php', None)['token'] + create_response = self._download_json( + 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None, + note='Creating session', data=b'device_type=pc', headers={ + 'Origin': 'https://s.tver.jp', + 'Referer': 'https://s.tver.jp/', + 'Content-Type': 'application/x-www-form-urlencoded', + }) + self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid')) + self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token')) def _real_extract(self, url): - path, video_id = self._match_valid_url(url).groups() - if path == 'lp': - webpage = self._download_webpage(url, video_id) - redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path') - path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups() - api_response = 
self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN}) - p_id = traverse_obj(api_response, ('main', 'publisher_id')) - if not p_id: - error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True - if not error_msg: - error_msg, expected = 'Failed to extract publisher ID', False - raise ExtractorError(error_msg, expected=expected) - service = remove_start(traverse_obj(api_response, ('main', 'service')), 'ts_') + video_id, video_type = self._match_valid_url(url).group('id', 'type') + if video_type not in {'series', 'episodes'}: + webpage = self._download_webpage(url, video_id, note='Resolving to new URL') + video_id = self._match_id(self._search_regex( + (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), + webpage, 'url regex')) + video_info = self._download_json( + f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, + query={'v': '5'}, headers={ + 'Origin': 'https://tver.jp', + 'Referer': 'https://tver.jp/', + }) + p_id = video_info['video']['accountID'] + r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False) + if not r_id: + raise ExtractorError('Failed to extract reference ID for Brightcove') + if not r_id.isdigit(): + r_id = f'ref:{r_id}' - r_id = traverse_obj(api_response, ('main', 'reference_id')) - if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): - r_id = 'ref:' + r_id - bc_url = smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), - {'geo_countries': ['JP']}) + additional_info = self._download_json( + f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', + video_id, fatal=False, + query={ + 'platform_uid': self._PLATFORM_UID, + 'platform_token': self._PLATFORM_TOKEN, + }, headers={ + 'x-tver-platform-type': 'web' + }) return { '_type': 'url_transparent', - 'description': traverse_obj(api_response, ('main', 'note', 0, 'text'), expected_type=compat_str), - 'episode_number': int_or_none(traverse_obj(api_response, ('main', 'ext', 'episode_number'), expected_type=compat_str)), - 'url': bc_url, + 'title': str_or_none(video_info.get('title')), + 'description': str_or_none(video_info.get('description')), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), + 'series': traverse_obj( + additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), + get_all=False), 'ie_key': 'BrightcoveNew', } -- cgit v1.2.3 From 61d3665d9da4f80c2c5cc4b6bed6a6830b29fcc3 Mon Sep 17 00:00:00 2001 From: coletdev Date: Thu, 7 Apr 2022 20:11:16 +1200 Subject: [youtube] Fix uploader for collaborative playlists (#3332) Authored by: coletdjnz --- yt_dlp/extractor/common.py | 6 ++++-- yt_dlp/extractor/youtube.py | 27 +++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e2605c1f4..9914910d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -23,6 +23,7 @@ from ..compat import ( compat_getpass, compat_http_client, compat_os_name, + compat_Pattern, compat_str, compat_urllib_error, compat_urllib_parse_unquote, @@ -41,7 +42,6 @@ from ..utils import ( base_url, bug_reports_message, clean_html, - compiled_regex_type, determine_ext, determine_protocol, dict_get, @@ -1203,7 +1203,9 @@ class InfoExtractor(object): In case of 
failure return a default value or raise a WARNING or a RegexNotFoundError, depending on fatal, specifying the field name. """ - if isinstance(pattern, (str, compat_str, compiled_regex_type)): + if string is None: + mobj = None + elif isinstance(pattern, (str, compat_Pattern)): mobj = re.search(pattern, string, flags) else: for p in pattern: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 017554c88..031aa35a1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4109,14 +4109,15 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if fatal: raise ExtractorError('Unable to find selected tab') - @classmethod - def _extract_uploader(cls, data): + def _extract_uploader(self, data): uploader = {} - renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} + renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} owner = try_get( renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) if owner: - uploader['uploader'] = owner.get('text') + owner_text = owner.get('text') + uploader['uploader'] = self._search_regex( + r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text) uploader['uploader_id'] = try_get( owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) uploader['uploader_url'] = urljoin( @@ -5136,6 +5137,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'note': 'non-standard redirect to regional channel', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'only_matching': True + }, { + 'note': 'collaborative playlist (uploader name in the form "by and x other(s)")', + 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'info_dict': { + 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'modified_date': '20220407', + 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + 'tags': [], + 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'uploader': 'pukkandan', + 'availability': 'unlisted', + 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'channel': 'pukkandan', + 'description': 'Test for collaborative playlist', + 'title': 'yt-dlp test - collaborative playlist', + 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + }, + 'playlist_mincount': 2 }] @classmethod -- cgit v1.2.3 From 22fba53fbd903cd42b0f4ef24c539a4f818fd6e9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 15:46:53 +0530 Subject: [FfmpegMetadata] Write id3v1 tags --- yt_dlp/postprocessor/ffmpeg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 643290286..27d06cbde 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -769,6 +769,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): if value is not None and mobj: metadata[mobj.group('i') or 'common'][mobj.group('key')] = value + # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags + yield ('-write_id3v1', '1') + for name, value in metadata['common'].items(): yield ('-metadata', f'{name}={value}') -- cgit v1.2.3 From bd4073c53575ef802720cd74c5415d6a6417c1dd Mon Sep 17 00:00:00 2001 From: Ha Tien Loi Date: Thu, 7 Apr 2022 18:03:13 +0700 Subject: [AfreecaTV] Add `AfreecaTVUserIE` (#3286) Closes #3257 Authored by: hatienl0i261299 --- yt_dlp/extractor/afreecatv.py | 58 +++++++++++++++++++++++++++++++++++++++++- yt_dlp/extractor/extractors.py | 1 + 
2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 77f0e3c10..28946e9dd 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .common import InfoExtractor from ..compat import compat_xpath from ..utils import ( + ExtractorError, + OnDemandPagedList, date_from_str, determine_ext, - ExtractorError, int_or_none, qualities, traverse_obj, @@ -482,3 +484,57 @@ class AfreecaTVLiveIE(AfreecaTVIE): 'formats': formats, 'is_live': True, } + + +class AfreecaTVUserIE(InfoExtractor): + IE_NAME = 'afreecatv:user' + _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P[^/]+)/vods/?(?P[^/]+)?' + _TESTS = [{ + 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - review', + }, + 'playlist_count': 218, + }, { + 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', + 'info_dict': { + '_type': 'playlist', + 'id': 'parang1995', + 'title': 'parang1995 - highlight', + }, + 'playlist_count': 997, + }, { + 'url': 'https://bj.afreecatv.com/ryuryu24/vods', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - all', + }, + 'playlist_count': 221, + }, { + 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - balloonclip', + }, + 'playlist_count': 0, + }] + _PER_PAGE = 60 + + def _fetch_page(self, user_id, user_type, page): + page += 1 + info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, + query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, + note=f'Downloading {user_type} video page {page}') + for item in info['data']: + yield self.url_result( + f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + + def _real_extract(self, url): + user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') + user_type = user_type or 'all' + entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE) + return self.playlist_result(entries, user_id, f'{user_id} - {user_type}') diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd27e14b2..3e711c3bb 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -44,6 +44,7 @@ from .aenetworks import ( from .afreecatv import ( AfreecaTVIE, AfreecaTVLiveIE, + AfreecaTVUserIE, ) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE -- cgit v1.2.3 From 316f2650f8b588507159cddcd13941dd67a4f70c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 7 Apr 2022 16:41:51 +0530 Subject: Ignore `mhtml` formats from `-f mergeall` Closes #3324 --- yt_dlp/YoutubeDL.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d03229d86..f5ea5a0b5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2179,7 +2179,8 @@ class YoutubeDL(object): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'])) + formats = list(_check_formats( + f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none')) if not formats: return merged_format = formats[-1] -- cgit v1.2.3 From 
b52e788eb2ba9914aa812238365dcb3348be2944 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Thu, 7 Apr 2022 20:21:42 +0900 Subject: [Piapro] Extract description with break lines Authored by: Lesmiscore Closes #3334 --- yt_dlp/extractor/piapro.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index c4eb4913f..ae160623b 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -27,6 +27,18 @@ class PiaproIE(InfoExtractor): 'title': '裏表ラバーズ', 'thumbnail': r're:^https?://.*\.jpg$', } + }, { + 'note': 'There are break lines in description, mandating (?s) flag', + 'url': 'https://piapro.jp/t/9cSd', + 'md5': '952bb6d1e8de95050206408a87790676', + 'info_dict': { + 'id': '9cSd', + 'ext': 'mp3', + 'title': '青に溶けた風船 / 初音ミク', + 'description': 'md5:d395a9bd151447631a5a1460bc7f9132', + 'uploader': 'シアン・キノ', + 'uploader_id': 'cyankino', + } }] _login_status = False @@ -81,7 +93,7 @@ class PiaproIE(InfoExtractor): return { 'id': video_id, 'title': self._html_search_regex(r'(.+?)', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'(.+?)
\s*(.+?)
\s* Date: Thu, 7 Apr 2022 22:52:27 +0700 Subject: [NRK] Extract timestamp (#3231) Closes #3211 Authored by: hatienl0i261299 --- yt_dlp/extractor/nrk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 4d723e886..0cf26d598 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -13,6 +13,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + parse_iso8601, str_or_none, try_get, urljoin, @@ -247,6 +248,7 @@ class NRKIE(NRKBaseIE): 'age_limit': age_limit, 'formats': formats, 'subtitles': subtitles, + 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)) } if is_series: @@ -797,7 +799,7 @@ class NRKPlaylistBaseIE(InfoExtractor): for video_id in re.findall(self._ITEM_RE, webpage) ] - playlist_title = self. _extract_title(webpage) + playlist_title = self._extract_title(webpage) playlist_description = self._extract_description(webpage) return self.playlist_result( -- cgit v1.2.3 From 9b8b7a7b5e529fdb9c8d6804b592f7f8eeb3046e Mon Sep 17 00:00:00 2001 From: Alexander Seiler Date: Fri, 8 Apr 2022 08:44:58 +0200 Subject: [Zattoo] Fix extractors (#2288) Closes: #1244 Authored by: goggle --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/zattoo.py | 283 ++++++++++++++++++++++++----------------- 2 files changed, 165 insertions(+), 123 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 3e711c3bb..2aa1e0b45 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -2119,18 +2119,17 @@ from .zattoo import ( EWETVIE, GlattvisionTVIE, MNetTVIE, - MyVisionTVIE, NetPlusIE, OsnatelTVIE, QuantumTVIE, - QuicklineIE, - QuicklineLiveIE, SaltTVIE, SAKTVIE, VTXTVIE, WalyTVIE, ZattooIE, ZattooLiveIE, + ZattooMoviesIE, + ZattooRecordingsIE, ) from .zdf import ZDFIE, ZDFChannelIE from .zee5 import ( diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index c02b4ca14..8614ca23d 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -51,25 +51,30 @@ class ZattooPlatformBaseIE(InfoExtractor): self._power_guide_hash = data['session']['power_guide_hash'] def _initialize_pre_login(self): - webpage = self._download_webpage( - self._host_url(), None, 'Downloading app token') - app_token = self._html_search_regex( - r'appToken\s*=\s*(["\'])(?P(?:(?!\1).)+?)\1', - webpage, 'app token', group='token') - app_version = self._html_search_regex( - r' +### 2022.04.08 + +* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz) +* Treat multiple `--match-filters` as OR +* File locking improvevemnts: + * Do not lock downloading file on Windows + * Do not prevent download if locking is unsupported + * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan) + * Fix non-blocking non-exclusive lock +* De-prioritize automatic-subtitles when no `--sub-lang` is given +* Exit after `--dump-user-agent` +* Fallback to video-only format when selecting by extension +* Fix `--abort-on-error` for subtitles +* Fix `--no-overwrite` for playlist infojson +* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger) +* Fix `--sleep-interval` +* Fix `--throttled-rate` +* Fix `autonumber` +* Fix case of `http_headers` +* Fix filepath sanitization in `--print-to-file` +* Handle float in `--wait-for-video` +* Ignore `mhtml` formats from `-f 
mergeall` +* Ignore format-specific fields in initial pass of `--match-filter` +* Protect stdout from unexpected progress and console-title +* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz) +* Remove incorrect warning for `--dateafter` +* Show warning when all media formats have DRM +* [downloader] Fix invocation of `HttpieFD` +* [http] Fix #3215 +* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan) +* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore) +* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz) +* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz) +* [extractor] Add `_perform_login` function +* [extractor] Allow control characters inside json +* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz) +* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore) +* [ffmpeg] Cache version data +* [FFmpegConcat] Ensure final directory exists +* [FfmpegMetadata] Write id3v1 tags +* [FFmpegVideoConvertor] Add more formats to `--remux-video` +* [FFmpegVideoConvertor] Ensure all streams are copied +* [MetadataParser] Validate outtmpl early +* [outtmpl] Fix replacement/default when used with alternate +* [outtmpl] Limit changes during sanitization +* [phantomjs] Fix bug +* [test] Add `test_locked_file` +* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h) +* [utils] `traverse_obj`: Allow filtering by value +* [utils] Add `filter_dict`, `get_first`, `try_call` +* [utils] ExtractorError: Fix for older python versions +* [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore) +* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz) +* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes) +* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli) +* [docs] Remove readthedocs +* [build] Add `requirements.txt` to pip distributions +* [cleanup, postprocessor] Create `_download_json` +* [cleanup, vimeo] Fix tests +* [cleanup] Misc fixes and minor cleanup +* [cleanup] Use `_html_extract_title` +* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299) +* [arte] Add `format_note` to m3u8 formats +* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte) +* [BanBye] Add extractor by [mehq](https://github.com/mehq) +* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69) +* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio) +* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon) +* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon) +* [Jable] Add extractors by [mehq](https://github.com/mehq) +* [LastFM] Add extractors by [mehq](https://github.com/mehq) +* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa) +* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark) +* [PokemonSoundLibrary] Add extractor by 
[Lesmiscore](https://github.com/Lesmiscore) +* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299) +* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies) +* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi) +* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle) +* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi) +* [BRMediathek] Fix VALID_URL +* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1) +* [crunchyroll] Fix inheritance +* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb) +* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299) +* [ellentube] Extract subtitles from manifest +* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan) +* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299) +* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz) +* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz) +* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299) +* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0 +* [niconico] Fix extraction of thumbnails and uploader (#3266) +* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore) +* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk) +* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299) +* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore) +* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore) +* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz) +* [ParamountPlus, CBS] Change VALID_URL by [Sipherdrakon](https://github.com/Sipherdrakon) +* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian) +* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore) +* [rai] Fix extraction of http formas by [nixxo](https://github.com/nixxo) +* [rumble] unescape title +* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore) +* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen) +* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui) +* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299) +* [TikTokVM] Fix redirect to user URL +* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore) +* [TVer] Support landing page by [vvto33](https://github.com/vvto33) +* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore) +* [veo] Fix `_VALID_URL` +* [Veo] Fix extractor by [i6t](https://github.com/i6t) +* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h) +* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29) +* [viu] Fixed extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan) +* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz) +* [wget] Fix proxy by 
[kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz) +* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0) +* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan) +* [youtube] Add extractor-arg to skip auto-translated subs +* [youtube] Avoid false positives when detecting damaged formats +* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev) +* [youtube] Fix auto-translated automatic captions +* [youtube] Fix pagination of `membership` tab +* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz) +* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz) +* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz) +* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz) +* [youtube:tab] Minor improvements +* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz) +* [Zattoo] Fix extractors by [goggle](https://github.com/goggle) +* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299) + + ### 2022.03.08.1 * [cleanup] Refactor `__init__.py` @@ -34,7 +172,7 @@ * Set `webpage_url_...` from `webpage_url` and not input URL * Tolerate failure to `--write-link` due to unknown URL * [aria2c] Add `--http-accept-gzip=true` -* [build] Update pyinstaller to 4.10 by [shirt-dev](https://github.com/shirt-dev) +* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev) * [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley) * [devscripts] Improve `prepare_manpage` * [downloader] Do not use aria2c for non-native `m3u8` diff --git a/supportedsites.md b/supportedsites.md index 46ad1328d..eac7842a3 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -42,6 +42,7 @@ - **aenetworks:show** - **afreecatv**: afreecatv.com - **afreecatv:live**: afreecatv.com + - **afreecatv:user** - **AirMozilla** - **AliExpressLive** - **AlJazeera** @@ -104,6 +105,8 @@ - **awaan:video** - **AZMedien**: AZ Medien videos - **BaiduVideo**: 百度视频 + - **BanBye** + - **BanByeChannel** - **bandaichannel** - **Bandcamp** - **Bandcamp:album** @@ -245,6 +248,7 @@ - **cpac:playlist** - **Cracked** - **Crackle** + - **Craftsy** - **CrooksAndLiars** - **CrowdBunker** - **CrowdBunkerChannel** @@ -263,6 +267,8 @@ - **curiositystream:collections** - **curiositystream:series** - **CWTV** + - **Cybrary** + - **CybraryCourse** - **Daftsex** - **DagelijkseKost**: dagelijksekost.een.be - **DailyMail** @@ -484,6 +490,7 @@ - **Hungama** - **HungamaAlbumPlaylist** - **HungamaSong** + - **huya:live**: huya.com - **Hypem** - **ign.com** - **IGNArticle** @@ -512,6 +519,8 @@ - **iq.com**: International version of iQiyi - **iq.com:album** - **iqiyi**: 爱奇艺 + - **ITProTV** + - **ITProTVCourse** - **ITTF** - **ITV** - **ITVBTCC** @@ -520,6 +529,8 @@ - **ivideon**: Ivideon TV - **Iwara** - **Izlesene** + - **Jable** + - **JablePlaylist** - **Jamendo** - **JamendoAlbum** - **JeuxVideo** @@ -555,6 +566,9 @@ - **la7.it:podcast** - **laola1tv** - **laola1tv:embed** + - **LastFM** + - **LastFMPlaylist** + - **LastFMUser** - **lbry** - **lbry:channel** - **LCI** @@ -603,6 +617,7 @@ - **MallTV** - **mangomolo:live** - **mangomolo:video** + - **MangoTV**: 芒果TV - 
**ManotoTV**: Manoto TV (Episode) - **ManotoTVLive**: Manoto TV (Live) - **ManotoTVShow**: Manoto TV (Show) @@ -635,7 +650,6 @@ - **Metacritic** - **mewatch** - **Mgoon** - - **MGTV**: 芒果TV - **MiaoPai** - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom @@ -671,6 +685,7 @@ - **Motorsport**: motorsport.com - **MovieClips** - **MovieFap** + - **Moviepilot** - **Moviezine** - **MovingImage** - **MSN** @@ -705,7 +720,6 @@ - **MyVideoGe** - **MyVidster** - **MyviEmbed** - - **MyVisionTV** - **n-tv.de** - **N1Info:article** - **N1InfoAsset** @@ -863,6 +877,9 @@ - **PalcoMP3:song** - **PalcoMP3:video** - **pandora.tv**: 판도라TV + - **Panopto** + - **PanoptoList** + - **PanoptoPlaylist** - **ParamountNetwork** - **ParamountPlus** - **ParamountPlusSeries** @@ -912,6 +929,7 @@ - **PlutoTV** - **podomatic** - **Pokemon** + - **PokemonSoundLibrary** - **PokemonWatch** - **PokerGo** - **PokerGoCollection** @@ -957,8 +975,6 @@ - **qqmusic:toplist**: QQ音乐 - 排行榜 - **QuantumTV** - **Qub** - - **Quickline** - - **QuicklineLive** - **R7** - **R7Article** - **Radiko** @@ -1427,6 +1443,9 @@ - **Wakanim** - **Walla** - **WalyTV** + - **wasdtv:clip** + - **wasdtv:record** + - **wasdtv:stream** - **washingtonpost** - **washingtonpost:article** - **wat.tv** @@ -1520,6 +1539,8 @@ - **Zapiks** - **Zattoo** - **ZattooLive** + - **ZattooMovies** + - **ZattooRecordings** - **ZDF** - **ZDFChannel** - **Zee5** -- cgit v1.2.3 From dee1d65dc362f69b28287b2e82d93be4d22d1968 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 8 Apr 2022 09:57:06 +0000 Subject: [version] update Created by: pukkandan :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index c671a1910..39746047b 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 5ff022a04..4e072a436 100644 --- 
a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index acdfeb038..85b5d2cd5 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -60,12 +60,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index a4a038fc8..cdff538a1 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, 
stderr utf-8, pref cp1252 - [debug] yt-dlp version 2022.03.08.1 (exe) + [debug] yt-dlp version 2022.04.08 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2022.03.08.1) + yt-dlp is up to date (2022.04.08) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 1bdafc441..59c8dd88e 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -13,7 +13,7 @@ body: required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've verified that I'm running yt-dlp version **2022.03.08.1** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) + - label: I've verified that I'm running yt-dlp version **2022.04.08** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index d5df2af90..fb3ec8c6d 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,5 +1,5 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2022.03.08.1' +__version__ = '2022.04.08' -RELEASE_GIT_HEAD = 'c0c2c57d3' +RELEASE_GIT_HEAD = '7884ade65' -- cgit v1.2.3 From 2d2b5493ee88ccde079a5cde3d58ac5469057d17 Mon Sep 17 00:00:00 2001 From: Ashish Gupta Date: Fri, 8 Apr 2022 21:03:50 +0530 Subject: [ZEE5] Fix extractor. Authored by: Ashish0804 Closes: https://github.com/yt-dlp/yt-dlp/issues/3105 --- yt_dlp/extractor/zee5.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 3e3f11b15..9e411d83f 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -86,8 +86,8 @@ class Zee5IE(InfoExtractor): 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412', 'only_matching': True }] - _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false' - _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb' + _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false' + _DEVICE_ID = 'TszZPYPuY9Pq2cJizV0U000000000000' _USER_TOKEN = None _LOGIN_HINT = 'Use "--username " to login using otp or "--username token" and "--password " to login using user token.' 
_NETRC_MACHINE = 'zee5' -- cgit v1.2.3 From d46a3e7a127654b7537b0ab537f8c08ba16862ff Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Apr 2022 16:25:40 +0530 Subject: [rai] Add `release_year` Closes #2319 --- yt_dlp/extractor/rai.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 6864129c6..7c72d60c6 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -340,6 +340,7 @@ class RaiPlayIE(RaiBaseIE): 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'subtitles': subtitles, + 'release_year': traverse_obj(media, ('track_info', 'edit_year')), } info.update(relinker_info) -- cgit v1.2.3 From 4abea8ca0af0773db9fb2372b272d497bd77b207 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Apr 2022 10:11:25 +0530 Subject: [utils] `sanitize_path`: Fix when path is empty string --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 7af7a9fb9..d4f8d8cab 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2237,7 +2237,7 @@ class YoutubeDL(object): matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1])) try: yield matches[format_idx - 1] - except IndexError: + except LazyList.IndexError: return filters = [self._build_format_filter(f) for f in selector.filters] diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 84b2603df..ba9566cab 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -778,7 +778,7 @@ def sanitize_path(s, force=False): for path_part in norm_path] if drive_or_unc: sanitized_path.insert(0, drive_or_unc + os.path.sep) - elif force and s[0] == os.path.sep: + elif force and s and s[0] == os.path.sep: sanitized_path.insert(0, os.path.sep) return os.path.join(*sanitized_path) -- cgit v1.2.3 From 98804d034d04d21cbeb8cd43d1e1d90f1cdae836 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Sun, 10 Apr 2022 01:23:27 +0900 Subject: [utils] locked_file: Do not give executable bits for newly created files Authored by: Lesmiscore --- yt_dlp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ba9566cab..14dbbf59f 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2236,7 +2236,7 @@ class locked_file(object): os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY, )) - self.f = os.fdopen(os.open(filename, flags), mode, encoding=encoding) + self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding) def __enter__(self): exclusive = 'r' not in self.mode -- cgit v1.2.3 From f894294636989788f02b917037f1f4a771595489 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Apr 2022 21:19:00 +0530 Subject: [EmbedThumbnail] Do not remove id3v1 tags --- yt_dlp/postprocessor/embedthumbnail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 84ab54f44..057007f2e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -101,7 +101,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): success = True if info['ext'] == 'mp3': options = [ - '-c', 'copy', '-map', '0:0', '-map', '1:0', '-id3v2_version', '3', + '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (front)"'] self._report_run('ffmpeg', filename) -- cgit v1.2.3 From 
a25bca9f89f77e6e5153c3400c4a27020d8cba9d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Apr 2022 01:00:21 +0530 Subject: [youtube, cleanup] Minor refactoring Authored by: coletdjnz, pukkandan --- yt_dlp/extractor/youtube.py | 71 ++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 031aa35a1..4ee09ad9a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -384,6 +384,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _real_initialize(self): self._initialize_pref() self._initialize_consent() + self._check_login_required() + + def _check_login_required(self): if (self._LOGIN_REQUIRED and self.get_param('cookiefile') is None and self.get_param('cookiesfrombrowser') is None): @@ -563,6 +566,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): headers['X-Origin'] = origin return {h: v for h, v in headers.items() if v is not None} + def _download_ytcfg(self, client, video_id): + url = { + 'web': 'https://www.youtube.com', + 'web_music': 'https://music.youtube.com', + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + }.get(client) + if not url: + return {} + webpage = self._download_webpage( + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + return self.extract_ytcfg(video_id, webpage) or {} + @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { @@ -728,6 +743,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return None def _extract_time_text(self, renderer, *path_list): + """@returns (timestamp, time_text)""" text = self._get_text(renderer, *path_list) or '' dt = self.extract_relative_time(text) timestamp = None @@ -2959,16 +2975,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return orderedSet(requested_clients) - def _extract_player_ytcfg(self, client, video_id): - url = { - 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' - }.get(client) - if not url: - return {} - webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip()) - return self.extract_ytcfg(video_id, webpage) or {} - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): initial_pr = None if webpage: @@ -3005,8 +3011,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): while clients: client, base_client, variant = _split_innertube_client(clients.pop()) player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip'): - player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg + if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER') @@ -4347,6 +4353,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') + @property + def skip_webpage(self): + return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) + def _extract_webpage(self, url, item_id, fatal=True): retries = self.get_param('extractor_retries', 3) count = -1 @@ -4393,9 +4403,21 @@ class 
YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): return webpage, data + def _report_playlist_authcheck(self, ytcfg, fatal=True): + """Use if failed to extract ytcfg (and data) from initial webpage""" + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download' + if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal: + raise ExtractorError( + f'{msg}. If you are not downloading private content, or ' + 'your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): data = None - if 'webpage' not in self._configuration_arg('skip'): + if not self.skip_webpage: webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) # Reject webpage data if redirected to home page without explicitly requesting @@ -4409,14 +4431,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): raise ExtractorError(msg, expected=True) self.report_warning(msg, only_once=True) if not data: - if not ytcfg and self.is_authenticated: - msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' - if 'authcheck' not in self._configuration_arg('skip') and fatal: - raise ExtractorError( - msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' - ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', - expected=True) - self.report_warning(msg, only_once=True) + self._report_playlist_authcheck(ytcfg, fatal=fatal) data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) return data, ytcfg @@ -4454,14 +4469,20 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'), ('continuationContents', ), ) + display_id = f'query "{query}"' check_get_keys = tuple(set(keys[0] for keys in content_keys)) + ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} + self._report_playlist_authcheck(ytcfg, fatal=False) continuation_list = [None] + search = None for page_num in itertools.count(1): data.update(continuation_list[0] or {}) + headers = self.generate_api_headers( + ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client) search = self._extract_response( - item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - default_client=default_client, check_get_keys=check_get_keys) + item_id=f'{display_id} page {page_num}', ep='search', query=data, + default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers) slr_contents = traverse_obj(search, *content_keys) yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list) if not continuation_list[0]: @@ -5634,7 +5655,9 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): Subclasses must define the _FEED_NAME property. 
""" _LOGIN_REQUIRED = True - _TESTS = [] + + def _real_initialize(self): + YoutubeBaseInfoExtractor._check_login_required(self) @property def IE_NAME(self): -- cgit v1.2.3 From 97ec5bc550e0e34f3e79cdbfb5ad9d81b228ceb8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Apr 2022 01:01:48 +0530 Subject: [cookies] Report progress when importing cookies --- yt_dlp/YoutubeDL.py | 16 ++++--- yt_dlp/cookies.py | 124 +++++++++++++++++++++++++++++++++++---------------- yt_dlp/minicurses.py | 2 +- 3 files changed, 96 insertions(+), 46 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d4f8d8cab..fef05d517 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -643,6 +643,11 @@ class YoutubeDL(object): else: raise + if auto_init: + if auto_init != 'no_verbose_header': + self.print_debug_header() + self.add_default_info_extractors() + if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not self.params.get('restrictfilenames', False)): @@ -664,13 +669,6 @@ class YoutubeDL(object): # Set http_headers defaults according to std_headers self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {})) - self._setup_opener() - - if auto_init: - if auto_init != 'no_verbose_header': - self.print_debug_header() - self.add_default_info_extractors() - hooks = { 'post_hooks': self.add_post_hook, 'progress_hooks': self.add_progress_hook, @@ -687,6 +685,7 @@ class YoutubeDL(object): get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), when=when) + self._setup_opener() register_socks_protocols() def preload_download_archive(fn): @@ -3698,6 +3697,7 @@ class YoutubeDL(object): delim=', ') or 'none' write_debug('Optional libraries: %s' % lib_str) + self._setup_opener() proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): @@ -3717,6 +3717,8 @@ class YoutubeDL(object): latest_version) def _setup_opener(self): + if hasattr(self, '_opener'): + return timeout_val = self.params.get('socket_timeout') self._socket_timeout = 20 if timeout_val is None else float(timeout_val) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 1f08a3664..3476595d3 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -20,6 +20,7 @@ from .compat import ( compat_b64decode, compat_cookiejar_Cookie, ) +from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import ( error_to_str, expand_path, @@ -73,6 +74,32 @@ class YDLLogger: if self._ydl: self._ydl.report_error(message) + def progress_bar(self): + """Return a context manager with a print method. 
From 97ec5bc550e0e34f3e79cdbfb5ad9d81b228ceb8 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 10 Apr 2022 01:01:48 +0530
Subject: [cookies] Report progress when importing cookies

---
 yt_dlp/YoutubeDL.py  |  16 ++++---
 yt_dlp/cookies.py    | 124 +++++++++++++++++++++++++++++++++++----------------
 yt_dlp/minicurses.py |   2 +-
 3 files changed, 96 insertions(+), 46 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index d4f8d8cab..fef05d517 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -643,6 +643,11 @@ class YoutubeDL(object):
             else:
                 raise

+        if auto_init:
+            if auto_init != 'no_verbose_header':
+                self.print_debug_header()
+            self.add_default_info_extractors()
+
         if (sys.platform != 'win32'
                 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                 and not self.params.get('restrictfilenames', False)):
@@ -664,13 +669,6 @@ class YoutubeDL(object):
         # Set http_headers defaults according to std_headers
         self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

-        self._setup_opener()
-
-        if auto_init:
-            if auto_init != 'no_verbose_header':
-                self.print_debug_header()
-            self.add_default_info_extractors()
-
         hooks = {
             'post_hooks': self.add_post_hook,
             'progress_hooks': self.add_progress_hook,
@@ -687,6 +685,7 @@ class YoutubeDL(object):
                 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
                 when=when)

+        self._setup_opener()
         register_socks_protocols()

         def preload_download_archive(fn):
@@ -3698,6 +3697,7 @@ class YoutubeDL(object):
             delim=', ') or 'none'
         write_debug('Optional libraries: %s' % lib_str)

+        self._setup_opener()
         proxy_map = {}
         for handler in self._opener.handlers:
             if hasattr(handler, 'proxies'):
@@ -3717,6 +3717,8 @@ class YoutubeDL(object):
                 latest_version)

     def _setup_opener(self):
+        if hasattr(self, '_opener'):
+            return
         timeout_val = self.params.get('socket_timeout')
         self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 1f08a3664..3476595d3 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -20,6 +20,7 @@ from .compat import (
     compat_b64decode,
     compat_cookiejar_Cookie,
 )
+from .minicurses import MultilinePrinter, QuietMultilinePrinter
 from .utils import (
     error_to_str,
     expand_path,
@@ -73,6 +74,32 @@ class YDLLogger:
         if self._ydl:
             self._ydl.report_error(message)

+    def progress_bar(self):
+        """Return a context manager with a print method. (Optional)"""
+        # Do not print to files/pipes, loggers, or when --no-progress is used
+        if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'):
+            return
+        file = self._ydl._out_files['error']
+        try:
+            if not file.isatty():
+                return
+        except BaseException:
+            return
+
+        printer = MultilinePrinter(file, preserve_output=False)
+        printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0)
+        return printer
+
+
+def _create_progress_bar(logger):
+    if hasattr(logger, 'progress_bar'):
+        printer = logger.progress_bar()
+        if printer:
+            return printer
+    printer = QuietMultilinePrinter()
+    printer.print = lambda _: None
+    return printer
+

 def load_cookies(cookie_file, browser_specification, ydl):
     cookie_jars = []
@@ -115,7 +142,7 @@ def _extract_firefox_cookies(profile, logger):
     else:
         search_root = os.path.join(_firefox_browser_dir(), profile)

-    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite')
+    cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger)
     if cookie_database_path is None:
         raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root))
     logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
@@ -126,13 +153,17 @@ def _extract_firefox_cookies(profile, logger):
             cursor = _open_database_copy(cookie_database_path, tmpdir)
             cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
             jar = YoutubeDLCookieJar()
-            for host, name, value, path, expiry, is_secure in cursor.fetchall():
-                cookie = compat_cookiejar_Cookie(
-                    version=0, name=name, value=value, port=None, port_specified=False,
-                    domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
-                    path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
-                    comment=None, comment_url=None, rest={})
-                jar.set_cookie(cookie)
+            with _create_progress_bar(logger) as progress_bar:
+                table = cursor.fetchall()
+                total_cookie_count = len(table)
+                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
+                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
+                    cookie = compat_cookiejar_Cookie(
+                        version=0, name=name, value=value, port=None, port_specified=False,
+                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
+                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
+                        comment=None, comment_url=None, rest={})
+                    jar.set_cookie(cookie)
             logger.info('Extracted {} cookies from firefox'.format(len(jar)))
             return jar
         finally:
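
`_create_progress_bar` always hands back an object with a `print` method, so the per-cookie loops can report progress unconditionally; when output is not a terminal the printer silently degrades to a no-op. A rough standalone sketch of that null-object idea (simplified stand-ins, not the real MultilinePrinter/QuietMultilinePrinter classes):

    import sys

    class QuietPrinter:
        """Used when progress must be suppressed (non-tty, --no-progress, custom logger)."""
        def __enter__(self): return self
        def __exit__(self, *exc): return False
        def print(self, message): pass

    class StatusLinePrinter(QuietPrinter):
        """Rewrites a single status line, roughly what print_at_line(..., 0) achieves."""
        def print(self, message):
            sys.stderr.write(f'\r[Cookies] {message}')
            sys.stderr.flush()

    def create_progress_bar():
        return StatusLinePrinter() if sys.stderr.isatty() else QuietPrinter()

    rows = [('example.com', 'sid', 'value')] * 5000
    with create_progress_bar() as bar:
        for i, _row in enumerate(rows):
            bar.print(f'Loading cookie {i: 6d}/{len(rows): 6d}')
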
@@ -232,7 +263,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
             logger.error('{} does not support profiles'.format(browser_name))
             search_root = config['browser_dir']

-    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies')
+    cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger)
     if cookie_database_path is None:
         raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root))
     logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path))
@@ -251,26 +282,18 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
             jar = YoutubeDLCookieJar()
             failed_cookies = 0
             unencrypted_cookies = 0
-            for host_key, name, value, encrypted_value, path, expires_utc, is_secure in cursor.fetchall():
-                host_key = host_key.decode('utf-8')
-                name = name.decode('utf-8')
-                value = value.decode('utf-8')
-                path = path.decode('utf-8')
-
-                if not value and encrypted_value:
-                    value = decryptor.decrypt(encrypted_value)
-                    if value is None:
+            with _create_progress_bar(logger) as progress_bar:
+                table = cursor.fetchall()
+                total_cookie_count = len(table)
+                for i, line in enumerate(table):
+                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
+                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
+                    if not cookie:
                         failed_cookies += 1
                         continue
-                else:
-                    unencrypted_cookies += 1
-
-                cookie = compat_cookiejar_Cookie(
-                    version=0, name=name, value=value, port=None, port_specified=False,
-                    domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
-                    path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
-                    comment=None, comment_url=None, rest={})
-                jar.set_cookie(cookie)
+                    elif not is_encrypted:
+                        unencrypted_cookies += 1
+                    jar.set_cookie(cookie)
             if failed_cookies > 0:
                 failed_message = ' ({} could not be decrypted)'.format(failed_cookies)
             else:
@@ -285,6 +308,25 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
             cursor.connection.close()


+def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
+    host_key = host_key.decode('utf-8')
+    name = name.decode('utf-8')
+    value = value.decode('utf-8')
+    path = path.decode('utf-8')
+    is_encrypted = not value and encrypted_value
+
+    if is_encrypted:
+        value = decryptor.decrypt(encrypted_value)
+        if value is None:
+            return is_encrypted, None
+
+    return is_encrypted, compat_cookiejar_Cookie(
+        version=0, name=name, value=value, port=None, port_specified=False,
+        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
+        path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
+        comment=None, comment_url=None, rest={})
+
+
 class ChromeCookieDecryptor:
     """
     Overview:
@@ -547,10 +589,12 @@ def _parse_safari_cookies_page(data, jar, logger):

     p.skip_to(record_offsets[0], 'unknown page header field')

-    for record_offset in record_offsets:
-        p.skip_to(record_offset, 'space between records')
-        record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
-        p.read_bytes(record_length)
+    with _create_progress_bar(logger) as progress_bar:
+        for i, record_offset in enumerate(record_offsets):
+            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
+            p.skip_to(record_offset, 'space between records')
+            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
+            p.read_bytes(record_length)

     p.skip_to_end('space in between pages')

@@ -830,10 +874,11 @@ def _get_mac_keyring_password(browser_keyring_name, logger):


 def _get_windows_v10_key(browser_root, logger):
-    path = _find_most_recently_used_file(browser_root, 'Local State')
+    path = _find_most_recently_used_file(browser_root, 'Local State', logger)
     if path is None:
         logger.error('could not find local state file')
         return None
+    logger.debug(f'Found local state file at "{path}"')
     with open(path, 'r', encoding='utf8') as f:
         data = json.load(f)
     try:
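
`_process_chrome_cookie` above decodes the raw database row, decrypts the value when needed, and ultimately builds a standard `http.cookiejar.Cookie` (which is what `compat_cookiejar_Cookie` resolves to on Python 3), returning it together with a "was it encrypted" flag. A standalone sketch of just the cookie-construction half, using made-up values:

    from http.cookiejar import Cookie, CookieJar

    def make_cookie(host, name, value, path, expires, secure):
        return Cookie(
            version=0, name=name, value=value, port=None, port_specified=False,
            domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
            path=path, path_specified=bool(path), secure=secure, expires=expires,
            discard=False, comment=None, comment_url=None, rest={})

    jar = CookieJar()
    jar.set_cookie(make_cookie('.example.com', 'sid', 'abc123', '/', 2000000000, True))
    print(len(jar))  # 1
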
@@ -925,13 +970,16 @@ def _get_column_names(cursor, table_name):
     return [row[1].decode('utf-8') for row in table_info]


-def _find_most_recently_used_file(root, filename):
+def _find_most_recently_used_file(root, filename, logger):
     # if there are multiple browser profiles, take the most recently used one
-    paths = []
-    for root, dirs, files in os.walk(root):
-        for file in files:
-            if file == filename:
-                paths.append(os.path.join(root, file))
+    i, paths = 0, []
+    with _create_progress_bar(logger) as progress_bar:
+        for curr_root, dirs, files in os.walk(root):
+            for file in files:
+                i += 1
+                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
+                if file == filename:
+                    paths.append(os.path.join(curr_root, file))
     return None if not paths else max(paths, key=lambda path: os.lstat(path).st_mtime)


diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py
index f9f99e390..d7a8ffddd 100644
--- a/yt_dlp/minicurses.py
+++ b/yt_dlp/minicurses.py
@@ -178,4 +178,4 @@ class MultilinePrinter(MultilinePrinterBase):
                 *text, CONTROL_SEQUENCES['ERASE_LINE'],
                 f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
         else:
-            self.write(*text, ' ' * self._lastlength)
+            self.write('\r', ' ' * self._lastlength, '\r')
--
cgit v1.2.3

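Aside from the progress reporting, `_find_most_recently_used_file` keeps its original behaviour: walk the search root, collect every file with the requested name, and return the newest one by modification time. A standalone equivalent (illustrative only; the real callers pass browser-specific roots and filenames such as 'Cookies' or 'cookies.sqlite'):

    import os

    def find_most_recently_used_file(root, filename):
        paths = [os.path.join(curr_root, file)
                 for curr_root, _dirs, files in os.walk(root)
                 for file in files
                 if file == filename]
        return max(paths, key=lambda path: os.lstat(path).st_mtime) if paths else None

    print(find_most_recently_used_file('.', 'cookies.sqlite'))
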
From ca5300c7edadad46ede0249ad9fa8feaa4ccddd4 Mon Sep 17 00:00:00 2001
From: krichbanana <77071421+krichbanana@users.noreply.github.com>
Date: Sat, 9 Apr 2022 15:55:24 -0400
Subject: [youtube] Add `:ytnotifications` extractor (#3347)

Authored by: krichbanana
---
 README.md                      |  2 +-
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/youtube.py    | 89 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1624a1fcb..f4b55f6d7 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
 * **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, Embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that the NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.

 * **Youtube improvements**:
-    * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content
+    * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) and private playlists supports downloading multiple pages of content
     * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works
     * Mixes supports downloading multiple pages of content
     * Some (but not all) age-gated content can be downloaded without cookies
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index bc06ab463..0cb686304 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -2100,6 +2100,7 @@ from .youtube import (
     YoutubeIE,
     YoutubeClipIE,
     YoutubeFavouritesIE,
+    YoutubeNotificationsIE,
     YoutubeHistoryIE,
     YoutubeTabIE,
     YoutubeLivestreamEmbedIE,
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 4ee09ad9a..f284487b8 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -5526,6 +5526,95 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
             ie=YoutubeTabIE.ie_key())


+class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
+    IE_NAME = 'youtube:notif'
+    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
+    _VALID_URL = r':ytnotif(?:ication)?s?'
+    _LOGIN_REQUIRED = True
+    _TESTS = [{
+        'url': ':ytnotif',
+        'only_matching': True,
+    }, {
+        'url': ':ytnotifications',
+        'only_matching': True,
+    }]
+
+    def _extract_notification_menu(self, response, continuation_list):
+        notification_list = traverse_obj(
+            response,
+            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
+            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
+            expected_type=list) or []
+        continuation_list[0] = None
+        for item in notification_list:
+            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
+            if entry:
+                yield entry
+            continuation = item.get('continuationItemRenderer')
+            if continuation:
+                continuation_list[0] = continuation
+
+    def _extract_notification_renderer(self, notification):
+        video_id = traverse_obj(
+            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
+        url = f'https://www.youtube.com/watch?v={video_id}'
+        channel_id = None
+        if not video_id:
+            browse_ep = traverse_obj(
+                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
+            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
+            post_id = self._search_regex(
+                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
+                'post id', default=None)
+            if not channel_id or not post_id:
+                return
+            # The direct /post url redirects to this in the browser
+            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
+
+        channel = traverse_obj(
+            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
+            expected_type=str)
+        title = self._search_regex(
+            rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'),
+            'video title', default=None)
+        if title:
+            title = title.replace('\xad', '')  # remove soft hyphens
+        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
+                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
+                       else None)
+        return {
+            '_type': 'url',
+            'url': url,
+            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
+            'video_id': video_id,
+            'title': title,
+            'channel_id': channel_id,
+            'channel': channel,
+            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
+            'upload_date': upload_date,
+        }
+
+    def _notification_menu_entries(self, ytcfg):
+        continuation_list = [None]
+        response = None
+        for page in itertools.count(1):
+            ctoken = traverse_obj(
+                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
+            response = self._extract_response(
+                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
+                ep='notification/get_notification_menu', check_get_keys='actions',
+                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
+            yield from self._extract_notification_menu(response, continuation_list)
+            if not continuation_list[0]:
+                break
+
+    def _real_extract(self, url):
+        display_id = 'notifications'
+        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
+        self._report_playlist_authcheck(ytcfg)
+        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
+
+
 class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
     IE_DESC = 'YouTube search'
     IE_NAME = 'youtube:search'
--
cgit v1.2.3
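
A hedged usage sketch for the new `:ytnotifications` pseudo-URL through the Python API (the option values below are assumptions mirroring the CLI flags --cookies-from-browser and --flat-playlist; the feed only yields results with cookies for a logged-in account):

    import yt_dlp

    ydl_opts = {
        'cookiesfrombrowser': ('firefox', ),  # any supported browser with a logged-in YouTube session
        'extract_flat': 'in_playlist',        # just list the notification entries, do not download
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(':ytnotifications', download=False)
        for entry in info.get('entries') or []:
            print(entry.get('title'), entry.get('url'))
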