diff options
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- | yt_dlp/extractor/abematv.py | 16 | ||||
-rw-r--r-- | yt_dlp/extractor/ant1newsgr.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 59 | ||||
-rw-r--r-- | yt_dlp/extractor/frontendmasters.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/iqiyi.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/periscope.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/soundcloud.py | 16 | ||||
-rw-r--r-- | yt_dlp/extractor/sovietscloset.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/zingmp3.py | 4 |
10 files changed, 57 insertions, 54 deletions
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 66b12c72f..360fa4699 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -8,10 +8,6 @@ import struct from base64 import urlsafe_b64encode from binascii import unhexlify -import typing -if typing.TYPE_CHECKING: - from ..YoutubeDL import YoutubeDL - from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..compat import ( @@ -36,15 +32,15 @@ from ..utils import ( # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) -def add_opener(self: 'YoutubeDL', handler): +def add_opener(ydl, handler): ''' Add a handler for opening URLs, like _download_webpage ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 - assert isinstance(self._opener, compat_urllib_request.OpenerDirector) - self._opener.add_handler(handler) + assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector) + ydl._opener.add_handler(handler) -def remove_opener(self: 'YoutubeDL', handler): +def remove_opener(ydl, handler): ''' Remove handler(s) for opening URLs @param handler Either handler object itself or handler type. @@ -52,8 +48,8 @@ def remove_opener(self: 'YoutubeDL', handler): ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 - opener = self._opener - assert isinstance(self._opener, compat_urllib_request.OpenerDirector) + opener = ydl._opener + assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector) if isinstance(handler, (type, tuple)): find_cp = lambda x: isinstance(x, handler) else: diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index 7d70e0427..1075b461e 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -97,8 +97,8 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage)) if not embed_urls: raise ExtractorError('no videos found for %s' % video_id, expected=True) - return self.url_result_or_playlist_from_matches( - embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(), + return self.playlist_from_matches( + embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5b7de1296..354814433 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -226,6 +226,7 @@ class InfoExtractor(object): The following fields are optional: + direct: True if a direct video file was given (must only be set by GenericIE) alt_title: A secondary title of the video. display_id An alternative identifier for the video, not necessarily unique, but available before title. Typically, id is @@ -274,7 +275,7 @@ class InfoExtractor(object): * "url": A URL pointing to the subtitles file It can optionally also have: * "name": Name or description of the subtitles - * http_headers: A dictionary of additional HTTP headers + * "http_headers": A dictionary of additional HTTP headers to add to the request. "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles'; contains automatically generated @@ -425,8 +426,8 @@ class InfoExtractor(object): title, description etc. - Subclasses of this one should re-define the _real_initialize() and - _real_extract() methods and define a _VALID_URL regexp. + Subclasses of this should define a _VALID_URL regexp and, re-define the + _real_extract() and (optionally) _real_initialize() methods. Probably, they should also be added to the list of extractors. Subclasses may also override suitable() if necessary, but ensure the function @@ -661,7 +662,7 @@ class InfoExtractor(object): return False def set_downloader(self, downloader): - """Sets the downloader for this IE.""" + """Sets a YoutubeDL instance as the downloader for this IE.""" self._downloader = downloader def _real_initialize(self): @@ -670,7 +671,7 @@ class InfoExtractor(object): def _real_extract(self, url): """Real extraction process. Redefine in subclasses.""" - pass + raise NotImplementedError('This method must be implemented by subclasses') @classmethod def ie_key(cls): @@ -1661,31 +1662,31 @@ class InfoExtractor(object): 'format_id': {'type': 'alias', 'field': 'id'}, 'preference': {'type': 'alias', 'field': 'ie_pref'}, 'language_preference': {'type': 'alias', 'field': 'lang'}, - - # Deprecated - 'dimension': {'type': 'alias', 'field': 'res'}, - 'resolution': {'type': 'alias', 'field': 'res'}, - 'extension': {'type': 'alias', 'field': 'ext'}, - 'bitrate': {'type': 'alias', 'field': 'br'}, - 'total_bitrate': {'type': 'alias', 'field': 'tbr'}, - 'video_bitrate': {'type': 'alias', 'field': 'vbr'}, - 'audio_bitrate': {'type': 'alias', 'field': 'abr'}, - 'framerate': {'type': 'alias', 'field': 'fps'}, - 'protocol': {'type': 'alias', 'field': 'proto'}, 'source_preference': {'type': 'alias', 'field': 'source'}, + 'protocol': {'type': 'alias', 'field': 'proto'}, 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, - 'filesize_estimate': {'type': 'alias', 'field': 'size'}, - 'samplerate': {'type': 'alias', 'field': 'asr'}, - 'video_ext': {'type': 'alias', 'field': 'vext'}, - 'audio_ext': {'type': 'alias', 'field': 'aext'}, - 'video_codec': {'type': 'alias', 'field': 'vcodec'}, - 'audio_codec': {'type': 'alias', 'field': 'acodec'}, - 'video': {'type': 'alias', 'field': 'hasvid'}, - 'has_video': {'type': 'alias', 'field': 'hasvid'}, - 'audio': {'type': 'alias', 'field': 'hasaud'}, - 'has_audio': {'type': 'alias', 'field': 'hasaud'}, - 'extractor': {'type': 'alias', 'field': 'ie_pref'}, - 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'}, + + # Deprecated + 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}, + 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}, + 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}, + 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}, + 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}, + 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}, + 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}, + 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}, + 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}, + 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}, + 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}, + 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}, + 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}, + 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}, + 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, + 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, + 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, + 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, + 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, + 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, } def __init__(self, ie, field_preference): @@ -1785,7 +1786,7 @@ class InfoExtractor(object): continue if self._get_field_setting(field, 'type') == 'alias': alias, field = field, self._get_field_setting(field, 'field') - if alias not in ('format_id', 'preference', 'language_preference'): + if self._get_field_setting(alias, 'deprecated'): self.ydl.deprecation_warning( f'Format sorting alias {alias} is deprecated ' f'and may be removed in a future version. Please use {field} instead') diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index 40b8cb0b4..0d29da29b 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE): entries = [] for lesson in lessons: lesson_name = lesson.get('slug') - if not lesson_name: - continue lesson_id = lesson.get('hash') or lesson.get('statsId') + if not lesson_id or not lesson_name: + continue entries.append(self._extract_lesson(chapters, lesson_id, lesson)) title = course.get('title') diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 74e20a54a..fdcf14469 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -621,7 +621,7 @@ class IqIE(InfoExtractor): preview_time = traverse_obj( initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False) if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none): - self.report_warning('This preview video is limited%s' % format_field(preview_time, template='to %s seconds')) + self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds')) # TODO: Extract audio-only formats for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])): diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index b93a02b7d..1a292b8ac 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -33,7 +33,7 @@ class PeriscopeBaseIE(InfoExtractor): return { 'id': broadcast.get('id') or video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'timestamp': parse_iso8601(broadcast.get('created_at')), 'uploader': uploader, 'uploader_id': broadcast.get('user_id') or broadcast.get('username'), diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 8146b3ef5..64b8a71b6 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -59,8 +59,16 @@ class SoundcloudEmbedIE(InfoExtractor): class SoundcloudBaseIE(InfoExtractor): + _NETRC_MACHINE = 'soundcloud' + _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' + _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' + _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' + _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s' + _access_token = None + _HEADERS = {} def _store_client_id(self, client_id): self._downloader.cache.store('soundcloud', 'client_id', client_id) @@ -103,14 +111,6 @@ class SoundcloudBaseIE(InfoExtractor): self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf' self._login() - _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' - _API_AUTH_QUERY_TEMPLATE = '?client_id=%s' - _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s' - _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s' - _access_token = None - _HEADERS = {} - _NETRC_MACHINE = 'soundcloud' - def _login(self): username, password = self._get_login_info() if username is None: diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index daf1c7450..4bc2263f0 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -67,6 +67,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'series': 'The Witcher', 'season': 'Misc', 'episode_number': 13, + 'episode': 'Episode 13', }, }, { @@ -92,6 +93,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'series': 'Arma 3', 'season': 'Zeus Games', 'episode_number': 3, + 'episode': 'Episode 3', }, }, ] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 041815a19..6451c08c0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3094,6 +3094,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Some formats may have much smaller duration than others (possibly damaged during encoding) # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000) + if is_damaged: + self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index f84ba5cff..22c62e22e 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -149,7 +149,7 @@ class ZingMp3IE(ZingMp3BaseIE): }, }, { 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', - 'md5': 'e9c972b693aa88301ef981c8151c4343', + 'md5': 'c7f23d971ac1a4f675456ed13c9b9612', 'info_dict': { 'id': 'ZO8ZF7C7', 'title': 'Sương Hoa Đưa Lối', @@ -158,6 +158,8 @@ class ZingMp3IE(ZingMp3BaseIE): 'duration': 207, 'track': 'Sương Hoa Đưa Lối', 'artist': 'K-ICM, RYO', + 'album': 'Sương Hoa Đưa Lối (Single)', + 'album_artist': 'K-ICM, RYO', }, }, { 'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false', |