aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r--yt_dlp/extractor/abematv.py16
-rw-r--r--yt_dlp/extractor/ant1newsgr.py4
-rw-r--r--yt_dlp/extractor/common.py59
-rw-r--r--yt_dlp/extractor/frontendmasters.py4
-rw-r--r--yt_dlp/extractor/iqiyi.py2
-rw-r--r--yt_dlp/extractor/periscope.py2
-rw-r--r--yt_dlp/extractor/soundcloud.py16
-rw-r--r--yt_dlp/extractor/sovietscloset.py2
-rw-r--r--yt_dlp/extractor/youtube.py2
-rw-r--r--yt_dlp/extractor/zingmp3.py4
10 files changed, 57 insertions, 54 deletions
diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py
index 66b12c72f..360fa4699 100644
--- a/yt_dlp/extractor/abematv.py
+++ b/yt_dlp/extractor/abematv.py
@@ -8,10 +8,6 @@ import struct
from base64 import urlsafe_b64encode
from binascii import unhexlify
-import typing
-if typing.TYPE_CHECKING:
- from ..YoutubeDL import YoutubeDL
-
from .common import InfoExtractor
from ..aes import aes_ecb_decrypt
from ..compat import (
@@ -36,15 +32,15 @@ from ..utils import (
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
-def add_opener(self: 'YoutubeDL', handler):
+def add_opener(ydl, handler):
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
- self._opener.add_handler(handler)
+ assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
+ ydl._opener.add_handler(handler)
-def remove_opener(self: 'YoutubeDL', handler):
+def remove_opener(ydl, handler):
'''
Remove handler(s) for opening URLs
@param handler Either handler object itself or handler type.
@@ -52,8 +48,8 @@ def remove_opener(self: 'YoutubeDL', handler):
'''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
- opener = self._opener
- assert isinstance(self._opener, compat_urllib_request.OpenerDirector)
+ opener = ydl._opener
+ assert isinstance(ydl._opener, compat_urllib_request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py
index 7d70e0427..1075b461e 100644
--- a/yt_dlp/extractor/ant1newsgr.py
+++ b/yt_dlp/extractor/ant1newsgr.py
@@ -97,8 +97,8 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE):
embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage))
if not embed_urls:
raise ExtractorError('no videos found for %s' % video_id, expected=True)
- return self.url_result_or_playlist_from_matches(
- embed_urls, video_id, info['title'], ie=Ant1NewsGrEmbedIE.ie_key(),
+ return self.playlist_from_matches(
+ embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(),
video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')})
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 5b7de1296..354814433 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -226,6 +226,7 @@ class InfoExtractor(object):
The following fields are optional:
+ direct: True if a direct video file was given (must only be set by GenericIE)
alt_title: A secondary title of the video.
display_id An alternative identifier for the video, not necessarily
unique, but available before title. Typically, id is
@@ -274,7 +275,7 @@ class InfoExtractor(object):
* "url": A URL pointing to the subtitles file
It can optionally also have:
* "name": Name or description of the subtitles
- * http_headers: A dictionary of additional HTTP headers
+ * "http_headers": A dictionary of additional HTTP headers
to add to the request.
"ext" will be calculated from URL if missing
automatic_captions: Like 'subtitles'; contains automatically generated
@@ -425,8 +426,8 @@ class InfoExtractor(object):
title, description etc.
- Subclasses of this one should re-define the _real_initialize() and
- _real_extract() methods and define a _VALID_URL regexp.
+ Subclasses of this should define a _VALID_URL regexp and, re-define the
+ _real_extract() and (optionally) _real_initialize() methods.
Probably, they should also be added to the list of extractors.
Subclasses may also override suitable() if necessary, but ensure the function
@@ -661,7 +662,7 @@ class InfoExtractor(object):
return False
def set_downloader(self, downloader):
- """Sets the downloader for this IE."""
+ """Sets a YoutubeDL instance as the downloader for this IE."""
self._downloader = downloader
def _real_initialize(self):
@@ -670,7 +671,7 @@ class InfoExtractor(object):
def _real_extract(self, url):
"""Real extraction process. Redefine in subclasses."""
- pass
+ raise NotImplementedError('This method must be implemented by subclasses')
@classmethod
def ie_key(cls):
@@ -1661,31 +1662,31 @@ class InfoExtractor(object):
'format_id': {'type': 'alias', 'field': 'id'},
'preference': {'type': 'alias', 'field': 'ie_pref'},
'language_preference': {'type': 'alias', 'field': 'lang'},
-
- # Deprecated
- 'dimension': {'type': 'alias', 'field': 'res'},
- 'resolution': {'type': 'alias', 'field': 'res'},
- 'extension': {'type': 'alias', 'field': 'ext'},
- 'bitrate': {'type': 'alias', 'field': 'br'},
- 'total_bitrate': {'type': 'alias', 'field': 'tbr'},
- 'video_bitrate': {'type': 'alias', 'field': 'vbr'},
- 'audio_bitrate': {'type': 'alias', 'field': 'abr'},
- 'framerate': {'type': 'alias', 'field': 'fps'},
- 'protocol': {'type': 'alias', 'field': 'proto'},
'source_preference': {'type': 'alias', 'field': 'source'},
+ 'protocol': {'type': 'alias', 'field': 'proto'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
- 'filesize_estimate': {'type': 'alias', 'field': 'size'},
- 'samplerate': {'type': 'alias', 'field': 'asr'},
- 'video_ext': {'type': 'alias', 'field': 'vext'},
- 'audio_ext': {'type': 'alias', 'field': 'aext'},
- 'video_codec': {'type': 'alias', 'field': 'vcodec'},
- 'audio_codec': {'type': 'alias', 'field': 'acodec'},
- 'video': {'type': 'alias', 'field': 'hasvid'},
- 'has_video': {'type': 'alias', 'field': 'hasvid'},
- 'audio': {'type': 'alias', 'field': 'hasaud'},
- 'has_audio': {'type': 'alias', 'field': 'hasaud'},
- 'extractor': {'type': 'alias', 'field': 'ie_pref'},
- 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'},
+
+ # Deprecated
+ 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
+ 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
+ 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
+ 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
+ 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
+ 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
+ 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
+ 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
+ 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
+ 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
+ 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
+ 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
+ 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
+ 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
+ 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
+ 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
+ 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
}
def __init__(self, ie, field_preference):
@@ -1785,7 +1786,7 @@ class InfoExtractor(object):
continue
if self._get_field_setting(field, 'type') == 'alias':
alias, field = field, self._get_field_setting(field, 'field')
- if alias not in ('format_id', 'preference', 'language_preference'):
+ if self._get_field_setting(alias, 'deprecated'):
self.ydl.deprecation_warning(
f'Format sorting alias {alias} is deprecated '
f'and may be removed in a future version. Please use {field} instead')
diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py
index 40b8cb0b4..0d29da29b 100644
--- a/yt_dlp/extractor/frontendmasters.py
+++ b/yt_dlp/extractor/frontendmasters.py
@@ -252,9 +252,9 @@ class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
entries = []
for lesson in lessons:
lesson_name = lesson.get('slug')
- if not lesson_name:
- continue
lesson_id = lesson.get('hash') or lesson.get('statsId')
+ if not lesson_id or not lesson_name:
+ continue
entries.append(self._extract_lesson(chapters, lesson_id, lesson))
title = course.get('title')
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index 74e20a54a..fdcf14469 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -621,7 +621,7 @@ class IqIE(InfoExtractor):
preview_time = traverse_obj(
initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False)
if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none):
- self.report_warning('This preview video is limited%s' % format_field(preview_time, template='to %s seconds'))
+ self.report_warning('This preview video is limited%s' % format_field(preview_time, template=' to %s seconds'))
# TODO: Extract audio-only formats
for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py
index b93a02b7d..1a292b8ac 100644
--- a/yt_dlp/extractor/periscope.py
+++ b/yt_dlp/extractor/periscope.py
@@ -33,7 +33,7 @@ class PeriscopeBaseIE(InfoExtractor):
return {
'id': broadcast.get('id') or video_id,
- 'title': self._live_title(title) if is_live else title,
+ 'title': title,
'timestamp': parse_iso8601(broadcast.get('created_at')),
'uploader': uploader,
'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 8146b3ef5..64b8a71b6 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -59,8 +59,16 @@ class SoundcloudEmbedIE(InfoExtractor):
class SoundcloudBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'soundcloud'
+
_API_V2_BASE = 'https://api-v2.soundcloud.com/'
_BASE_URL = 'https://soundcloud.com/'
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
+ _access_token = None
+ _HEADERS = {}
def _store_client_id(self, client_id):
self._downloader.cache.store('soundcloud', 'client_id', client_id)
@@ -103,14 +111,6 @@ class SoundcloudBaseIE(InfoExtractor):
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
self._login()
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
- _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
- _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
- _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
- _access_token = None
- _HEADERS = {}
- _NETRC_MACHINE = 'soundcloud'
-
def _login(self):
username, password = self._get_login_info()
if username is None:
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index daf1c7450..4bc2263f0 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -67,6 +67,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'series': 'The Witcher',
'season': 'Misc',
'episode_number': 13,
+ 'episode': 'Episode 13',
},
},
{
@@ -92,6 +93,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
'series': 'Arma 3',
'season': 'Zeus Games',
'episode_number': 3,
+ 'episode': 'Episode 3',
},
},
]
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 041815a19..6451c08c0 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3094,6 +3094,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
+ if is_damaged:
+ self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py
index f84ba5cff..22c62e22e 100644
--- a/yt_dlp/extractor/zingmp3.py
+++ b/yt_dlp/extractor/zingmp3.py
@@ -149,7 +149,7 @@ class ZingMp3IE(ZingMp3BaseIE):
},
}, {
'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html',
- 'md5': 'e9c972b693aa88301ef981c8151c4343',
+ 'md5': 'c7f23d971ac1a4f675456ed13c9b9612',
'info_dict': {
'id': 'ZO8ZF7C7',
'title': 'Sương Hoa Đưa Lối',
@@ -158,6 +158,8 @@ class ZingMp3IE(ZingMp3BaseIE):
'duration': 207,
'track': 'Sương Hoa Đưa Lối',
'artist': 'K-ICM, RYO',
+ 'album': 'Sương Hoa Đưa Lối (Single)',
+ 'album_artist': 'K-ICM, RYO',
},
}, {
'url': 'https://zingmp3.vn/embed/song/ZWZEI76B?start=false',