about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jesús <heckyel@hyperbola.info> 2021-12-02 22:55:03 -0500
committer Jesús <heckyel@hyperbola.info> 2021-12-02 22:55:03 -0500
commit 409d6c8e3cfb4330f9a797eb5784b94c00fb4e35 (patch)
tree d98bf2c149a55eca565987ee6fb1c14f271b5f72
parent 7739b37f957b098d237179534d1b8c20597d55b6 (diff)
parent d2b2fca53f635986918e364ee5b564d8e7d8af7e (diff)
download hypervideo-pre-409d6c8e3cfb4330f9a797eb5784b94c00fb4e35.tar.lz
download hypervideo-pre-409d6c8e3cfb4330f9a797eb5784b94c00fb4e35.tar.xz
download hypervideo-pre-409d6c8e3cfb4330f9a797eb5784b94c00fb4e35.zip
updated from upstream | 02/12/2021 at 22:55
-rw-r--r-- yt_dlp/extractor/common.py 10
-rw-r--r-- yt_dlp/extractor/niconico.py 56
-rw-r--r-- yt_dlp/extractor/rai.py 28
-rw-r--r-- yt_dlp/extractor/soundcloud.py 289
-rw-r--r-- yt_dlp/utils.py 4
5 files changed, 199 insertions, 188 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 597db63d1..2180f879c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3548,14 +3548,18 @@ class InfoExtractor(object):
def extractor():
comments = []
+ interrupted = True
try:
while True:
comments.append(next(generator))
- except KeyboardInterrupt:
- interrupted = True
- self.to_screen('Interrupted by user')
except StopIteration:
interrupted = False
+ except KeyboardInterrupt:
+ self.to_screen('Interrupted by user')
+ except Exception as e:
+ if self.get_param('ignoreerrors') is not True:
+ raise
+ self._downloader.report_error(e)
comment_count = len(comments)
self.to_screen(f'Extracted {comment_count} comments')
return {
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index b46ca293f..4fcf1d8ed 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -662,11 +662,32 @@ class NiconicoPlaylistIE(InfoExtractor):
}
-NicovideoSearchIE_NAME = 'nicovideo:search'
+class NicovideoSearchBaseIE(InfoExtractor):
+ def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
+ query = query or {}
+ pages = [query['page']] if 'page' in query else itertools.count(1)
+ for page_num in pages:
+ query['page'] = str(page_num)
+ webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
+ results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
+ for item in results:
+ yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
+ if not results:
+ break
+
+
+class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
+ IE_DESC = 'Nico video search'
+ IE_NAME = 'nicovideo:search'
+ _SEARCH_KEY = 'nicosearch'
+ def _search_results(self, query):
+ return self._entries(
+ self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-class NicovideoSearchURLIE(InfoExtractor):
- IE_NAME = f'{NicovideoSearchIE_NAME}_url'
+
+class NicovideoSearchURLIE(NicovideoSearchBaseIE):
+ IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
IE_DESC = 'Nico video search URLs'
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
_TESTS = [{
@@ -685,37 +706,14 @@ class NicovideoSearchURLIE(InfoExtractor):
'playlist_count': 31,
}]
- def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
- query = query or {}
- pages = [query['page']] if 'page' in query else itertools.count(1)
- for page_num in pages:
- query['page'] = str(page_num)
- webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
- results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
- for item in results:
- yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
- if not results:
- break
-
def _real_extract(self, url):
query = self._match_id(url)
return self.playlist_result(self._entries(url, query), query, query)
-class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
- IE_DESC = 'Nico video search'
- IE_NAME = NicovideoSearchIE_NAME
- _SEARCH_KEY = 'nicosearch'
- _TESTS = []
-
- def _search_results(self, query):
- return self._entries(
- self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-
-
-class NicovideoSearchDateIE(NicovideoSearchIE):
+class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
IE_DESC = 'Nico video search, newest first'
- IE_NAME = f'{NicovideoSearchIE_NAME}:date'
+ IE_NAME = f'{NicovideoSearchIE.IE_NAME}:date'
_SEARCH_KEY = 'nicosearchdate'
_TESTS = [{
'url': 'nicosearchdateall:a',
@@ -756,7 +754,7 @@ class NicovideoSearchDateIE(NicovideoSearchIE):
if page_num:
query['page'] = str(page_num)
- yield from NicovideoSearchURLIE._entries(self, url, item_id, query=query, note=note)
+ yield from super()._entries(url, item_id, query=query, note=note)
class NiconicoUserIE(InfoExtractor):
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index 6aa62c955..4699fe17e 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -17,6 +17,7 @@ from ..utils import (
get_element_by_class,
HEADRequest,
int_or_none,
+ join_nonempty,
parse_duration,
parse_list,
remove_start,
@@ -138,6 +139,9 @@ class RaiBaseIE(InfoExtractor):
return False if resp.url == url else resp.url
return None
+ # filter out audio-only formats
+ fmts = [f for f in fmts if not f.get('vcodec') == 'none']
+
def get_format_info(tbr):
import math
br = int_or_none(tbr)
@@ -229,7 +233,7 @@ class RaiPlayIE(RaiBaseIE):
'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
'ext': 'mp4',
'title': 'Report del 07/04/2014',
- 'alt_title': 'St 2013/14 - Espresso nel caffè - 07/04/2014',
+ 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014',
'description': 'md5:d730c168a58f4bb35600fc2f881ec04e',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai Gulp',
@@ -237,7 +241,7 @@ class RaiPlayIE(RaiBaseIE):
'series': 'Report',
'season': '2013/14',
'subtitles': {
- 'it': 'count:2',
+ 'it': 'count:4',
},
},
'params': {
@@ -245,18 +249,18 @@ class RaiPlayIE(RaiBaseIE):
},
}, {
# 1080p direct mp4 url
- 'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
- 'md5': '2e501e8651d72f05ffe8f5d286ad560b',
+ 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html',
+ 'md5': 'aeda7243115380b2dd5e881fd42d949a',
'info_dict': {
- 'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
+ 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736',
'ext': 'mp4',
- 'title': 'Leonardo - S1E1',
- 'alt_title': 'St 1 Ep 1 - Episodio 1',
- 'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
+ 'title': 'Blanca - S1E1 - Senza occhi',
+ 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi',
+ 'description': 'md5:75f95d5c030ec8bac263b1212322e28c',
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Rai 1',
- 'duration': 3229,
- 'series': 'Leonardo',
+ 'duration': 6493,
+ 'series': 'Blanca',
'season': 'Season 1',
},
}, {
@@ -309,12 +313,14 @@ class RaiPlayIE(RaiBaseIE):
program_info = media.get('program_info') or {}
season = media.get('season')
+ alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ')
+
info = {
'id': remove_start(media.get('id'), 'ContentItem-') or video_id,
'display_id': video_id,
'title': self._live_title(title) if relinker_info.get(
'is_live') else title,
- 'alt_title': strip_or_none(media.get('subtitle')),
+ 'alt_title': strip_or_none(alt_title),
'description': media.get('description'),
'uploader': strip_or_none(media.get('channel')),
'creator': strip_or_none(media.get('editor') or None),
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 2bb449220..d5cbe70ea 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -58,7 +58,149 @@ class SoundcloudEmbedIE(InfoExtractor):
return self.url_result(api_url)
-class SoundcloudIE(InfoExtractor):
+class SoundcloudBaseIE(InfoExtractor):
+ _API_V2_BASE = 'https://api-v2.soundcloud.com/'
+ _BASE_URL = 'https://soundcloud.com/'
+
+ def _store_client_id(self, client_id):
+ self._downloader.cache.store('soundcloud', 'client_id', client_id)
+
+ def _update_client_id(self):
+ webpage = self._download_webpage('https://soundcloud.com/', None)
+ for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
+ script = self._download_webpage(src, None, fatal=False)
+ if script:
+ client_id = self._search_regex(
+ r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
+ script, 'client id', default=None)
+ if client_id:
+ self._CLIENT_ID = client_id
+ self._store_client_id(client_id)
+ return
+ raise ExtractorError('Unable to extract client id')
+
+ def _download_json(self, *args, **kwargs):
+ non_fatal = kwargs.get('fatal') is False
+ if non_fatal:
+ del kwargs['fatal']
+ query = kwargs.get('query', {}).copy()
+ for _ in range(2):
+ query['client_id'] = self._CLIENT_ID
+ kwargs['query'] = query
+ try:
+ return super()._download_json(*args, **compat_kwargs(kwargs))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+ self._store_client_id(None)
+ self._update_client_id()
+ continue
+ elif non_fatal:
+ self.report_warning(error_to_compat_str(e))
+ return False
+ raise
+
+ def _real_initialize(self):
+ self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
+ self._login()
+
+ _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
+ _access_token = None
+ _HEADERS = {}
+ _NETRC_MACHINE = 'soundcloud'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ if username == 'oauth' and password is not None:
+ self._access_token = password
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ payload = {'session': {'access_token': self._access_token}}
+ token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
+ if response is not False:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ self.report_login()
+ else:
+ self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
+ elif username is not None:
+ self.report_warning(
+ 'Login using username and password is not currently supported. '
+ 'Use "--user oauth --password <oauth_token>" to login using an oauth token')
+
+ r'''
+ def genDevId():
+ def genNumBlock():
+ return ''.join([str(random.randrange(10)) for i in range(6)])
+ return '-'.join([genNumBlock() for i in range(4)])
+
+ payload = {
+ 'client_id': self._CLIENT_ID,
+ 'recaptcha_pubkey': 'null',
+ 'recaptcha_response': 'null',
+ 'credentials': {
+ 'identifier': username,
+ 'password': password
+ },
+ 'signature': self.sign(username, password, self._CLIENT_ID),
+ 'device_id': genDevId(),
+ 'user_agent': self._USER_AGENT
+ }
+
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(login, None)
+ self._access_token = response.get('session').get('access_token')
+ if not self._access_token:
+ self.report_warning('Unable to get access token, login may has failed')
+ else:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+ '''
+
+ # signature generation
+ def sign(self, user, pw, clid):
+ a = 33
+ i = 1
+ s = 440123
+ w = 117
+ u = 1800000
+ l = 1042
+ b = 37
+ k = 37
+ c = 5
+ n = '0763ed7314c69015fd4a0dc16bbf4b90' # _KEY
+ y = '8' # _REV
+ r = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' # _USER_AGENT
+ e = user # _USERNAME
+ t = clid # _CLIENT_ID
+
+ d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]])
+ p = n + y + d + r + e + t + d + n
+ h = p
+
+ m = 8011470
+ f = 0
+
+ for f in range(f, len(h)):
+ m = (m >> 1) + ((1 & m) << 23)
+ m += ord(h[f])
+ m &= 16777215
+
+ # c is not even needed
+ out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c)
+
+ return out
+
+ @classmethod
+ def _resolv_url(cls, url):
+ return cls._API_V2_BASE + 'resolve?url=' + url
+
+
+class SoundcloudIE(SoundcloudBaseIE):
"""Information extractor for soundcloud.com
To access the media, the uid of the song and a stream token
must be extracted from the page source and the script must make
@@ -250,8 +392,6 @@ class SoundcloudIE(InfoExtractor):
},
]
- _API_V2_BASE = 'https://api-v2.soundcloud.com/'
- _BASE_URL = 'https://soundcloud.com/'
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
_ARTWORK_MAP = {
@@ -267,143 +407,6 @@ class SoundcloudIE(InfoExtractor):
'original': 0,
}
- def _store_client_id(self, client_id):
- self._downloader.cache.store('soundcloud', 'client_id', client_id)
-
- def _update_client_id(self):
- webpage = self._download_webpage('https://soundcloud.com/', None)
- for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
- script = self._download_webpage(src, None, fatal=False)
- if script:
- client_id = self._search_regex(
- r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
- script, 'client id', default=None)
- if client_id:
- self._CLIENT_ID = client_id
- self._store_client_id(client_id)
- return
- raise ExtractorError('Unable to extract client id')
-
- def _download_json(self, *args, **kwargs):
- non_fatal = kwargs.get('fatal') is False
- if non_fatal:
- del kwargs['fatal']
- query = kwargs.get('query', {}).copy()
- for _ in range(2):
- query['client_id'] = self._CLIENT_ID
- kwargs['query'] = query
- try:
- return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
- self._store_client_id(None)
- self._update_client_id()
- continue
- elif non_fatal:
- self.report_warning(error_to_compat_str(e))
- return False
- raise
-
- def _real_initialize(self):
- self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
- self._login()
-
- _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'
- _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
- _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
- _API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
- _access_token = None
- _HEADERS = {}
- _NETRC_MACHINE = 'soundcloud'
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- if username == 'oauth' and password is not None:
- self._access_token = password
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- payload = {'session': {'access_token': self._access_token}}
- token_verification = sanitized_Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
- if response is not False:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
- self.report_login()
- else:
- self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
- elif username is not None:
- self.report_warning(
- 'Login using username and password is not currently supported. '
- 'Use "--user oauth --password <oauth_token>" to login using an oauth token')
-
- r'''
- def genDevId():
- def genNumBlock():
- return ''.join([str(random.randrange(10)) for i in range(6)])
- return '-'.join([genNumBlock() for i in range(4)])
-
- payload = {
- 'client_id': self._CLIENT_ID,
- 'recaptcha_pubkey': 'null',
- 'recaptcha_response': 'null',
- 'credentials': {
- 'identifier': username,
- 'password': password
- },
- 'signature': self.sign(username, password, self._CLIENT_ID),
- 'device_id': genDevId(),
- 'user_agent': self._USER_AGENT
- }
-
- query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
- login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
- response = self._download_json(login, None)
- self._access_token = response.get('session').get('access_token')
- if not self._access_token:
- self.report_warning('Unable to get access token, login may has failed')
- else:
- self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
- '''
-
- # signature generation
- def sign(self, user, pw, clid):
- a = 33
- i = 1
- s = 440123
- w = 117
- u = 1800000
- l = 1042
- b = 37
- k = 37
- c = 5
- n = '0763ed7314c69015fd4a0dc16bbf4b90' # _KEY
- y = '8' # _REV
- r = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36' # _USER_AGENT
- e = user # _USERNAME
- t = clid # _CLIENT_ID
-
- d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]])
- p = n + y + d + r + e + t + d + n
- h = p
-
- m = 8011470
- f = 0
-
- for f in range(f, len(h)):
- m = (m >> 1) + ((1 & m) << 23)
- m += ord(h[f])
- m &= 16777215
-
- # c is not even needed
- out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c)
-
- return out
-
- @classmethod
- def _resolv_url(cls, url):
- return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
-
def _extract_info_dict(self, info, full_title=None, secret_token=None):
track_id = compat_str(info['id'])
title = info['title']
@@ -581,7 +584,7 @@ class SoundcloudIE(InfoExtractor):
return self._extract_info_dict(info, full_title, token)
-class SoundcloudPlaylistBaseIE(SoundcloudIE):
+class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
def _extract_set(self, playlist, token=None):
playlist_id = compat_str(playlist['id'])
tracks = playlist.get('tracks') or []
@@ -654,7 +657,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
return self._extract_set(info, token)
-class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
+class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title):
return {
'_type': 'playlist',
@@ -853,7 +856,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
return self._extract_set(data, token)
-class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
+class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor):
IE_NAME = 'soundcloud:search'
IE_DESC = 'Soundcloud search'
_SEARCH_KEY = 'scsearch'
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 5537d63be..18d531202 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2608,8 +2608,8 @@ class ThrottledDownload(ReExtractInfo):
""" Download speed below --throttled-rate. """
msg = 'The download speed is below throttle limit'
- def __init__(self, msg):
- super().__init__(msg, expected=False)
+ def __init__(self):
+ super().__init__(self.msg, expected=False)
class UnavailableVideoError(YoutubeDLError):