Diffstat (limited to 'youtube_dl')
-rwxr-xr-x  youtube_dl/YoutubeDL.py                    |  28
-rw-r--r--  youtube_dl/downloader/__init__.py          |   2
-rw-r--r--  youtube_dl/downloader/youtube_live_chat.py |  94
-rw-r--r--  youtube_dl/extractor/biqle.py              |  20
-rw-r--r--  youtube_dl/extractor/doodstream.py         |  71
-rw-r--r--  youtube_dl/extractor/extractors.py         |   7
-rw-r--r--  youtube_dl/extractor/francetv.py           |  12
-rw-r--r--  youtube_dl/extractor/hrfensehen.py         | 102
-rw-r--r--  youtube_dl/extractor/soundcloud.py         |  97
-rw-r--r--  youtube_dl/extractor/storyfire.py          | 255
-rw-r--r--  youtube_dl/extractor/twitch.py             |  20
-rw-r--r--  youtube_dl/extractor/videa.py              |  62
-rw-r--r--  youtube_dl/extractor/viki.py               |   4
-rw-r--r--  youtube_dl/extractor/xhamster.py           |  17
-rw-r--r--  youtube_dl/extractor/youtube.py            |  47
-rw-r--r--  youtube_dl/version.py                      |   2
16 files changed, 773 insertions, 67 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 19370f62b..0dc869d56 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1805,6 +1805,14 @@ class YoutubeDL(object):
                     self.report_error('Cannot write annotations file: ' + annofn)
                     return
 
+        def dl(name, info):
+            fd = get_suitable_downloader(info, self.params)(self, self.params)
+            for ph in self._progress_hooks:
+                fd.add_progress_hook(ph)
+            if self.params.get('verbose'):
+                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+            return fd.download(name, info)
+
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
 
@@ -1812,14 +1820,12 @@
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['requested_subtitles']
-            ie = self.get_info_extractor(info_dict['extractor_key'])
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
-                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                     if sub_info.get('data') is not None:
                         try:
                             # Use newline='' to prevent conversion of newline characters
@@ -1831,11 +1837,11 @@
                             return
                     else:
                         try:
-                            sub_data = ie._request_webpage(
-                                sub_info['url'], info_dict['id'], note=False).read()
-                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
-                                subfile.write(sub_data)
-                        except (ExtractorError, IOError, OSError, ValueError) as err:
+                            dl(sub_filename, sub_info)
+                        except (ExtractorError, IOError, OSError, ValueError,
+                                compat_urllib_error.URLError,
+                                compat_http_client.HTTPException,
+                                socket.error) as err:
                             self.report_warning('Unable to download subtitle for "%s": %s' %
                                                 (sub_lang, error_to_compat_str(err)))
                             continue
@@ -1856,14 +1862,6 @@
 
         if not self.params.get('skip_download', False):
             try:
-                def dl(name, info):
-                    fd = get_suitable_downloader(info, self.params)(self, self.params)
-                    for ph in self._progress_hooks:
-                        fd.add_progress_hook(ph)
-                    if self.params.get('verbose'):
-                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
-                    return fd.download(name, info)
-
                 if info_dict.get('requested_formats') is not None:
                     downloaded = []
                     success = True
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 2e485df9d..4ae81f516 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -8,6 +8,7 @@ from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
 from .rtsp import RtspFD
 from .ism import IsmFD
+from .youtube_live_chat import YoutubeLiveChatReplayFD
 from .external import (
     get_external_downloader,
     FFmpegFD,
@@ -26,6 +27,7 @@ PROTOCOL_MAP = {
     'f4m': F4mFD,
     'http_dash_segments': DashSegmentsFD,
     'ism': IsmFD,
+    'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
 }
 
 
diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py
new file mode 100644
index 000000000..4932dd9c5
--- /dev/null
+++ b/youtube_dl/downloader/youtube_live_chat.py
@@ -0,0 +1,94 @@
+from __future__ import division, unicode_literals
+
+import re
+import json
+
+from .fragment import FragmentFD
+
+
+class YoutubeLiveChatReplayFD(FragmentFD):
+    """ Downloads YouTube live chat replays fragment by fragment """
+
+    FD_NAME = 'youtube_live_chat_replay'
+
+    def real_download(self, filename, info_dict):
+        video_id = info_dict['video_id']
+        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+        test = self.params.get('test', False)
+
+        ctx = {
+            'filename': filename,
+            'live': True,
+            'total_frags': None,
+        }
+
+        def dl_fragment(url):
+            headers = info_dict.get('http_headers', {})
+            return self._download_fragment(ctx, url, info_dict, headers)
+
+        def parse_yt_initial_data(data):
+            window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
+            var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
+            for patt in window_patt, var_patt:
+                try:
+                    raw_json = re.search(patt, data).group(1)
+                    return json.loads(raw_json)
+                except AttributeError:
+                    continue
+
+        self._prepare_and_start_frag_download(ctx)
+
+        success, raw_fragment = dl_fragment(
+            'https://www.youtube.com/watch?v={}'.format(video_id))
+        if not success:
+            return False
+        data = parse_yt_initial_data(raw_fragment)
+        continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+        # no data yet but required to call _append_fragment
+        self._append_fragment(ctx, b'')
+
+        first = True
+        offset = None
+        while continuation_id is not None:
+            data = None
+            if first:
+                url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
+                success, raw_fragment = dl_fragment(url)
+                if not success:
+                    return False
+                data = parse_yt_initial_data(raw_fragment)
+            else:
+                url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
+                       + '?continuation={}'.format(continuation_id)
+                       + '&playerOffsetMs={}'.format(offset - 5000)
+                       + '&hidden=false'
+                       + '&pbj=1')
+                success, raw_fragment = dl_fragment(url)
+                if not success:
+                    return False
+                data = json.loads(raw_fragment)['response']
+
+            first = False
+            continuation_id = None
+
+            live_chat_continuation = data['continuationContents']['liveChatContinuation']
+            offset = None
+            processed_fragment = bytearray()
+            if 'actions' in live_chat_continuation:
+                for action in live_chat_continuation['actions']:
+                    if 'replayChatItemAction' in action:
+                        replay_chat_item_action = action['replayChatItemAction']
+                        offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+                    processed_fragment.extend(
+                        json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+                continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
+
+            self._append_fragment(ctx, processed_fragment)
+
+            if test or offset is None:
+                break
+
+        self._finish_frag_download(ctx)
+
+        return True
diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py
index af21e3ee5..17ebbb257 100644
--- a/youtube_dl/extractor/biqle.py
+++ b/youtube_dl/extractor/biqle.py
@@ -3,10 +3,11 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from .vk import VKIE
-from ..utils import (
-    HEADRequest,
-    int_or_none,
+from ..compat import (
+    compat_b64decode,
+    compat_urllib_parse_unquote,
 )
+from ..utils import int_or_none
 
 
 class BIQLEIE(InfoExtractor):
@@ -47,9 +48,16 @@ class BIQLEIE(InfoExtractor):
         if VKIE.suitable(embed_url):
             return self.url_result(embed_url, VKIE.ie_key(), video_id)
 
-        self._request_webpage(
-            HEADRequest(embed_url), video_id, headers={'Referer': url})
-        video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
+        embed_page = self._download_webpage(
+            embed_url, video_id, headers={'Referer': url})
+        video_ext = self._get_cookies(embed_url).get('video_ext')
+        if video_ext:
+            video_ext = compat_urllib_parse_unquote(video_ext.value)
+        if not video_ext:
+            video_ext = compat_b64decode(self._search_regex(
+                r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
+                embed_page, 'video_ext')).decode()
+        video_id, sig, _, access_token = video_ext.split(':')
         item = self._download_json(
             'https://api.vk.com/method/video.get', video_id,
             headers={'User-Agent': 'okhttp/3.4.1'}, query={
diff --git a/youtube_dl/extractor/doodstream.py b/youtube_dl/extractor/doodstream.py
new file mode 100644
index 000000000..2c9ea6898
--- /dev/null
+++ b/youtube_dl/extractor/doodstream.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import string
+import random
+import time
+
+from .common import InfoExtractor
+
+
+class DoodStreamIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
+    _TESTS = [{
+        'url': 'http://dood.to/e/5s1wmbdacezb',
+        'md5': '4568b83b31e13242b3f1ff96c55f0595',
+        'info_dict': {
+            'id': '5s1wmbdacezb',
+            'ext': 'mp4',
+            'title': 'Kat Wonders - Monthly May 2020',
+            'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
+            'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
+        }
+    }, {
+        'url': 'https://dood.to/d/jzrxn12t2s7n',
+        'md5': '3207e199426eca7c2aa23c2872e6728a',
+        'info_dict': {
+            'id': 'jzrxn12t2s7n',
+            'ext': 'mp4',
+            'title': 'Stacy Cruz Cute ALLWAYSWELL',
+            'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
+            'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if '/d/' in url:
+            url = "https://dood.to" + self._html_search_regex(
+                r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
+            video_id = self._match_id(url)
+            webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(['og:title', 'twitter:title'],
+                                       webpage, default=None)
+        thumb = self._html_search_meta(['og:image', 'twitter:image'],
+                                       webpage, default=None)
+        token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
+        description = self._html_search_meta(
+            ['og:description', 'description', 'twitter:description'],
+            webpage, default=None)
+        auth_url = 'https://dood.to' + self._html_search_regex(
+            r'(/pass_md5.*?)\'', webpage, 'pass_md5')
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
+            'referer': url
+        }
+
+        webpage = self._download_webpage(auth_url, video_id, headers=headers)
+        final_url = webpage + ''.join([random.choice(string.ascii_letters + string.digits) for _ in range(10)]) + "?token=" + token + "&expiry=" + str(int(time.time() * 1000))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': final_url,
+            'http_headers': headers,
+            'ext': 'mp4',
+            'description': description,
+            'thumbnail': thumb,
+        }
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..e213b1bea 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -293,6 +293,7 @@ from .discoverynetworks import DiscoveryNetworksDeIE
 from .discoveryvr import DiscoveryVRIE
 from .disney import DisneyIE
 from .dispeak import DigitallySpeakingIE
+from .doodstream import DoodStreamIE
 from .dropbox import DropboxIE
 from .dw import (
     DWIE,
@@ -440,6 +441,7 @@ from .hotstar import (
 )
 from .howcast import HowcastIE
 from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
 from .hrti import (
     HRTiIE,
     HRTiPlaylistIE,
@@ -1057,6 +1059,11 @@ from .spike import (
     BellatorIE,
     ParamountNetworkIE,
 )
+from .storyfire import (
+    StoryFireIE,
+    StoryFireUserIE,
+    StoryFireSeriesIE,
+)
 from .stitcher import StitcherIE
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 81b468c7d..e340cddba 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -316,13 +316,14 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
     _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
 
     _TESTS = [{
-        'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+        'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
         'info_dict': {
-            'id': '84981923',
+            'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
             'ext': 'mp4',
             'title': 'Soir 3',
-            'upload_date': '20130826',
-            'timestamp': 1377548400,
+            'upload_date': '20190822',
+            'timestamp': 1566510900,
+            'description': 'md5:72d167097237701d6e8452ff03b83c00',
             'subtitles': {
                 'fr': 'mincount:2',
             },
@@ -374,7 +375,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
         video_id = self._search_regex(
             (r'player\.load[^;]+src:\s*["\']([^"\']+)',
              r'id-video=([^@]+@[^"]+)',
-             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
+             r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+             r'data-id="([^"]+)"'),
             webpage, 'video id')
 
         return self._make_url_result(video_id)
diff --git a/youtube_dl/extractor/hrfensehen.py b/youtube_dl/extractor/hrfensehen.py
new file mode 100644
index 000000000..2beadef2c
--- /dev/null
+++ b/youtube_dl/extractor/hrfensehen.py
@@ -0,0 +1,102 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from youtube_dl.utils import int_or_none, unified_timestamp, unescapeHTML
+from .common import InfoExtractor
+
+
+class HRFernsehenIE(InfoExtractor):
+    IE_NAME = 'hrfernsehen'
+    _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+
+    _TESTS = [{
+        'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
+        'md5': '5c4e0ba94677c516a2f65a84110fc536',
+        'info_dict': {
+            'id': '130546',
+            'ext': 'mp4',
+            'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
+                           'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
+                           'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
+            'subtitles': {'de': [{
+                'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
+            }]},
+            'timestamp': 1598470200,
+            'upload_date': '20200826',
+            'thumbnails': [{
+                'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
+                'id': '0'
+            }, {
+                'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
+                'id': '1'
+            }],
+            'title': 'hessenschau vom 26.08.2020'
+        }
+    }, {
+        'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
+        'only_matching': True
+    }]
+
+    _GEO_COUNTRIES = ['DE']
+
+    def extract_airdate(self, loader_data):
+        airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')
+
+        if airdate_str is None:
+            return None
+
+        return unified_timestamp(airdate_str)
+
+    def extract_formats(self, loader_data):
+        stream_formats = []
+        for stream_obj in loader_data["videoResolutionLevels"]:
+            stream_format = {
+                'format_id': str(stream_obj['verticalResolution']) + "p",
+                'height': stream_obj['verticalResolution'],
+                'url': stream_obj['url'],
+            }
+
+            quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
+                                            stream_obj['url'])
+            if quality_information:
+                stream_format['width'] = int_or_none(quality_information.group(1))
+                stream_format['height'] = int_or_none(quality_information.group(2))
+                stream_format['fps'] = int_or_none(quality_information.group(3))
+                stream_format['tbr'] = int_or_none(quality_information.group(4))
+
+            stream_formats.append(stream_format)
+
+        self._sort_formats(stream_formats)
+        return stream_formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_meta(
+            ['og:title', 'twitter:title', 'name'], webpage)
+        description = self._html_search_meta(
+            ['description'], webpage)
+
+        loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, "ardloader"))
+        loader_data = json.loads(loader_str)
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': self.extract_formats(loader_data),
+            'timestamp': self.extract_airdate(loader_data)
+        }
+
+        if "subtitle" in loader_data:
+            info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}
+
+        thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
+        if len(thumbnails) > 0:
+            info["thumbnails"] = [{"url": t} for t in thumbnails]
+
+        return info
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index d37c52543..ac09cb5e6 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -3,6 +3,8 @@ from __future__ import unicode_literals
 
 import itertools
 import re
+import json
+import random
 
 from .common import (
     InfoExtractor,
@@ -28,6 +30,7 @@ from ..utils import (
     update_url_query,
     url_or_none,
     urlhandle_detect_ext,
+    sanitized_Request,
 )
 
 
@@ -309,7 +312,81 @@ class SoundcloudIE(InfoExtractor):
             raise
 
     def _real_initialize(self):
-        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
+        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or "T5R4kgWS2PRf6lzLyIravUMnKlbIxQag"  # 'EXLwg5lHTO2dslU5EePe3xkw0m1h86Cd' # 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
+        self._login()
+
+    _USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
+    _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+    _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+    _access_token = None
+    _HEADERS = {}
+    _NETRC_MACHINE = 'soundcloud'
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        def genDevId():
+            def genNumBlock():
+                return ''.join([str(random.randrange(10)) for i in range(6)])
+            return '-'.join([genNumBlock() for i in range(4)])
+
+        payload = {
+            'client_id': self._CLIENT_ID,
+            'recaptcha_pubkey': 'null',
+            'recaptcha_response': 'null',
+            'credentials': {
+                'identifier': username,
+                'password': password
+            },
+            'signature': self.sign(username, password, self._CLIENT_ID),
+            'device_id': genDevId(),
+            'user_agent': self._USER_AGENT
+        }
+
+        query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+        login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
+        response = self._download_json(login, None)
+        self._access_token = response.get('session').get('access_token')
+        if not self._access_token:
+            self.report_warning('Unable to get access token, login may has failed')
+        else:
+            self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+
+    # signature generation
+    def sign(self, user, pw, clid):
+        a = 33
+        i = 1
+        s = 440123
+        w = 117
+        u = 1800000
+        l = 1042
+        b = 37
+        k = 37
+        c = 5
+        n = "0763ed7314c69015fd4a0dc16bbf4b90"  # _KEY
+        y = "8"  # _REV
+        r = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"  # _USER_AGENT
+        e = user  # _USERNAME
+        t = clid  # _CLIENT_ID
+
+        d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]])
+        p = n + y + d + r + e + t + d + n
+        h = p
+
+        m = 8011470
+        f = 0
+
+        for f in range(f, len(h)):
+            m = (m >> 1) + ((1 & m) << 23)
+            m += ord(h[f])
+            m &= 16777215
+
+        # c is not even needed
+        out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c)
+
+        return out
 
     @classmethod
     def _resolv_url(cls, url):
@@ -389,7 +466,7 @@ class SoundcloudIE(InfoExtractor):
             if not format_url:
                 continue
             stream = self._download_json(
-                format_url, track_id, query=query, fatal=False)
+                format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
             if not isinstance(stream, dict):
                 continue
             stream_url = url_or_none(stream.get('url'))
@@ -487,7 +564,7 @@ class SoundcloudIE(InfoExtractor):
             info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
 
         info = self._download_json(
-            info_json_url, full_title, 'Downloading info JSON', query=query)
+            info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
 
         return self._extract_info_dict(info, full_title, token)
 
@@ -503,7 +580,7 @@ class SoundcloudPlaylistBaseIE(SoundcloudIE):
                 'ids': ','.join([compat_str(t['id']) for t in tracks]),
                 'playlistId': playlist_id,
                 'playlistSecretToken': token,
-            })
+            }, headers=self._HEADERS)
         entries = []
         for track in tracks:
             track_id = str_or_none(track.get('id'))
@@ -547,7 +624,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
             full_title += '/' + token
 
         info = self._download_json(self._resolv_url(
-            self._BASE_URL + full_title), full_title)
+            self._BASE_URL + full_title), full_title, headers=self._HEADERS)
 
         if 'errors' in info:
             msgs = (compat_str(err['error_message']) for err in info['errors'])
@@ -572,7 +649,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
         for i in itertools.count():
             response = self._download_json(
                 next_href, playlist_id,
-                'Downloading track page %s' % (i + 1), query=query)
+                'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS)
 
             collection = response['collection']
 
@@ -694,7 +771,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
 
         user = self._download_json(
             self._resolv_url(self._BASE_URL + uploader),
-            uploader, 'Downloading user info')
+            uploader, 'Downloading user info', headers=self._HEADERS)
 
         resource = mobj.group('rsrc') or 'all'
 
@@ -719,7 +796,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
     def _real_extract(self, url):
         track_name = self._match_id(url)
 
-        track = self._download_json(self._resolv_url(url), track_name)
+        track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS)
         track_id = self._search_regex(
             r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
 
@@ -752,7 +829,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
 
         data = self._download_json(
             self._API_V2_BASE + 'playlists/' + playlist_id,
-            playlist_id, 'Downloading playlist', query=query)
+            playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
 
         return self._extract_set(data, token)
 
@@ -789,7 +866,7 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
         for i in itertools.count(1):
             response = self._download_json(
                 next_url, collection_id, 'Downloading page {0}'.format(i),
-                'Unable to download API page')
+                'Unable to download API page', headers=self._HEADERS)
 
             collection = response.get('collection', [])
             if not collection:
diff --git a/youtube_dl/extractor/storyfire.py b/youtube_dl/extractor/storyfire.py
new file mode 100644
index 000000000..67457cc94
--- /dev/null
+++ b/youtube_dl/extractor/storyfire.py
@@ -0,0 +1,255 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+from .common import InfoExtractor
+
+
+class StoryFireIE(InfoExtractor):
+    _VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire.app.link))/(?P<id>[^/\s]+)'
+    _TESTS = [{
+        'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
+        'md5': '560953bfca81a69003cfa5e53ac8a920',
+        'info_dict': {
+            'id': '5df1d132b6378700117f9181',
+            'ext': 'mp4',
+            'title': 'Buzzfeed Teaches You About Memes',
+            'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+            'timestamp': 1576129028,
+            'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies',
+            'uploader': 'whang!',
+            'upload_date': '20191212',
+        },
+        'params': {'format': 'bestvideo'}  # There are no merged formats in the playlist.
+    }, {
+        'url': 'https://storyfire.app.link/5GxAvWOQr8',  # Alternate URL format, with unrelated short ID
+        'md5': '7a2dc6d60c4889edfed459c620fe690d',
+        'info_dict': {
+            'id': '5f1e11ecd78a57b6c702001d',
+            'ext': 'm4a',
+            'title': 'Weird Nintendo Prototype Leaks',
+            'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis',
+            'timestamp': 1595808576,
+            'upload_date': '20200727',
+            'uploader': 'whang!',
+            'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+        },
+        'params': {'format': 'bestaudio'}  # Verifying audio extraction
+
+    }]
+
+    _aformats = {
+        'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10},
+        'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1},
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # Extracting the json blob is mandatory to proceed with extraction.
+        jsontext = self._html_search_regex(
+            r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>',
+            webpage, 'json_data')
+
+        json = self._parse_json(jsontext, video_id)
+
+        # The currentVideo field in the json is mandatory
+        # because it contains the only link to the m3u playlist
+        video = json['props']['initialState']['video']['currentVideo']
+        videourl = video['vimeoVideoURL']  # Video URL is mandatory
+
+        # Extract other fields from the json in an error tolerant fashion
+        # ID may be incorrect (on short URL format), correct it.
+        parsed_id = video.get('_id')
+        if parsed_id:
+            video_id = parsed_id
+
+        title = video.get('title')
+        description = video.get('description')
+
+        thumbnail = video.get('storyImage')
+        views = video.get('views')
+        likes = video.get('likesCount')
+        comments = video.get('commentsCount')
+        duration = video.get('videoDuration')
+        publishdate = video.get('publishDate')  # Apparently epoch time, day only
+
+        uploader = video.get('username')
+        uploader_id = video.get('hostID')
+        # Construct an uploader URL
+        uploader_url = None
+        if uploader_id:
+            uploader_url = "https://storyfire.com/user/%s/video" % uploader_id
+
+        # Collect root playlist to determine formats
+        formats = self._extract_m3u8_formats(
+            videourl, video_id, 'mp4', 'm3u8_native')
+
+        # Modify formats to fill in missing information about audio codecs
+        for format in formats:
+            aformat = self._aformats.get(format['format_id'])
+            if aformat:
+                format['acodec'] = aformat['acodec']
+                format['abr'] = aformat['abr']
+                format['preference'] = aformat['preference']
+                format['ext'] = 'm4a'
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'ext': "mp4",
+            'url': videourl,
+            'formats': formats,
+
+            'thumbnail': thumbnail,
+            'view_count': views,
+            'like_count': likes,
+            'comment_count': comments,
+            'duration': duration,
+            'timestamp': publishdate,
+
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'uploader_url': uploader_url,
+
+        }
+
+
+class StoryFireUserIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video'
+    _TESTS = [{
+        'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video',
+        'info_dict': {
+            'id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+            'title': 'whang!',
+        },
+        'playlist_mincount': 18
+    }, {
+        'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
+        'info_dict': {
+            'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
+            'title': 'McJuggerNuggets',
+        },
+        'playlist_mincount': 143
+
+    }]
+
+    # Generator for fetching playlist items
+    def _enum_videos(self, baseurl, user_id, firstjson):
+        totalVideos = int(firstjson['videosCount'])
+        haveVideos = 0
+        json = firstjson
+
+        for page in itertools.count(1):
+            for video in json['videos']:
+                id = video['_id']
+                url = "https://storyfire.com/video-details/%s" % id
+                haveVideos += 1
+                yield {
+                    '_type': 'url',
+                    'id': id,
+                    'url': url,
+                    'ie_key': 'StoryFire',
+
+                    'title': video.get('title'),
+                    'description': video.get('description'),
+                    'view_count': video.get('views'),
+                    'comment_count': video.get('commentsCount'),
+                    'duration': video.get('videoDuration'),
+                    'timestamp': video.get('publishDate'),
+                }
+            # Are there more pages we could fetch?
+            if haveVideos < totalVideos:
+                pageurl = baseurl + ("%i" % haveVideos)
+                json = self._download_json(pageurl, user_id,
+                                           note='Downloading page %s' % page)
+
+                # Are there any videos in the new json?
+                videos = json.get('videos')
+                if not videos or len(videos) == 0:
+                    break  # no videos
+
+            else:
+                break  # We have fetched all the videos, stop
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id
+
+        # Download first page to ensure it can be downloaded, and get user information if available.
+        firstpage = baseurl + "0"
+        firstjson = self._download_json(firstpage, user_id)
+
+        title = None
+        videos = firstjson.get('videos')
+        if videos and len(videos):
+            title = videos[1].get('username')
+
+        return {
+            '_type': 'playlist',
+            'entries': self._enum_videos(baseurl, user_id, firstjson),
+            'id': user_id,
+            'title': title,
+        }
+
+
+class StoryFireSeriesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)'
+    _TESTS = [{
+        'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
+        'info_dict': {
+            'id': '-Lq6MsuIHLODO6d2dDkr',
+        },
+        'playlist_mincount': 13
+    }, {
+        'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
+        'info_dict': {
+            'id': 'the_mortal_one',
+        },
+        'playlist_count': 0  # This playlist has entries, but no videos.
+    }, {
+        'url': 'https://storyfire.com/write/series/stories/story_time',
+        'info_dict': {
+            'id': 'story_time',
+        },
+        'playlist_mincount': 10
+    }]
+
+    # Generator for returning playlist items
+    # This object is substantially different than the one in the user videos page above
+    def _enum_videos(self, jsonlist):
+        for video in jsonlist:
+            id = video['_id']
+            if video.get('hasVideo'):  # Boolean element
+                url = "https://storyfire.com/video-details/%s" % id
+                yield {
+                    '_type': 'url',
+                    'id': id,
+                    'url': url,
+                    'ie_key': 'StoryFire',
+
+                    'title': video.get('title'),
+                    'description': video.get('description'),
+                    'view_count': video.get('views'),
+                    'likes_count': video.get('likesCount'),
+                    'comment_count': video.get('commentsCount'),
+                    'duration': video.get('videoDuration'),
+                    'timestamp': video.get('publishDate'),
+                }
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+
+        listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id
+        json = self._download_json(listurl, list_id)
+
+        return {
+            '_type': 'playlist',
+            'entries': self._enum_videos(json),
+            'id': list_id
+        }
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
index e211cd4c8..3f0f7e277 100644
--- a/youtube_dl/extractor/twitch.py
+++ b/youtube_dl/extractor/twitch.py
@@ -380,11 +380,13 @@ class TwitchPlaylistBaseIE(TwitchBaseIE):
     _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
     _PAGE_LIMIT = 100
 
-    def _extract_playlist(self, channel_id):
+    def _extract_playlist(self, channel_name):
         info = self._call_api(
-            'kraken/channels/%s' % channel_id,
-            channel_id, 'Downloading channel info JSON')
-        channel_name = info.get('display_name') or info.get('name')
+            'kraken/users?login=%s' % channel_name,
+            channel_name, 'Downloading channel info JSON')
+        info = info['users'][0]
+        channel_id = info['_id']
+        channel_name = info.get('display_name') or info.get('name') or channel_name
         entries = []
         offset = 0
         limit = self._PAGE_LIMIT
@@ -444,7 +446,7 @@ class TwitchProfileIE(TwitchPlaylistBaseIE):
     _TESTS = [{
         'url': 'http://www.twitch.tv/vanillatv/profile',
         'info_dict': {
-            'id': 'vanillatv',
+            'id': '22744919',
             'title': 'VanillaTV',
         },
         'playlist_mincount': 412,
@@ -468,7 +470,7 @@ class TwitchAllVideosIE(TwitchVideosBaseIE):
     _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/all',
         'info_dict': {
-            'id': 'spamfish',
+            'id': '497952',
             'title': 'Spamfish',
         },
         'playlist_mincount': 869,
@@ -487,7 +489,7 @@ class TwitchUploadsIE(TwitchVideosBaseIE):
     _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/uploads',
         'info_dict': {
-            'id': 'spamfish',
+            'id': '497952',
             'title': 'Spamfish',
         },
         'playlist_mincount': 0,
@@ -506,7 +508,7 @@ class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
     _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
         'info_dict': {
-            'id': 'spamfish',
+            'id': '497952',
             'title': 'Spamfish',
         },
         'playlist_mincount': 0,
@@ -525,7 +527,7 @@ class TwitchHighlightsIE(TwitchVideosBaseIE):
     _TESTS = [{
         'url': 'https://www.twitch.tv/spamfish/videos/highlights',
         'info_dict': {
-            'id': 'spamfish',
+            'id': '497952',
             'title': 'Spamfish',
         },
         'playlist_mincount': 805,
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
index d0e34c819..a03614cc1 100644
--- a/youtube_dl/extractor/videa.py
+++ b/youtube_dl/extractor/videa.py
@@ -2,15 +2,24 @@
 from __future__ import unicode_literals
 
 import re
+import random
+import string
+import struct
 
 from .common import InfoExtractor
 from ..utils import (
+    ExtractorError,
     int_or_none,
     mimetype2ext,
     parse_codecs,
     xpath_element,
     xpath_text,
 )
+from ..compat import (
+    compat_b64decode,
+    compat_ord,
+    compat_parse_qs,
+)
 
 
 class VideaIE(InfoExtractor):
@@ -60,15 +69,63 @@ class VideaIE(InfoExtractor):
                 r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
                 webpage)]
 
+    def rc4(self, ciphertext, key):
+        res = b''
+
+        keyLen = len(key)
+        S = list(range(256))
+
+        j = 0
+        for i in range(256):
+            j = (j + S[i] + ord(key[i % keyLen])) % 256
+            S[i], S[j] = S[j], S[i]
+
+        i = 0
+        j = 0
+        for m in range(len(ciphertext)):
+            i = (i + 1) % 256
+            j = (j + S[i]) % 256
+            S[i], S[j] = S[j], S[i]
+            k = S[(S[i] + S[j]) % 256]
+            res += struct.pack("B", k ^ compat_ord(ciphertext[m]))
+
+        return res
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, fatal=True)
+        error = self._search_regex(r'<p class="error-text">([^<]+)</p>', webpage, 'error', default=None)
+        if error:
+            raise ExtractorError(error, expected=True)
+
+        video_src_params_raw = self._search_regex(r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params')
+        video_src_params = compat_parse_qs(video_src_params_raw)
+        player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True)
+        nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
+        random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8))
+        static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
+        l = nonce[:32]
+        s = nonce[32:]
+        result = ''
+        for i in range(0, 32):
+            result += s[i - (static_secret.index(l[i]) - 31)]
 
-        info = self._download_xml(
+        video_src_params['_s'] = random_seed
+        video_src_params['_t'] = result[:16]
+        encryption_key_stem = result[16:] + random_seed
+
+        [b64_info, handle] = self._download_webpage_handle(
             'http://videa.hu/videaplayer_get_xml.php', video_id,
-            query={'v': video_id})
+            query=video_src_params, fatal=True)
+
+        encrypted_info = compat_b64decode(b64_info)
+        key = encryption_key_stem + handle.info()['x-videa-xs']
+        info_str = self.rc4(encrypted_info, key).decode('utf8')
+        info = self._parse_xml(info_str, video_id)
 
         video = xpath_element(info, './/video', 'video', fatal=True)
         sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
+        hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True)
 
         title = xpath_text(video, './title', fatal=True)
 
@@ -77,6 +134,7 @@ class VideaIE(InfoExtractor):
             source_url = source.text
             if not source_url:
                 continue
+            source_url += '?md5=%s&expires=%s' % (hash_values.find('hash_value_%s' % source.get('name')).text, source.get('exp'))
             f = parse_codecs(source.get('codecs'))
             f.update({
                 'url': source_url,
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index b0dcdc0e6..9e4171237 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -56,14 +56,14 @@ class VikiBaseIE(InfoExtractor):
 
     def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
         resp = self._download_json(
-            self._prepare_call(path, timestamp, post_data), video_id, note)
+            self._prepare_call(path, timestamp, post_data), video_id, note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
 
         error = resp.get('error')
         if error:
             if error == 'invalid timestamp':
                 resp = self._download_json(
                     self._prepare_call(path, int(resp['current_timestamp']), post_data),
-                    video_id, '%s (retry)' % note)
+                    video_id, '%s (retry)' % note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
                 error = resp.get('error')
             if error:
                 self._raise_error(resp['error'])
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 0f7be6a7d..902a3ed33 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -20,13 +20,13 @@ from ..utils import (
 )
 
 
 class XHamsterIE(InfoExtractor):
-    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
+    _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:.+?\.)?%s/
                         (?:
-                            movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
-                            videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
+                            movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
+                            videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
                         )
                 ''' % _DOMAINS
@@ -100,11 +100,20 @@ class XHamsterIE(InfoExtractor):
         'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
         'only_matching': True,
     }, {
+        'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+        'only_matching': True,
+    }, {
+        'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+        'only_matching': True,
+    }, {
         'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
         'only_matching': True,
     }, {
         'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
         'only_matching': True,
+    }, {
+        'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -129,7 +138,7 @@ class XHamsterIE(InfoExtractor):
 
         initials = self._parse_json(
             self._search_regex(
-                r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',
+                r'window\.initials\s*=\s*({.+?})\s*;', webpage, 'initials',
                 default='{}'),
             video_id, fatal=False)
         if initials:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 685b0cd64..70a5bd3b0 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1384,7 +1384,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         funcname = self._search_regex(
             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              # Obsolete patterns
              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
@@ -1435,7 +1435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError(
                 'Signature extraction failed: ' + tb, cause=e)
 
-    def _get_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
         try:
             subs_doc = self._download_xml(
                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -1462,6 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'ext': ext,
                 })
             sub_lang_list[lang] = sub_formats
+        if has_live_chat_replay:
+            sub_lang_list['live_chat'] = [
+                {
+                    'video_id': video_id,
+                    'ext': 'json',
+                    'protocol': 'youtube_live_chat_replay',
+                },
+            ]
         if not sub_lang_list:
             self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
@@ -1485,6 +1493,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return self._parse_json(
                 uppercase_escape(config), video_id, fatal=False)
 
+    def _get_yt_initial_data(self, video_id, webpage):
+        config = self._search_regex(
+            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
+            webpage, 'ytInitialData', default=None)
+        if config:
+            return self._parse_json(
+                uppercase_escape(config), video_id, fatal=False)
+
     def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
         argument to speed up the process."""
@@ -1661,21 +1678,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _extract_chapters_from_json(self, webpage, video_id, duration):
         if not webpage:
             return
-        player = self._parse_json(
+        initial_data = self._parse_json(
            self._search_regex(
-                r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
+                r'window\["ytInitialData"\] = (.+);\n', webpage,
                 'player args', default='{}'),
             video_id, fatal=False)
-        if not player or not isinstance(player, dict):
-            return
-        watch_next_response = player.get('watch_next_response')
-        if not isinstance(watch_next_response, compat_str):
-            return
-        response = self._parse_json(watch_next_response, video_id, fatal=False)
-        if not response or not isinstance(response, dict):
+        if not initial_data or not isinstance(initial_data, dict):
             return
         chapters_list = try_get(
-            response,
+            initial_data,
             lambda x: x['playerOverlays']
                        ['playerOverlayRenderer']
                        ['decoratedPlayerBarRenderer']
@@ -1984,6 +1995,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if is_live is None:
             is_live = bool_or_none(video_details.get('isLive'))
 
+        has_live_chat_replay = False
+        if not is_live:
+            yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
+            try:
+                yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+                has_live_chat_replay = True
+            except (KeyError, IndexError, TypeError):
+                pass
+
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
             raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
@@ -2391,7 +2411,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
 
         # subtitles
-        video_subtitles = self.extract_subtitles(video_id, video_webpage)
+        video_subtitles = self.extract_subtitles(
+            video_id, video_webpage, has_live_chat_replay)
         automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 
         video_duration = try_get(
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 6b88eb38c..b50bd2b3b 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2020.06.16.1'
+__version__ = '2020.08.31'