about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x youtube_dl/YoutubeDL.py 28
-rw-r--r-- youtube_dl/downloader/__init__.py 2
-rw-r--r-- youtube_dl/downloader/youtube_live_chat.py 94
-rw-r--r-- youtube_dl/extractor/youtube.py 31
4 files changed, 138 insertions, 17 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 19370f62b..0dc869d56 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1805,6 +1805,14 @@ class YoutubeDL(object):
self.report_error('Cannot write annotations file: ' + annofn)
return
+ def dl(name, info):
+ fd = get_suitable_downloader(info, self.params)(self, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+ return fd.download(name, info)
+
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
@@ -1812,14 +1820,12 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
@@ -1831,11 +1837,11 @@ class YoutubeDL(object):
return
else:
try:
- sub_data = ie._request_webpage(
- sub_info['url'], info_dict['id'], note=False).read()
- with io.open(encodeFilename(sub_filename), 'wb') as subfile:
- subfile.write(sub_data)
- except (ExtractorError, IOError, OSError, ValueError) as err:
+ dl(sub_filename, sub_info)
+ except (ExtractorError, IOError, OSError, ValueError,
+ compat_urllib_error.URLError,
+ compat_http_client.HTTPException,
+ socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
@@ -1856,14 +1862,6 @@ class YoutubeDL(object):
if not self.params.get('skip_download', False):
try:
- def dl(name, info):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
- if self.params.get('verbose'):
- self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
-
if info_dict.get('requested_formats') is not None:
downloaded = []
success = True
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 2e485df9d..4ae81f516 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -8,6 +8,7 @@ from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
+from .youtube_live_chat import YoutubeLiveChatReplayFD
from .external import (
get_external_downloader,
FFmpegFD,
@@ -26,6 +27,7 @@ PROTOCOL_MAP = {
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
+ 'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
}
diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py
new file mode 100644
index 000000000..4932dd9c5
--- /dev/null
+++ b/youtube_dl/downloader/youtube_live_chat.py
@@ -0,0 +1,94 @@
+from __future__ import division, unicode_literals
+
+import re
+import json
+
+from .fragment import FragmentFD
+
+
+class YoutubeLiveChatReplayFD(FragmentFD):
+ """ Downloads YouTube live chat replays fragment by fragment """
+
+ FD_NAME = 'youtube_live_chat_replay'
+
+ def real_download(self, filename, info_dict):
+ video_id = info_dict['video_id']
+ self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+ test = self.params.get('test', False)
+
+ ctx = {
+ 'filename': filename,
+ 'live': True,
+ 'total_frags': None,
+ }
+
+ def dl_fragment(url):
+ headers = info_dict.get('http_headers', {})
+ return self._download_fragment(ctx, url, info_dict, headers)
+
+ def parse_yt_initial_data(data):
+ window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
+ var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
+ for patt in window_patt, var_patt:
+ try:
+ raw_json = re.search(patt, data).group(1)
+ return json.loads(raw_json)
+ except AttributeError:
+ continue
+
+ self._prepare_and_start_frag_download(ctx)
+
+ success, raw_fragment = dl_fragment(
+ 'https://www.youtube.com/watch?v={}'.format(video_id))
+ if not success:
+ return False
+ data = parse_yt_initial_data(raw_fragment)
+ continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+
+ first = True
+ offset = None
+ while continuation_id is not None:
+ data = None
+ if first:
+ url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
+ success, raw_fragment = dl_fragment(url)
+ if not success:
+ return False
+ data = parse_yt_initial_data(raw_fragment)
+ else:
+ url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
+ + '?continuation={}'.format(continuation_id)
+ + '&playerOffsetMs={}'.format(offset - 5000)
+ + '&hidden=false'
+ + '&pbj=1')
+ success, raw_fragment = dl_fragment(url)
+ if not success:
+ return False
+ data = json.loads(raw_fragment)['response']
+
+ first = False
+ continuation_id = None
+
+ live_chat_continuation = data['continuationContents']['liveChatContinuation']
+ offset = None
+ processed_fragment = bytearray()
+ if 'actions' in live_chat_continuation:
+ for action in live_chat_continuation['actions']:
+ if 'replayChatItemAction' in action:
+ replay_chat_item_action = action['replayChatItemAction']
+ offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+ processed_fragment.extend(
+ json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
+
+ self._append_fragment(ctx, processed_fragment)
+
+ if test or offset is None:
+ break
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 03b726942..6e0bb6a12 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1435,7 +1435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError(
'Signature extraction failed: ' + tb, cause=e)
- def _get_subtitles(self, video_id, webpage):
+ def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
try:
subs_doc = self._download_xml(
'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -1462,6 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': ext,
})
sub_lang_list[lang] = sub_formats
+ if has_live_chat_replay:
+ sub_lang_list['live_chat'] = [
+ {
+ 'video_id': video_id,
+ 'ext': 'json',
+ 'protocol': 'youtube_live_chat_replay',
+ },
+ ]
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}
@@ -1485,6 +1493,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
+ def _get_yt_initial_data(self, video_id, webpage):
+ config = self._search_regex(
+ (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+ r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
+ webpage, 'ytInitialData', default=None)
+ if config:
+ return self._parse_json(
+ uppercase_escape(config), video_id, fatal=False)
+
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
@@ -1978,6 +1995,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if is_live is None:
is_live = bool_or_none(video_details.get('isLive'))
+ has_live_chat_replay = False
+ if not is_live:
+ yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
+ try:
+ yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ has_live_chat_replay = True
+ except (KeyError, IndexError, TypeError):
+ pass
+
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
@@ -2385,7 +2411,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
# subtitles
- video_subtitles = self.extract_subtitles(video_id, video_webpage)
+ video_subtitles = self.extract_subtitles(
+ video_id, video_webpage, has_live_chat_replay)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
video_duration = try_get(