about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- youtube_dl/downloader/__init__.py 2
-rw-r--r-- youtube_dl/downloader/youtube_live_chat.py 88
-rw-r--r-- youtube_dl/extractor/youtube.py 8
3 files changed, 98 insertions, 0 deletions
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index 2e485df9d..4ae81f516 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -8,6 +8,7 @@ from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
+from .youtube_live_chat import YoutubeLiveChatReplayFD
from .external import (
get_external_downloader,
FFmpegFD,
@@ -26,6 +27,7 @@ PROTOCOL_MAP = {
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
+ 'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
}
diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py
new file mode 100644
index 000000000..64d1d20b2
--- /dev/null
+++ b/youtube_dl/downloader/youtube_live_chat.py
@@ -0,0 +1,88 @@
+from __future__ import division, unicode_literals
+
+import re
+import json
+
+from .fragment import FragmentFD
+
+
+class YoutubeLiveChatReplayFD(FragmentFD):
+    """ Downloads YouTube live chat replays fragment by fragment """
+
+    FD_NAME = 'youtube_live_chat_replay'
+
+    def real_download(self, filename, info_dict):
+        """
+        Save the chat replay of info_dict['video_id'] to filename, one JSON
+        chat action per line. Returns True on success, False on failure.
+        """
+        video_id = info_dict['video_id']
+        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+        test = self.params.get('test', False)
+        ctx = {'filename': filename, 'live': True, 'total_frags': None}
+
+        def dl_fragment(url):
+            headers = info_dict.get('http_headers', {})
+            return self._download_fragment(ctx, url, info_dict, headers)
+
+        def load_json(raw):
+            # json.loads() rejects bytes before Python 3.6, so decode first
+            return json.loads(raw.decode('utf-8'))
+
+        def parse_yt_initial_data(data):
+            # raw literal: '\[' and '\s' are not valid string escapes
+            mobj = re.search(br'window\["ytInitialData"\]\s*=\s*(.*);', data)
+            return load_json(mobj.group(1)) if mobj else {}
+
+        self._prepare_and_start_frag_download(ctx)
+
+        success, raw_fragment = dl_fragment('https://www.youtube.com/watch?v=%s' % video_id)
+        if not success:
+            return False
+        try:
+            continuation_id = parse_yt_initial_data(raw_fragment)['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+        except (KeyError, IndexError, TypeError):
+            self.report_error('no live chat replay found for %s' % video_id)
+            return False
+        # no data yet but required to call _append_fragment
+        self._append_fragment(ctx, b'')
+
+        first = True
+        while continuation_id is not None:
+            if first:
+                url = 'https://www.youtube.com/live_chat_replay?continuation=%s' % continuation_id
+            else:
+                # rewind 5000 ms, clamped so the offset never goes negative
+                url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
+                       '?continuation=%s&playerOffsetMs=%d&hidden=false&pbj=1'
+                       % (continuation_id, max(offset - 5000, 0)))
+            success, raw_fragment = dl_fragment(url)
+            if not success:
+                return False
+            try:
+                data = parse_yt_initial_data(raw_fragment) if first else load_json(raw_fragment)['response']
+                live_chat_continuation = data['continuationContents']['liveChatContinuation']
+            except (KeyError, TypeError):
+                self.report_error('unexpected live chat replay response')
+                return False
+            first = False
+            offset = None
+            processed_fragment = bytearray()
+            for action in live_chat_continuation.get('actions', []):
+                if 'replayChatItemAction' in action:
+                    offset = int(action['replayChatItemAction']['videoOffsetTimeMsec'])
+                processed_fragment.extend(
+                    json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+            try:
+                continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
+            except (KeyError, IndexError):
+                continuation_id = None  # no further page is advertised
+            self._append_fragment(ctx, processed_fragment)
+
+            if test or offset is None:
+                break
+
+        self._finish_frag_download(ctx)
+
+        return True
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b35bf03aa..e554702e7 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1462,6 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'ext': ext,
})
sub_lang_list[lang] = sub_formats
+ # TODO check that live chat replay actually exists
+ sub_lang_list['live_chat'] = [
+ {
+ 'video_id': video_id,
+ 'ext': 'json',
+ 'protocol': 'youtube_live_chat_replay',
+ },
+ ]
if not sub_lang_list:
self._downloader.report_warning('video doesn\'t have subtitles')
return {}