about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: siikamiika <siikamiika@users.noreply.github.com> 2021-02-15 11:57:21 +0200
committer: GitHub <noreply@github.com> 2021-02-15 15:27:21 +0530
commit: 273762c8d045ace16143a6614c8d258f02a8094b (patch)
tree: bb4422037784a4df69754baea8db036d1cfe44a2
parent: 7620cd46c3ac6c265be3730925281f77a801c89c (diff)
download: hypervideo-pre-273762c8d045ace16143a6614c8d258f02a8094b.tar.lz
hypervideo-pre-273762c8d045ace16143a6614c8d258f02a8094b.tar.xz
hypervideo-pre-273762c8d045ace16143a6614c8d258f02a8094b.zip
#86 [youtube_live_chat] Use POST API (Closes #82)
YouTube has removed support for the old GET-based live chat API, which now returns 404. Authored by siikamiika
-rw-r--r-- youtube_dlc/downloader/fragment.py 3
-rw-r--r-- youtube_dlc/downloader/http.py 5
-rw-r--r-- youtube_dlc/downloader/youtube_live_chat.py 75
3 files changed, 45 insertions, 38 deletions
diff --git a/youtube_dlc/downloader/fragment.py b/youtube_dlc/downloader/fragment.py
index f4104c713..5bc7f50f6 100644
--- a/youtube_dlc/downloader/fragment.py
+++ b/youtube_dlc/downloader/fragment.py
@@ -95,11 +95,12 @@ class FragmentFD(FileDownloader):
frag_index_stream.write(json.dumps({'downloader': downloader}))
frag_index_stream.close()
- def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
+ 'request_data': request_data,
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
diff --git a/youtube_dlc/downloader/http.py b/youtube_dlc/downloader/http.py
index d8ac41dcc..bf77f4427 100644
--- a/youtube_dlc/downloader/http.py
+++ b/youtube_dlc/downloader/http.py
@@ -27,6 +27,7 @@ from ..utils import (
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
+ request_data = info_dict.get('request_data', None)
class DownloadContext(dict):
__getattr__ = dict.get
@@ -101,7 +102,7 @@ class HttpFD(FileDownloader):
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
- request = sanitized_Request(url, None, headers)
+ request = sanitized_Request(url, request_data, headers)
if has_range:
set_range(request, range_start, range_end)
# Establish connection
@@ -152,7 +153,7 @@ class HttpFD(FileDownloader):
try:
# Open the connection again without the range header
ctx.data = self.ydl.urlopen(
- sanitized_Request(url, None, headers))
+ sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
diff --git a/youtube_dlc/downloader/youtube_live_chat.py b/youtube_dlc/downloader/youtube_live_chat.py
index 5ac24c020..8e173d8b5 100644
--- a/youtube_dlc/downloader/youtube_live_chat.py
+++ b/youtube_dlc/downloader/youtube_live_chat.py
@@ -1,11 +1,13 @@
from __future__ import division, unicode_literals
-import re
import json
from .fragment import FragmentFD
from ..compat import compat_urllib_error
-from ..utils import try_get
+from ..utils import (
+ try_get,
+ RegexNotFoundError,
+)
from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
@@ -27,40 +29,28 @@ class YoutubeLiveChatReplayFD(FragmentFD):
'total_frags': None,
}
- def dl_fragment(url):
- headers = info_dict.get('http_headers', {})
- return self._download_fragment(ctx, url, info_dict, headers)
+ ie = YT_BaseIE(self.ydl)
- def parse_yt_initial_data(data):
- patterns = (
- r'%s\\s*%s' % (YT_BaseIE._YT_INITIAL_DATA_RE, YT_BaseIE._YT_INITIAL_BOUNDARY_RE),
- r'%s' % YT_BaseIE._YT_INITIAL_DATA_RE)
- data = data.decode('utf-8', 'replace')
- for patt in patterns:
- try:
- raw_json = re.search(patt, data).group(1)
- return json.loads(raw_json)
- except AttributeError:
- continue
+ def dl_fragment(url, data=None, headers=None):
+ http_headers = info_dict.get('http_headers', {})
+ if headers:
+ http_headers = http_headers.copy()
+ http_headers.update(headers)
+ return self._download_fragment(ctx, url, info_dict, http_headers, data)
- def download_and_parse_fragment(url, frag_index):
+ def download_and_parse_fragment(url, frag_index, request_data):
count = 0
while count <= fragment_retries:
try:
- success, raw_fragment = dl_fragment(url)
+ success, raw_fragment = dl_fragment(url, request_data, {'content-type': 'application/json'})
if not success:
return False, None, None
- data = parse_yt_initial_data(raw_fragment)
+ try:
+ data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ data = None
if not data:
- raw_data = json.loads(raw_fragment)
- # sometimes youtube replies with a list
- if not isinstance(raw_data, list):
- raw_data = [raw_data]
- try:
- data = next(item['response'] for item in raw_data if 'response' in item)
- except StopIteration:
- data = {}
-
+ data = json.loads(raw_fragment)
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
@@ -93,22 +83,37 @@ class YoutubeLiveChatReplayFD(FragmentFD):
'https://www.youtube.com/watch?v={}'.format(video_id))
if not success:
return False
- data = parse_yt_initial_data(raw_fragment)
+ try:
+ data = ie._extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ return False
continuation_id = try_get(
data,
lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
# no data yet but required to call _append_fragment
self._append_fragment(ctx, b'')
+ ytcfg = ie._extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+
+ if not ytcfg:
+ return False
+ api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
+ innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
+ if not api_key or not innertube_context:
+ return False
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+
frag_index = offset = 0
while continuation_id is not None:
frag_index += 1
- url = ''.join((
- 'https://www.youtube.com/live_chat_replay',
- '/get_live_chat_replay' if frag_index > 1 else '',
- '?continuation=%s' % continuation_id,
- '&playerOffsetMs=%d&hidden=false&pbj=1' % max(offset - 5000, 0) if frag_index > 1 else ''))
- success, continuation_id, offset = download_and_parse_fragment(url, frag_index)
+ request_data = {
+ 'context': innertube_context,
+ 'continuation': continuation_id,
+ }
+ if frag_index > 1:
+ request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+ success, continuation_id, offset = download_and_parse_fragment(
+ url, frag_index, json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n')
if not success:
return False
if test: