aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--youtube/comments.py69
-rw-r--r--youtube/proto.py85
-rw-r--r--youtube/templates/comments.html20
-rw-r--r--youtube/yt_data_extract/everything_else.py6
4 files changed, 129 insertions, 51 deletions
diff --git a/youtube/comments.py b/youtube/comments.py
index 8ab2b2c..66b5353 100644
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -33,8 +33,8 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
video_id = proto.as_bytes(video_id)
secret_key = proto.as_bytes(secret_key)
- page_info = proto.string(4, video_id) + proto.uint(6, sort)
+ page_info = proto.string(4,video_id) + proto.uint(6, sort)
offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
if secret_key:
offset_information = proto.string(1, secret_key) + offset_information
@@ -47,15 +47,6 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
return base64.urlsafe_b64encode(result).decode('ascii')
-def comment_replies_ctoken(video_id, comment_id, max_results=500):
-
- params = proto.string(2, comment_id) + proto.uint(9, max_results)
- params = proto.nested(3, params)
-
- result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3, 6) + proto.nested(6, params)
- return base64.urlsafe_b64encode(result).decode('ascii')
-
-
mobile_headers = {
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
'Accept': '*/*',
@@ -66,10 +57,11 @@ mobile_headers = {
def request_comments(ctoken, replies=False):
- if replies: # let's make it use different urls for no reason despite all the data being encoded
- base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
+ base_url = 'https://m.youtube.com/watch_comment?'
+ if replies:
+ base_url += 'action_get_comment_replies=1&ctoken='
else:
- base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
+ base_url += 'action_get_comments=1&ctoken='
url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
content = util.fetch_url(
@@ -99,17 +91,24 @@ def post_process_comments_info(comments_info):
comment['permalink'] = concat_or_none(
util.URL_ORIGIN, '/watch?v=',
- comments_info['video_id'], '&lc=', comment['id'])
+ comments_info['video_id'],
+ '&lc=', comment['id']
+ )
reply_count = comment['reply_count']
-
- if reply_count == 0:
- comment['replies_url'] = None
- else:
- comment['replies_url'] = concat_or_none(
- util.URL_ORIGIN,
- '/comments?parent_id=', comment['id'],
- '&video_id=', comments_info['video_id'])
+ comment['replies_url'] = None
+ if comment['reply_ctoken']:
+ # change max_replies field to 250 in ctoken
+ ctoken = comment['reply_ctoken']
+ ctoken, err = proto.set_protobuf_value(
+ ctoken,
+ 'base64p', 6, 3, 9, value=250)
+ if err:
+ print('Error setting ctoken value:')
+ print(err)
+ comment['replies_url'] = None
+ comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
+ '/comments?replies=1&ctoken=' + ctoken)
if reply_count == 0:
comment['view_replies_text'] = 'Reply'
@@ -118,6 +117,7 @@ def post_process_comments_info(comments_info):
else:
comment['view_replies_text'] = str(reply_count) + ' replies'
+
if comment['like_count'] == 1:
comment['likes_text'] = '1 like'
else:
@@ -125,10 +125,12 @@ def post_process_comments_info(comments_info):
comments_info['include_avatars'] = settings.enable_comment_avatars
if comments_info['ctoken']:
+ replies_param = '&replies=1' if comments_info['is_replies'] else ''
comments_info['more_comments_url'] = concat_or_none(
util.URL_ORIGIN,
'/comments?ctoken=',
- comments_info['ctoken']
+ comments_info['ctoken'],
+ replies_param
)
comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1)
@@ -137,14 +139,11 @@ def post_process_comments_info(comments_info):
comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'
comments_info['video_url'] = concat_or_none(
- util.URL_ORIGIN,
- '/watch?v=',
- comments_info['video_id']
- )
-
+ util.URL_ORIGIN, '/watch?v=', comments_info['video_id'])
comments_info['video_thumbnail'] = concat_or_none(
settings.img_prefix, 'https://i.ytimg.com/vi/',
- comments_info['video_id'], '/mqdefault.jpg')
+ comments_info['video_id'], '/mqdefault.jpg'
+ )
def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
@@ -198,17 +197,9 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
@yt_app.route('/comments')
def get_comments_page():
ctoken = request.args.get('ctoken', '')
- replies = False
- if not ctoken:
- video_id = request.args['video_id']
- parent_id = request.args['parent_id']
-
- ctoken = comment_replies_ctoken(video_id, parent_id)
- replies = True
-
- comments_info = yt_data_extract.extract_comments_info(
- request_comments(ctoken, replies))
+ replies = request.args.get('replies', '0') == '1'
+ comments_info = yt_data_extract.extract_comments_info(request_comments(ctoken, replies))
post_process_comments_info(comments_info)
if not replies:
diff --git a/youtube/proto.py b/youtube/proto.py
index ff59eac..933ac92 100644
--- a/youtube/proto.py
+++ b/youtube/proto.py
@@ -1,6 +1,7 @@
from math import ceil
import base64
import io
+import traceback
def byte(n):
@@ -92,7 +93,6 @@ def read_group(data, end_sequence):
data.seek(index + len(end_sequence))
return data.original[start:index]
-
def read_protobuf(data):
data_original = data
data = io.BytesIO(data)
@@ -122,12 +122,89 @@ def read_protobuf(data):
yield (wire_type, field_number, value)
-def parse(data):
- return {field_number: value for _, field_number, value in read_protobuf(data)}
+def parse(data, include_wire_type=False):
+    '''Map each field number of a raw protobuf message to its value.
+
+    data is the protobuf structure itself; it must not be b64-encoded.
+
+    With include_wire_type set, every entry is a [wire_type, value] list
+    instead of the bare value. If a field number occurs more than once,
+    the last occurrence is the one kept.'''
+    fields = read_protobuf(data)
+    if not include_wire_type:
+        return {number: value for _, number, value in fields}
+    return {number: [wire_type, value]
+            for wire_type, number, value in fields}
+
+
+# Encoders selectable by directive name in a protobuf path. Per the
+# set_protobuf_value docstring: 'base64' keeps the '=' padding, 'base64s'
+# strips the padding, and 'base64p' writes the padding as '%3D'.
+base64_enc_funcs = {
+ 'base64': base64.urlsafe_b64encode,
+ 'base64s': unpadded_b64encode,
+ 'base64p': percent_b64encode,
+}
+
+
+def _make_protobuf(data):
+    '''Serialize a nested description of a protobuf message to bytes.
+
+    data may be
+      - a dict mapping field_number to [wire_type, value]; fields are
+        written in ascending field-number order
+      - a list of (wire_type, field_number, value) entries
+      - a (directive, inner) pair whose directive is a key of
+        base64_enc_funcs; inner is serialized, then encoded with it
+      - a str, which is returned utf-8 encoded
+      - anything else (e.g. bytes), which is returned unchanged
+
+    Only wire types 0 and 2 can be written; any other wire type raises
+    NotImplementedError.'''
+    if isinstance(data, dict):
+        data = [(wire_type, field_num, value)
+                for field_num, (wire_type, value) in sorted(data.items())]
+    if isinstance(data, str):
+        return data.encode('utf-8')
+    # Require a str directive before the dict lookup: a two-entry field
+    # list would otherwise hash its first entry, which raises TypeError
+    # when that entry holds an unhashable value such as a nested list.
+    if (len(data) == 2 and isinstance(data[0], str)
+            and data[0] in base64_enc_funcs):
+        # Recurse with _make_protobuf (the previous name, make_proto,
+        # is not defined and raised NameError).
+        return base64_enc_funcs[data[0]](_make_protobuf(data[1]))
+    if isinstance(data, list):
+        parts = []
+        for field in data:
+            wire_type, field_num, value = field[0], field[1], field[2]
+            if wire_type == 0:
+                parts.append(uint(field_num, value))
+            elif wire_type == 2:
+                parts.append(string(field_num, _make_protobuf(value)))
+            else:
+                raise NotImplementedError('Wire type ' + str(wire_type)
+                                          + ' not implemented')
+        return b''.join(parts)
+    return data
+
+
+def make_protobuf(data):
+    '''Serialize data with _make_protobuf and return it as an ascii str.'''
+    serialized = _make_protobuf(data)
+    return serialized.decode('ascii')
+
+
+def _set_protobuf_value(data, *path, value):
+    '''Return data with the entry addressed by path replaced by value.
+
+    path is consumed left to right. A base64 directive (a key of
+    base64_enc_funcs) decodes data with b64_to_bytes, applies the rest
+    of the path, and re-encodes the result with that directive's
+    encoder. A field number parses data as a protobuf message and
+    descends into that field. An empty path yields value itself.
+
+    Raises KeyError when a field number in path is not present.'''
+    if not path:
+        return value
+    step, remaining = path[0], path[1:]
+    if step in base64_enc_funcs:
+        decoded = b64_to_bytes(data)
+        updated = _set_protobuf_value(decoded, *remaining, value=value)
+        return base64_enc_funcs[step](updated)
+    fields = parse(data, include_wire_type=True)
+    entry = fields[step]  # [wire_type, value]; only the value changes
+    entry[1] = _set_protobuf_value(entry[1], *remaining, value=value)
+    return _make_protobuf(fields)
+
+
+def set_protobuf_value(data, *path, value):
+    '''Set a field's value in a raw protobuf structure
+
+    path is a sequence of field numbers and/or base64 encoding
+    directives, applied from the outermost layer inwards.
+
+    The directives are
+        base64: normal base64 encoding with equal signs padding
+        base64s ("stripped"): no padding
+        base64p: %3D instead of = for padding
+
+    Returns (new_protobuf, err). On success new_protobuf is an ascii
+    str and err is None; on any failure new_protobuf is None and err is
+    the formatted traceback.'''
+    try:
+        updated = _set_protobuf_value(data, *path, value=value)
+        text = updated.decode('ascii')
+    except Exception:
+        # Failures are reported through the err slot, not raised.
+        return None, traceback.format_exc()
+    return text, None
def b64_to_bytes(data):
if isinstance(data, bytes):
data = data.decode('ascii')
data = data.replace("%3D", "=")
- return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4))
+ return base64.urlsafe_b64decode(data + "="*((4 - len(data) % 4) % 4))
diff --git a/youtube/templates/comments.html b/youtube/templates/comments.html
index ceb31b8..808f98a 100644
--- a/youtube/templates/comments.html
+++ b/youtube/templates/comments.html
@@ -23,14 +23,18 @@
<span class="comment-likes">{{ comment['likes_text'] if comment['like_count'] else ''}}</span>
<div class="button-row">
- {% if settings.use_comments_js and comment['reply_count'] %}
- <details class="replies" data-src="{{ comment['replies_url'] }}">
- <summary>{{ comment['view_replies_text'] }}</summary>
- <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
- <div class="comment_page">loading..</div>
- </details>
- {% elif comment['reply_count'] %}
- <a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
+ {% if comment['reply_count'] %}
+ {% if settings.use_comments_js and comment['replies_url'] %}
+ <details class="replies" src="{{ comment['replies_url'] }}">
+ <summary>{{ comment['view_replies_text'] }}</summary>
+ <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
+ <div class="comment_page">loading...</div>
+ </details>
+ {% elif comment['replies_url'] %}
+ <a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
+ {% else %}
+ <a class="replies">{{ comment['view_replies_text'] }} (error constructing url)</a>
+ {% endif %}
{% endif %}
</div>
</div>
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index ae8715f..197cf88 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -251,13 +251,19 @@ def extract_comments_info(polymer_json):
info['video_title'] = extract_str(comment_thread.get('commentTargetTitle'))
if 'replies' not in comment_thread:
comment_info['reply_count'] = 0
+ comment_info['reply_ctoken'] = None
else:
comment_info['reply_count'] = extract_int(deep_get(comment_thread,
'replies', 'commentRepliesRenderer', 'moreText'
), default=1) # With 1 reply, the text reads "View reply"
+ comment_info['reply_ctoken'] = deep_get(comment_thread,
+ 'replies', 'commentRepliesRenderer', 'continuations', 0,
+ 'nextContinuationData', 'continuation'
+ )
comment_renderer = deep_get(comment_thread, 'comment', 'commentRenderer', default={})
elif 'commentRenderer' in comment: # replies
comment_info['reply_count'] = 0 # replyCount, below, not present for replies even if the reply has further replies to it
+ comment_info['reply_ctoken'] = None
conservative_update(info, 'is_replies', True)
comment_renderer = comment['commentRenderer']
else: