Fix comment replies

The comment reply protobuf now requires the channel ID of the video's uploader; otherwise the endpoint returns 500. Instead of building the protobuf ourselves and passing this data around through query parameters, just use the ctoken provided to us, but modify its max_replies field from 10 to 250.

Fixes #53

Signed-off-by: Jesús <heckyel@hyperbola.info>
author: James Taylor <user234683@users.noreply.github.com> 2021-02-25 15:55:23 -0800
committer: Jesús <heckyel@hyperbola.info> 2021-02-26 11:39:23 -0500
commit: 00ef1c862744ec00886bc3fa4b95fdfc6c151866 (patch)
tree: bac6025f0e09df7d1fc65665eaef429407bf1391 /youtube
parent: f26c9be85e1ac78d30954b3aa38c119bef415579 (diff)
download: yt-local-00ef1c862744ec00886bc3fa4b95fdfc6c151866.tar.lz
yt-local-00ef1c862744ec00886bc3fa4b95fdfc6c151866.tar.xz
yt-local-00ef1c862744ec00886bc3fa4b95fdfc6c151866.zip
4 files changed, 129 insertions, 51 deletions
diff --git a/youtube/comments.py b/youtube/comments.py
index 8ab2b2c..66b5353 100644
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -33,8 +33,8 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
     video_id = proto.as_bytes(video_id)
     secret_key = proto.as_bytes(secret_key)
 
-    page_info = proto.string(4, video_id) + proto.uint(6, sort)
 
+    page_info = proto.string(4,video_id) + proto.uint(6, sort)
     offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
     if secret_key:
         offset_information = proto.string(1, secret_key) + offset_information
@@ -47,15 +47,6 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
     return base64.urlsafe_b64encode(result).decode('ascii')
 
 
-def comment_replies_ctoken(video_id, comment_id, max_results=500):
-
-    params = proto.string(2, comment_id) + proto.uint(9, max_results)
-    params = proto.nested(3, params)
-
-    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3, 6) + proto.nested(6, params)
-    return base64.urlsafe_b64encode(result).decode('ascii')
-
-
 mobile_headers = {
     'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
     'Accept': '*/*',
@@ -66,10 +57,11 @@ mobile_headers = {
 
 
 def request_comments(ctoken, replies=False):
-    if replies: # let's make it use different urls for no reason despite all the data being encoded
-        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
+    base_url = 'https://m.youtube.com/watch_comment?'
+    if replies:
+        base_url += 'action_get_comment_replies=1&ctoken='
     else:
-        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
+        base_url += 'action_get_comments=1&ctoken='
     url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
 
     content = util.fetch_url(
@@ -99,17 +91,24 @@ def post_process_comments_info(comments_info):
 
         comment['permalink'] = concat_or_none(
             util.URL_ORIGIN, '/watch?v=',
-            comments_info['video_id'], '&lc=', comment['id'])
+            comments_info['video_id'],
+            '&lc=', comment['id']
+        )
 
         reply_count = comment['reply_count']
-
-        if reply_count == 0:
-            comment['replies_url'] = None
-        else:
-            comment['replies_url'] = concat_or_none(
-                util.URL_ORIGIN,
-                '/comments?parent_id=', comment['id'],
-                '&video_id=', comments_info['video_id'])
+        comment['replies_url'] = None
+        if comment['reply_ctoken']:
+            # change max_replies field to 250 in ctoken
+            ctoken = comment['reply_ctoken']
+            ctoken, err = proto.set_protobuf_value(
+                ctoken,
+                'base64p', 6, 3, 9, value=250)
+            if err:
+                print('Error setting ctoken value:')
+                print(err)
+                comment['replies_url'] = None
+            comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
+                '/comments?replies=1&ctoken=' + ctoken)
 
         if reply_count == 0:
             comment['view_replies_text'] = 'Reply'
@@ -118,6 +117,7 @@ def post_process_comments_info(comments_info):
         else:
             comment['view_replies_text'] = str(reply_count) + ' replies'
 
+
         if comment['like_count'] == 1:
             comment['likes_text'] = '1 like'
         else:
@@ -125,10 +125,12 @@ def post_process_comments_info(comments_info):
 
     comments_info['include_avatars'] = settings.enable_comment_avatars
     if comments_info['ctoken']:
+        replies_param = '&replies=1' if comments_info['is_replies'] else ''
         comments_info['more_comments_url'] = concat_or_none(
             util.URL_ORIGIN,
             '/comments?ctoken=',
-            comments_info['ctoken']
+            comments_info['ctoken'],
+            replies_param
         )
 
     comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1)
@@ -137,14 +139,11 @@ def post_process_comments_info(comments_info):
         comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'
 
     comments_info['video_url'] = concat_or_none(
-        util.URL_ORIGIN,
-        '/watch?v=',
-        comments_info['video_id']
-    )
-
+        util.URL_ORIGIN, '/watch?v=', comments_info['video_id'])
     comments_info['video_thumbnail'] = concat_or_none(
         settings.img_prefix, 'https://i.ytimg.com/vi/',
-        comments_info['video_id'], '/mqdefault.jpg')
+        comments_info['video_id'], '/mqdefault.jpg'
+    )
 
 
 def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
@@ -198,17 +197,9 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
 @yt_app.route('/comments')
 def get_comments_page():
     ctoken = request.args.get('ctoken', '')
-    replies = False
-    if not ctoken:
-        video_id = request.args['video_id']
-        parent_id = request.args['parent_id']
-
-        ctoken = comment_replies_ctoken(video_id, parent_id)
-        replies = True
-
-    comments_info = yt_data_extract.extract_comments_info(
-        request_comments(ctoken, replies))
+    replies = request.args.get('replies', '0') == '1'
 
+    comments_info = yt_data_extract.extract_comments_info(request_comments(ctoken, replies))
     post_process_comments_info(comments_info)
 
     if not replies:
diff --git a/youtube/proto.py b/youtube/proto.py
index ff59eac..933ac92 100644
--- a/youtube/proto.py
+++ b/youtube/proto.py
@@ -1,6 +1,7 @@
 from math import ceil
 import base64
 import io
+import traceback
 
 
 def byte(n):
@@ -92,7 +93,6 @@ def read_group(data, end_sequence):
     data.seek(index + len(end_sequence))
     return data.original[start:index]
 
-
 def read_protobuf(data):
     data_original = data
     data = io.BytesIO(data)
@@ -122,12 +122,89 @@ def read_protobuf(data):
         yield (wire_type, field_number, value)
 
 
-def parse(data):
-    return {field_number: value for _, field_number, value in read_protobuf(data)}
+def parse(data, include_wire_type=False):
+    '''Returns a dict mapping field numbers to values
+
+    data is the protobuf structure, which must not be b64-encoded'''
+    if include_wire_type:
+        return {field_number: [wire_type, value]
+                for wire_type, field_number, value in read_protobuf(data)}
+    return {field_number: value
+            for _, field_number, value in read_protobuf(data)}
+
+
+base64_enc_funcs = {
+    'base64': base64.urlsafe_b64encode,
+    'base64s': unpadded_b64encode,
+    'base64p': percent_b64encode,
+}
+
+
+def _make_protobuf(data):
+    # must be dict mapping field_number to [wire_type, value]
+    if isinstance(data, dict):
+        new_data = []
+        for field_num, (wire_type, value) in sorted(data.items()):
+            new_data.append((wire_type, field_num, value))
+        data = new_data
+    if isinstance(data, str):
+        return data.encode('utf-8')
+    elif len(data) == 2 and data[0] in base64_enc_funcs:
+        return base64_enc_funcs[data[0]](make_proto(data[1]))
+    elif isinstance(data, list):
+        result = b''
+        for field in data:
+            if field[0] == 0:
+                result += uint(field[1], field[2])
+            elif field[0] == 2:
+                result += string(field[1], _make_protobuf(field[2]))
+            else:
+                raise NotImplementedError('Wire type ' + str(field[0])
+                    + ' not implemented')
+        return result
+    return data
+
+
+def make_protobuf(data):
+    return _make_protobuf(data).decode('ascii')
+
+
+def _set_protobuf_value(data, *path, value):
+    if not path:
+        return value
+    op = path[0]
+    if op in base64_enc_funcs:
+        inner_data = b64_to_bytes(data)
+        return base64_enc_funcs[op](
+            _set_protobuf_value(inner_data, *path[1:], value=value)
+        )
+    pb_dict = parse(data, include_wire_type=True)
+    pb_dict[op][1] = _set_protobuf_value(
+        pb_dict[op][1], *path[1:], value=value
+    )
+    return _make_protobuf(pb_dict)
+
+
+def set_protobuf_value(data, *path, value):
+    '''Set a field's value in a raw protobuf structure
+
+    path is a list of field numbers and/or base64 encoding directives
+
+    The directives are
+        base64: normal base64 encoding with equal signs padding
+        base64s ("stripped"): no padding
+        base64p: %3D instead of = for padding
+
+    return new_protobuf, err'''
+    try:
+        new_protobuf = _set_protobuf_value(data, *path, value=value)
+        return new_protobuf.decode('ascii'), None
+    except Exception:
+        return None, traceback.format_exc()
 
 
 def b64_to_bytes(data):
     if isinstance(data, bytes):
         data = data.decode('ascii')
     data = data.replace("%3D", "=")
-    return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4))
+    return base64.urlsafe_b64decode(data + "="*((4 - len(data) % 4) % 4))
diff --git a/youtube/templates/comments.html b/youtube/templates/comments.html
index ceb31b8..808f98a 100644
--- a/youtube/templates/comments.html
+++ b/youtube/templates/comments.html
@@ -23,14 +23,18 @@
 
             <span class="comment-likes">{{ comment['likes_text'] if comment['like_count'] else ''}}</span>
             <div class="button-row">
-                {% if settings.use_comments_js and comment['reply_count'] %}
-                    <details class="replies" data-src="{{ comment['replies_url'] }}">
-                        <summary>{{ comment['view_replies_text'] }}</summary>
-                        <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
-                        <div class="comment_page">loading..</div>
-                    </details>
-                {% elif comment['reply_count'] %}
-                    <a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
+                {% if comment['reply_count'] %}
+                    {% if settings.use_comments_js and comment['replies_url'] %}
+                        <details class="replies" src="{{ comment['replies_url'] }}">
+                            <summary>{{ comment['view_replies_text'] }}</summary>
+                            <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
+                            <div class="comment_page">loading...</div>
+                        </details>
+                    {% elif comment['replies_url'] %}
+                        <a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
+                    {% else %}
+                        <a class="replies">{{ comment['view_replies_text'] }} (error constructing url)</a>
+                    {% endif %}
                 {% endif %}
             </div>
         </div>
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index ae8715f..197cf88 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -251,13 +251,19 @@ def extract_comments_info(polymer_json):
             info['video_title'] = extract_str(comment_thread.get('commentTargetTitle'))
             if 'replies' not in comment_thread:
                 comment_info['reply_count'] = 0
+                comment_info['reply_ctoken'] = None
             else:
                 comment_info['reply_count'] = extract_int(deep_get(comment_thread,
                     'replies', 'commentRepliesRenderer', 'moreText'
                 ), default=1)   # With 1 reply, the text reads "View reply"
+                comment_info['reply_ctoken'] = deep_get(comment_thread,
+                    'replies', 'commentRepliesRenderer', 'continuations', 0,
+                    'nextContinuationData', 'continuation'
+                )
             comment_renderer = deep_get(comment_thread, 'comment', 'commentRenderer', default={})
         elif 'commentRenderer' in comment:  # replies
             comment_info['reply_count'] = 0     # replyCount, below, not present for replies even if the reply has further replies to it
+            comment_info['reply_ctoken'] = None
             conservative_update(info, 'is_replies', True)
             comment_renderer = comment['commentRenderer']
         else:
author	James Taylor <user234683@users.noreply.github.com>	2021-02-25 15:55:23 -0800
committer	Jesús <heckyel@hyperbola.info>	2021-02-26 11:39:23 -0500
commit	00ef1c862744ec00886bc3fa4b95fdfc6c151866 (patch)
tree	bac6025f0e09df7d1fc65665eaef429407bf1391 /youtube
parent	f26c9be85e1ac78d30954b3aa38c119bef415579 (diff)
download	yt-local-00ef1c862744ec00886bc3fa4b95fdfc6c151866.tar.lz yt-local-00ef1c862744ec00886bc3fa4b95fdfc6c151866.tar.xz yt-local-00ef1c862744ec00886bc3fa4b95fdfc6c151866.zip