4 files changed, 129 insertions, 51 deletions
diff --git a/youtube/comments.py b/youtube/comments.py
index 8ab2b2c..66b5353 100644
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -33,8 +33,8 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
     video_id = proto.as_bytes(video_id)
     secret_key = proto.as_bytes(secret_key)
 
-    page_info = proto.string(4, video_id) + proto.uint(6, sort)
 
+    page_info = proto.string(4,video_id) + proto.uint(6, sort)
     offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
     if secret_key:
         offset_information = proto.string(1, secret_key) + offset_information
@@ -47,15 +47,6 @@ def make_comment_ctoken(video_id, sort=0, offset=0, lc='', secret_key=''):
     return base64.urlsafe_b64encode(result).decode('ascii')
 
 
-def comment_replies_ctoken(video_id, comment_id, max_results=500):
-
-    params = proto.string(2, comment_id) + proto.uint(9, max_results)
-    params = proto.nested(3, params)
-
-    result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3, 6) + proto.nested(6, params)
-    return base64.urlsafe_b64encode(result).decode('ascii')
-
-
 mobile_headers = {
     'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
     'Accept': '*/*',
@@ -66,10 +57,11 @@ mobile_headers = {
 
 
 def request_comments(ctoken, replies=False):
-    if replies: # let's make it use different urls for no reason despite all the data being encoded
-        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
+    base_url = 'https://m.youtube.com/watch_comment?'
+    if replies:
+        base_url += 'action_get_comment_replies=1&ctoken='
     else:
-        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
+        base_url += 'action_get_comments=1&ctoken='
     url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
 
     content = util.fetch_url(
@@ -99,17 +91,24 @@ def post_process_comments_info(comments_info):
 
         comment['permalink'] = concat_or_none(
             util.URL_ORIGIN, '/watch?v=',
-            comments_info['video_id'], '&lc=', comment['id'])
+            comments_info['video_id'],
+            '&lc=', comment['id']
+        )
 
         reply_count = comment['reply_count']
-
-        if reply_count == 0:
-            comment['replies_url'] = None
-        else:
-            comment['replies_url'] = concat_or_none(
-                util.URL_ORIGIN,
-                '/comments?parent_id=', comment['id'],
-                '&video_id=', comments_info['video_id'])
+        comment['replies_url'] = None
+        if comment['reply_ctoken']:
+            # change max_replies field to 250 in ctoken
+            ctoken, err = proto.set_protobuf_value(
+                comment['reply_ctoken'], 'base64p', 6, 3, 9, value=250)
+            if err:
+                # ctoken is None here, so building the url would raise
+                # TypeError; leave replies_url as None and report the error
+                print('Error setting ctoken value:')
+                print(err)
+            else:
+                comment['replies_url'] = concat_or_none(util.URL_ORIGIN,
+                    '/comments?replies=1&ctoken=' + ctoken)
 
         if reply_count == 0:
             comment['view_replies_text'] = 'Reply'
@@ -118,6 +117,7 @@ def post_process_comments_info(comments_info):
         else:
             comment['view_replies_text'] = str(reply_count) + ' replies'
 
+
         if comment['like_count'] == 1:
             comment['likes_text'] = '1 like'
         else:
@@ -125,10 +125,12 @@ def post_process_comments_info(comments_info):
 
     comments_info['include_avatars'] = settings.enable_comment_avatars
     if comments_info['ctoken']:
+        replies_param = '&replies=1' if comments_info['is_replies'] else ''
         comments_info['more_comments_url'] = concat_or_none(
             util.URL_ORIGIN,
             '/comments?ctoken=',
-            comments_info['ctoken']
+            comments_info['ctoken'],
+            replies_param
         )
 
     comments_info['page_number'] = page_number = str(int(comments_info['offset']/20) + 1)
@@ -137,14 +139,11 @@ def post_process_comments_info(comments_info):
         comments_info['sort_text'] = 'top' if comments_info['sort'] == 0 else 'newest'
 
     comments_info['video_url'] = concat_or_none(
-        util.URL_ORIGIN,
-        '/watch?v=',
-        comments_info['video_id']
-    )
-
+        util.URL_ORIGIN, '/watch?v=', comments_info['video_id'])
     comments_info['video_thumbnail'] = concat_or_none(
         settings.img_prefix, 'https://i.ytimg.com/vi/',
-        comments_info['video_id'], '/mqdefault.jpg')
+        comments_info['video_id'], '/mqdefault.jpg'
+    )
 
 
 def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
@@ -198,17 +197,9 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''):
 @yt_app.route('/comments')
 def get_comments_page():
     ctoken = request.args.get('ctoken', '')
-    replies = False
-    if not ctoken:
-        video_id = request.args['video_id']
-        parent_id = request.args['parent_id']
-
-        ctoken = comment_replies_ctoken(video_id, parent_id)
-        replies = True
-
-    comments_info = yt_data_extract.extract_comments_info(
-        request_comments(ctoken, replies))
+    replies = request.args.get('replies', '0') == '1'
 
+    comments_info = yt_data_extract.extract_comments_info(request_comments(ctoken, replies))
     post_process_comments_info(comments_info)
 
     if not replies:
diff --git a/youtube/proto.py b/youtube/proto.py
index ff59eac..933ac92 100644
--- a/youtube/proto.py
+++ b/youtube/proto.py
@@ -1,6 +1,7 @@
 from math import ceil
 import base64
 import io
+import traceback
 
 
 def byte(n):
@@ -92,7 +93,6 @@ def read_group(data, end_sequence):
     data.seek(index + len(end_sequence))
     return data.original[start:index]
 
-
 def read_protobuf(data):
     data_original = data
     data = io.BytesIO(data)
@@ -122,12 +122,89 @@ def read_protobuf(data):
         yield (wire_type, field_number, value)
 
 
-def parse(data):
-    return {field_number: value for _, field_number, value in read_protobuf(data)}
+def parse(data, include_wire_type=False):
+    '''Returns a dict mapping field numbers to values, or to
+    [wire_type, value] lists when include_wire_type is true. A repeated
+    field number keeps only its last entry. data must not be b64-encoded'''
+    if include_wire_type:
+        return {field_number: [wire_type, value]
+                for wire_type, field_number, value in read_protobuf(data)}
+    return {field_number: value
+            for _, field_number, value in read_protobuf(data)}
+
+
+base64_enc_funcs = {
+    'base64': base64.urlsafe_b64encode,
+    'base64s': unpadded_b64encode,
+    'base64p': percent_b64encode,
+}
+
+
+def _make_protobuf(data):
+    '''Serialize data into protobuf bytes
+
+    data: dict of field_number -> [wire_type, value], list of (wire_type,
+    field_number, value), str, or (base64 directive, data) pair'''
+    if isinstance(data, dict):
+        data = [(wt, num, val) for num, (wt, val) in sorted(data.items())]
+    if isinstance(data, str):
+        return data.encode('utf-8')
+    elif len(data) == 2 and data[0] in tuple(base64_enc_funcs):
+        return base64_enc_funcs[data[0]](_make_protobuf(data[1]))
+    elif isinstance(data, list):
+        result = b''
+        for field in data:
+            if field[0] == 0:
+                result += uint(field[1], field[2])
+            elif field[0] == 2:
+                result += string(field[1], _make_protobuf(field[2]))
+            else:
+                raise NotImplementedError('Wire type ' + str(field[0])
+                    + ' not implemented')
+        return result
+    return data
+
+
+def make_protobuf(data):
+    return _make_protobuf(data).decode('ascii')
+
+
+def _set_protobuf_value(data, *path, value):
+    '''Return data with the field at path replaced by value
+
+    path has the form described in set_protobuf_value. An empty path
+    yields value itself; a missing field number raises KeyError'''
+    if not path:
+        return value
+    op, rest = path[0], path[1:]
+    if op in base64_enc_funcs:
+        inner = _set_protobuf_value(b64_to_bytes(data), *rest, value=value)
+        return base64_enc_funcs[op](inner)
+    pb_dict = parse(data, include_wire_type=True)
+    pb_dict[op][1] = _set_protobuf_value(pb_dict[op][1], *rest, value=value)
+    return _make_protobuf(pb_dict)
+
+
+def set_protobuf_value(data, *path, value):
+    '''Set a field's value in a raw protobuf structure
+
+    path is a sequence of field numbers and/or base64 encoding directives:
+        base64: normal base64 encoding with equal signs padding
+        base64s ("stripped"): no padding
+        base64p: %3D instead of = for padding
+
+    Returns (new_protobuf, err). On success new_protobuf is the result
+    decoded as ascii and err is None; on any Exception new_protobuf is
+    None and err is the formatted traceback'''
+    try:
+        new_protobuf = _set_protobuf_value(data, *path, value=value)
+        return new_protobuf.decode('ascii'), None
+    except Exception:
+        return None, traceback.format_exc()
 
 
 def b64_to_bytes(data):
     if isinstance(data, bytes):
         data = data.decode('ascii')
     data = data.replace("%3D", "=")
-    return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4))
+    return base64.urlsafe_b64decode(data + "="*((4 - len(data) % 4) % 4))
diff --git a/youtube/templates/comments.html b/youtube/templates/comments.html
index ceb31b8..808f98a 100644
--- a/youtube/templates/comments.html
+++ b/youtube/templates/comments.html
@@ -23,14 +23,18 @@
 
             <span class="comment-likes">{{ comment['likes_text'] if comment['like_count'] else ''}}</span>
             <div class="button-row">
-                {% if settings.use_comments_js and comment['reply_count'] %}
-                    <details class="replies" data-src="{{ comment['replies_url'] }}">
-                        <summary>{{ comment['view_replies_text'] }}</summary>
-                        <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
-                        <div class="comment_page">loading..</div>
-                    </details>
-                {% elif comment['reply_count'] %}
-                    <a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
+                {% if comment['reply_count'] %}
+                    {% if settings.use_comments_js and comment['replies_url'] %}
+                        <details class="replies" src="{{ comment['replies_url'] }}">
+                            <summary>{{ comment['view_replies_text'] }}</summary>
+                            <a href="{{ comment['replies_url'] }}" class="replies-open-new-tab" target="_blank">Open in new tab</a>
+                            <div class="comment_page">loading...</div>
+                        </details>
+                    {% elif comment['replies_url'] %}
+                        <a href="{{ comment['replies_url'] }}" class="replies">{{ comment['view_replies_text'] }}</a>
+                    {% else %}
+                        <a class="replies">{{ comment['view_replies_text'] }} (error constructing url)</a>
+                    {% endif %}
                 {% endif %}
             </div>
         </div>
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index ae8715f..197cf88 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -251,13 +251,19 @@ def extract_comments_info(polymer_json):
             info['video_title'] = extract_str(comment_thread.get('commentTargetTitle'))
             if 'replies' not in comment_thread:
                 comment_info['reply_count'] = 0
+                comment_info['reply_ctoken'] = None
             else:
                 comment_info['reply_count'] = extract_int(deep_get(comment_thread,
                     'replies', 'commentRepliesRenderer', 'moreText'
                 ), default=1)   # With 1 reply, the text reads "View reply"
+                comment_info['reply_ctoken'] = deep_get(comment_thread,
+                    'replies', 'commentRepliesRenderer', 'continuations', 0,
+                    'nextContinuationData', 'continuation'
+                )
             comment_renderer = deep_get(comment_thread, 'comment', 'commentRenderer', default={})
         elif 'commentRenderer' in comment:  # replies
             comment_info['reply_count'] = 0     # replyCount, below, not present for replies even if the reply has further replies to it
+            comment_info['reply_ctoken'] = None
             conservative_update(info, 'is_replies', True)
             comment_renderer = comment['commentRenderer']
         else: