From 666456146d1fd7bb73273e12d6294525511edf9a Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 6 Jul 2018 20:11:08 -0700 Subject: fix comment replies being limited to 10 --- youtube/comments.py | 29 +++++++++++++++++++---- youtube/proto.py | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/youtube/comments.py b/youtube/comments.py index 3f44758..695d18a 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -59,6 +59,21 @@ def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''): result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information) return base64.urlsafe_b64encode(result).decode('ascii') +def comment_replies_ctoken(video_id, comment_id, max_results=500): + + params = proto.string(2, comment_id) + proto.uint(9, max_results) + params = proto.nested(3, params) + + result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, params) + return base64.urlsafe_b64encode(result).decode('ascii') + +def get_ids(ctoken): + params = proto.parse(proto.b64_to_bytes(ctoken)) + video_id = proto.parse(params[2])[2] + params = proto.parse(params[6]) + params = proto.parse(params[3]) + return params[2].decode('ascii'), video_id.decode('ascii') + mobile_headers = { 'Host': 'm.youtube.com', 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', @@ -83,7 +98,7 @@ def request_comments(ctoken, replies=False): print("got , retrying") continue break - '''with open('comments_debug', 'wb') as f: + '''with open('debug/comments_debug', 'wb') as f: f.write(content)''' return content @@ -100,7 +115,8 @@ def parse_comments(content, replies=False): if not replies: if comment_raw['replies'] is not None: ctoken = comment_raw['replies']['continuations'][0]['continuation'] - replies_url = URL_ORIGIN + '/comments?ctoken=' + ctoken + "&replies=1" + comment_id, video_id = get_ids(ctoken) + replies_url = URL_ORIGIN + '/comments?parent_id=' + comment_id + "&video_id=" + video_id comment_raw = comment_raw['comment'] comment = { 'author': comment_raw['author']['runs'][0]['text'], @@ -148,8 +164,13 @@ more_comments_template = Template('''> 3 + + if wire_type == 0: + value = read_varint(data) + elif wire_type == 1: + value = data.read(8) + elif wire_type == 2: + length = read_varint(data) + value = data.read(length) + elif wire_type == 3: + end_bytes = encode_varint((field_number << 3) | 4) + value = read_group(data, end_bytes) + elif wire_type == 5: + value = data.read(4) + else: + raise Exception("Unknown wire type: " + str(wire_type) + ", Tag: " + bytes_to_hex(succinct_encode(tag)) + ", at position " + str(data.tell())) + yield (wire_type, field_number, value) + +def parse(data): + return {field_number: value for _, field_number, value in read_protobuf(data)} + +def b64_to_bytes(data): + if isinstance(data, bytes): + data = data.decode('ascii') + data = data.replace("%3D", "=") + return base64.urlsafe_b64decode(data + "="*((4 - len(data)%4)%4) ) + -- cgit v1.2.3