[Youtube] Improve comment API requests

co-authored by bbepis
author: Bepis <36346617+bbepis@users.noreply.github.com> 2020-12-30 19:25:24 +1100
committer: pukkandan <pukkandan@gmail.com> 2021-01-28 11:49:31 +0530
commit: 8d0ea5f95532bfe940d6cc74520b449af3334e46 (patch)
tree: e19b767a20b29b0827dcd9f382dcb383744878ea
parent: 0748b3317bb0d6db492fe5effaa2d43c7dddd582 (diff)
download: hypervideo-pre-8d0ea5f95532bfe940d6cc74520b449af3334e46.tar.lz
hypervideo-pre-8d0ea5f95532bfe940d6cc74520b449af3334e46.tar.xz
hypervideo-pre-8d0ea5f95532bfe940d6cc74520b449af3334e46.zip
1 files changed, 23 insertions, 27 deletions
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 265c29a5a..e5e36075e 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -2485,17 +2485,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
             try:
                 ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
-                continuations = [(ncd['continuation'], ncd['clickTrackingParams'])]
+                continuations = [ncd['continuation']]
             # Handle videos where comments have been disabled entirely
             except StopIteration:
                 continuations = []
 
-            def get_continuation(continuation, itct, session_token, replies=False):
+            def get_continuation(continuation, session_token, replies=False):
                 query = {
-                    'pbj': 1,
-                    'ctoken': continuation,
-                    'continuation': continuation,
-                    'itct': itct,
+                        'pbj': 1,
+                        'ctoken': continuation,
                 }
                 if replies:
                     query['action_get_comment_replies'] = 1
@@ -2523,23 +2521,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     response_code = handle.getcode()
                     if (response_code == 200):
                         return self._parse_json(content, video_id)
-                    if (response_code == 413):  # Sometimes google makes continuations that are too big to be accepted by themselves. Grade A engineering
-                        # self.to_screen(json.dumps(query))
-                        # self.to_screen('Google API rate limit detected; waiting 30 seconds before continuing')
-                        # time.sleep(30)
-                        # continue
+                    if (response_code == 413):
                         return None
                     raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
 
             first_continuation = True
             while continuations:
                 continuation, itct = continuations.pop()
-                comment_response = get_continuation(continuation, itct, xsrf_token)
+                comment_response = get_continuation(continuation, xsrf_token)
                 if not comment_response:
                     continue
                 if list(search_dict(comment_response, 'externalErrorMessage')):
                     raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
 
+                if 'continuationContents' not in comment_response['response']:
+                    # Something is wrong here. Youtube won't accept this continuation token for some reason and responds with a user satisfaction dialog (error?)
+                    continue
+                # not sure if this actually helps
+                if 'xsrf_token' in comment_response:
+                    xsrf_token = comment_response['xsrf_token']
+
                 item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
                 if first_continuation:
                     expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
@@ -2554,7 +2555,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     video_comments.append({
                         'id': comment['commentId'],
                         'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
-                        'time_text': comment['publishedTimeText']['runs'][0]['text'],
+                        'time_text': ''.join([c['text'] for c in comment['publishedTimeText']['runs']]),
                         'author': comment.get('authorText', {}).get('simpleText', ''),
                         'votes': comment.get('voteCount', {}).get('simpleText', '0'),
                         'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
@@ -2563,14 +2564,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     if 'replies' not in meta_comment['commentThreadRenderer']:
                         continue
 
-                    reply_continuation = meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']
-                    continuation = reply_continuation['continuation']
-                    itct = reply_continuation['clickTrackingParams']
-                    while True:
+                    reply_continuations = [rcn['nextContinuationData']['continuation'] for rcn in meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations']]
+                    while reply_continuations:
                         time.sleep(1)
-                        replies_data = get_continuation(continuation, itct, xsrf_token, True)
+                        continuation = reply_continuations.pop()
+                        replies_data = get_continuation(continuation, xsrf_token, True)
                         if not replies_data or 'continuationContents' not in replies_data[1]['response']:
-                            break
+                            continue
 
                         if self._downloader.params.get('verbose', False):
                             self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
@@ -2580,25 +2580,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             video_comments.append({
                                 'id': reply_comment['commentId'],
                                 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
-                                'time_text': reply_comment['publishedTimeText']['runs'][0]['text'],
+                                'time_text': ''.join([c['text'] for c in reply_comment['publishedTimeText']['runs']]),
                                 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
                                 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
                                 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
                                 'parent': comment['commentId']
                             })
                         if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
-                            break
+                            continue
 
-                        continuation = reply_comment_meta['continuations'][0]['nextContinuationData']['continuation']
-                        itct = reply_comment_meta['continuations'][0]['nextContinuationData']['clickTrackingParams']
+                        reply_continuations += [rcn['nextContinuationData']['continuation'] for rcn in reply_comment_meta['continuations']]
 
                 self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
 
                 if 'continuations' in item_section:
-                    new_continuations = [
-                        (ncd['nextContinuationData']['continuation'], ncd['nextContinuationData']['clickTrackingParams'])
-                        for ncd in item_section['continuations']]
-                    continuations += new_continuations
+                    continuations += [ncd['nextContinuationData']['continuation'] for ncd in item_section['continuations']]
                 time.sleep(1)
 
             self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
author	Bepis <36346617+bbepis@users.noreply.github.com>	2020-12-30 19:25:24 +1100
committer	pukkandan <pukkandan@gmail.com>	2021-01-28 11:49:31 +0530
commit	8d0ea5f95532bfe940d6cc74520b449af3334e46 (patch)
tree	e19b767a20b29b0827dcd9f382dcb383744878ea
parent	0748b3317bb0d6db492fe5effaa2d43c7dddd582 (diff)
download	hypervideo-pre-8d0ea5f95532bfe940d6cc74520b449af3334e46.tar.lz hypervideo-pre-8d0ea5f95532bfe940d6cc74520b449af3334e46.tar.xz hypervideo-pre-8d0ea5f95532bfe940d6cc74520b449af3334e46.zip