Fix comment count & disabled extraction not working sometimes

because of an A/B test.
author: James Taylor <user234683@users.noreply.github.com> 2020-04-10 13:57:11 -0700
committer: James Taylor <user234683@users.noreply.github.com> 2020-04-10 13:57:11 -0700
commit: b2f482f1fbeb9587102fdd3a97ea674235e6bbcd (patch)
tree: 8596edea0873a74f32ee5a49ba71b3db79155292 /youtube/yt_data_extract
parent: 481b4ecf5865c233a9f37d5f9e0a8ecc332f3831 (diff)
download: yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.tar.lz
yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.tar.xz
yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.zip
1 files changed, 14 insertions, 3 deletions
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 8fb0af4..bcb414a 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -234,10 +234,21 @@ def _extract_watch_info_mobile(top_level):
             info['dislike_count'] = count
 
     # comment section info
-    items, _ = extract_items(response, item_types={'commentSectionRenderer'})
+    items, _ = extract_items(response, item_types={
+        'commentSectionRenderer', 'commentsEntryPointHeaderRenderer'})
     if items:
-        comment_info = items[0]['commentSectionRenderer']
-        comment_count_text = extract_str(deep_get(comment_info, 'header', 'commentSectionHeaderRenderer', 'countText'))
+        header_type = list(items[0])[0]
+        comment_info = items[0][header_type]
+        # This seems to be some kind of A/B test being done on mobile, where
+        # commentsEntryPointHeaderRenderer is present instead of the normal
+        # commentSectionRenderer. It can be seen here:
+        # https://www.androidpolice.com/2019/10/31/google-youtube-app-comment-section-below-videos/
+        # https://www.youtube.com/watch?v=bR5Q-wD-6qo
+        if header_type == 'commentsEntryPointHeaderRenderer':
+            comment_count_text = extract_str(comment_info.get('headerText'))
+        else:
+            comment_count_text = extract_str(deep_get(comment_info,
+                'header', 'commentSectionHeaderRenderer', 'countText'))
         if comment_count_text == 'Comments':    # just this with no number, means 0 comments
             info['comment_count'] = 0
         else:
author	James Taylor <user234683@users.noreply.github.com>	2020-04-10 13:57:11 -0700
committer	James Taylor <user234683@users.noreply.github.com>	2020-04-10 13:57:11 -0700
commit	b2f482f1fbeb9587102fdd3a97ea674235e6bbcd (patch)
tree	8596edea0873a74f32ee5a49ba71b3db79155292 /youtube/yt_data_extract
parent	481b4ecf5865c233a9f37d5f9e0a8ecc332f3831 (diff)
download	yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.tar.lz yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.tar.xz yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.zip