diff options
author | James Taylor <user234683@users.noreply.github.com> | 2020-04-10 13:57:11 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2020-04-10 13:57:11 -0700 |
commit | b2f482f1fbeb9587102fdd3a97ea674235e6bbcd (patch) | |
tree | 8596edea0873a74f32ee5a49ba71b3db79155292 /youtube/yt_data_extract/watch_extraction.py | |
parent | 481b4ecf5865c233a9f37d5f9e0a8ecc332f3831 (diff) | |
download | yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.tar.lz yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.tar.xz yt-local-b2f482f1fbeb9587102fdd3a97ea674235e6bbcd.zip |
Fix comment count & disabled extraction sometimes not working
because of an A/B test.
Diffstat (limited to 'youtube/yt_data_extract/watch_extraction.py')
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py index 8fb0af4..bcb414a 100644 --- a/youtube/yt_data_extract/watch_extraction.py +++ b/youtube/yt_data_extract/watch_extraction.py @@ -234,10 +234,21 @@ def _extract_watch_info_mobile(top_level): info['dislike_count'] = count # comment section info - items, _ = extract_items(response, item_types={'commentSectionRenderer'}) + items, _ = extract_items(response, item_types={ + 'commentSectionRenderer', 'commentsEntryPointHeaderRenderer'}) if items: - comment_info = items[0]['commentSectionRenderer'] - comment_count_text = extract_str(deep_get(comment_info, 'header', 'commentSectionHeaderRenderer', 'countText')) + header_type = list(items[0])[0] + comment_info = items[0][header_type] + # This seems to be some kind of A/B test being done on mobile, where + # this is present instead of the normal commentSectionRenderer. It can + # be seen here: + # https://www.androidpolice.com/2019/10/31/google-youtube-app-comment-section-below-videos/ + # https://www.youtube.com/watch?v=bR5Q-wD-6qo + if header_type == 'commentsEntryPointHeaderRenderer': + comment_count_text = extract_str(comment_info.get('headerText')) + else: + comment_count_text = extract_str(deep_get(comment_info, + 'header', 'commentSectionHeaderRenderer', 'countText')) if comment_count_text == 'Comments': # just this with no number, means 0 comments info['comment_count'] = 0 else: |