about | summary | refs | log | tree | commit | diff | stats
path: root/youtube/yt_data_extract
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- youtube/yt_data_extract/common.py 31
-rw-r--r-- youtube/yt_data_extract/everything_else.py 3
2 files changed, 23 insertions, 11 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 3b2ebb5..dd02f2e 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -392,6 +392,13 @@ nested_renderer_list_dispatch = {
'playlistVideoListRenderer': _traverse_standard_list,
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
}
+def get_nested_renderer_list_function(key):
+ if key in nested_renderer_list_dispatch:
+ return nested_renderer_list_dispatch[key]
+ elif key.endswith('Continuation'):
+ return _traverse_standard_list
+ return None
+
def extract_items_from_renderer(renderer, item_types=_item_types):
ctoken = None
items = []
@@ -423,13 +430,13 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
items.append(renderer)
# has a list in it, add it to the iter stack
- elif key in nested_renderer_list_dispatch:
- renderer_list, continuation = nested_renderer_list_dispatch[key](value)
+ elif get_nested_renderer_list_function(key):
+ renderer_list, cont = get_nested_renderer_list_function(key)(value)
if renderer_list:
iter_stack.append(current_iter)
current_iter = iter(renderer_list)
- if continuation:
- ctoken = continuation
+ if cont:
+ ctoken = cont
# new renderer nested inside this one
elif key in nested_renderer_dispatch:
@@ -441,12 +448,16 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
def extract_items(response, item_types=_item_types):
'''return items, ctoken'''
if 'continuationContents' in response:
- # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something
- for key, renderer_continuation in get(response, 'continuationContents', {}).items():
- if key.endswith('Continuation'): # e.g. commentSectionContinuation, playlistVideoListContinuation
- items = multi_get(renderer_continuation, 'contents', 'items', default=[])
- ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation')
- return items, ctoken
+ # sometimes there's another, empty, junk [something]Continuation key
+ # find real one
+ for key, renderer_cont in get(response,
+ 'continuationContents', {}).items():
+ # e.g. commentSectionContinuation, playlistVideoListContinuation
+ if key.endswith('Continuation'):
+ items, cont = extract_items_from_renderer({key: renderer_cont},
+ item_types=item_types)
+ if items:
+ return items, cont
return [], None
elif 'contents' in response:
renderer = get(response, 'contents', {})
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index d1389c6..20e0f30 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -227,7 +227,8 @@ def extract_comments_info(polymer_json):
info['sort'] = metadata.get('sort')
info['video_title'] = None
- comments, ctoken = extract_items(response)
+ comments, ctoken = extract_items(response,
+ item_types={'commentThreadRenderer', 'commentRenderer'})
info['comments'] = []
info['ctoken'] = ctoken
for comment in comments: