From f706689a56fb7d828faab4e54e0c5aa43486a619 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Tue, 24 Dec 2019 13:11:21 -0800 Subject: extract_item_info: Don't extract author, author_id, etc. for channel items Philosophically, a channel doesn't create itself. --- youtube/yt_data_extract/common.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'youtube/yt_data_extract') diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 4af76c2..5572281 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -218,13 +218,14 @@ def extract_item_info(item, additional_info={}): info['type'] = 'unsupported' info['title'] = extract_str(item.get('title')) - info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText')) - info['author_id'] = extract_str(multi_deep_get(item, - ['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'], - ['shortBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'], - ['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'] - )) - info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None + if primary_type != 'channel': + info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText')) + info['author_id'] = extract_str(multi_deep_get(item, + ['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'], + ['shortBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'], + ['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'] + )) + info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None info['description'] = extract_formatted_text(multi_get(item, 'descriptionSnippet', 'descriptionText')) info['thumbnail'] = multi_deep_get(item, ['thumbnail', 'thumbnails', 0, 'url'], # videos -- cgit v1.2.3