aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract/common.py
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2019-12-24 13:11:21 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2019-12-24 13:11:21 -0800
commit: f706689a56fb7d828faab4e54e0c5aa43486a619 (patch)
tree: fff49c1f903f2a6ccd96a3802a939f8927a3cd3c /youtube/yt_data_extract/common.py
parent: 3200d66d880d72ba2c4e687840d31c9c98c66f6a (diff)
download: yt-local-f706689a56fb7d828faab4e54e0c5aa43486a619.tar.lz
yt-local-f706689a56fb7d828faab4e54e0c5aa43486a619.tar.xz
yt-local-f706689a56fb7d828faab4e54e0c5aa43486a619.zip
extract_item_info: Don't extract author, author_id, etc. for channel items
Philosophically, a channel doesn't create itself.
Diffstat (limited to 'youtube/yt_data_extract/common.py')
-rw-r--r-- youtube/yt_data_extract/common.py 15
1 file changed, 8 insertions, 7 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 4af76c2..5572281 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -218,13 +218,14 @@ def extract_item_info(item, additional_info={}):
info['type'] = 'unsupported'
info['title'] = extract_str(item.get('title'))
- info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
- info['author_id'] = extract_str(multi_deep_get(item,
- ['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
- ['shortBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
- ['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId']
- ))
- info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None
+ if primary_type != 'channel':
+ info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
+ info['author_id'] = extract_str(multi_deep_get(item,
+ ['longBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
+ ['shortBylineText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId'],
+ ['ownerText', 'runs', 0, 'navigationEndpoint', 'browseEndpoint', 'browseId']
+ ))
+ info['author_url'] = ('https://www.youtube.com/channel/' + info['author_id']) if info['author_id'] else None
info['description'] = extract_formatted_text(multi_get(item, 'descriptionSnippet', 'descriptionText'))
info['thumbnail'] = multi_deep_get(item,
['thumbnail', 'thumbnails', 0, 'url'], # videos