diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-18 20:53:11 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-18 20:53:11 -0800 |
commit | 004e14a53800a5235d850517db8a3b421e804b30 (patch) | |
tree | ef59b0e5b57ad7bbe5dbe186bcb4b2d444df3d4f /youtube/yt_data_extract.py | |
parent | f6bf5213a579b16e17e8d72b51b090ffe4bc9bdb (diff) | |
download | yt-local-004e14a53800a5235d850517db8a3b421e804b30.tar.lz yt-local-004e14a53800a5235d850517db8a3b421e804b30.tar.xz yt-local-004e14a53800a5235d850517db8a3b421e804b30.zip |
Extraction: Use accessibility data to get timestamp and to get views for recommended videos
Diffstat (limited to 'youtube/yt_data_extract.py')
-rw-r--r-- | youtube/yt_data_extract.py | 10 |
1 file changed, 10 insertions, 0 deletions
```diff
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 6a5e4bb..ac5b78b 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -386,6 +386,16 @@ def extract_item_info(item, additional_info={}):
     if primary_type == 'video':
         info['id'] = item.get('videoId')
         info['view_count'] = extract_int(item.get('viewCountText'))
+
+        # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
+        accessibility_label = deep_get(item, 'title', 'accessibility', 'accessibilityData', 'label', default='')
+        timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
+        if timestamp:
+            conservative_update(info, 'time_published', timestamp.group(1))
+        view_count = re.search(r'(\d+) views', accessibility_label.replace(',', ''))
+        if view_count:
+            conservative_update(info, 'view_count', int(view_count.group(1)))
+
         if info['view_count']:
             info['approx_view_count'] = '{:,}'.format(info['view_count'])
         else:
```