From e6fd9b40f4c5a789dfa542a3c441eda89695ff7a Mon Sep 17 00:00:00 2001 From: Jesus E Date: Sat, 17 Jun 2023 16:08:52 -0400 Subject: Fix parsing shorts Add check for extracting duration for shorts Make short duration extraction stricter Fix handling shorts with no views --- youtube/yt_data_extract/common.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'youtube/yt_data_extract') diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 5680b16..f04ff64 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -323,8 +323,7 @@ def extract_item_info(item, additional_info={}): # handle case where it is "No views" if not info['approx_view_count']: - if ('No views' in item.get('shortViewCountText', '') - or 'no views' in accessibility_label.lower()): + if ('No views' in extract_str(item.get('viewCountText', ''))): info['view_count'] = 0 info['approx_view_count'] = '0' @@ -365,12 +364,13 @@ def extract_item_info(item, additional_info={}): ['accessibility', 'accessibilityData', 'label'], default='') - duration = re.search(r'(\d+) (second|seconds|minute) - play video', + duration = re.search(r'(\d+) (second|seconds|minute) - play video$', accessibility_label) - if duration.group(2) == 'minute': - info['duration'] = "1:00" - else: - info['duration'] = "0:" + duration.group(1).zfill(2) + if duration: + if duration.group(2) == 'minute': + info['duration'] = '1:00' + else: + info['duration'] = '0:' + duration.group(1).zfill(2) # if it's an item in a playlist, get its index if 'index' in item: # url has wrong index on playlist page -- cgit v1.2.3