diff options
author | Jesus E <heckyel@riseup.net> | 2023-06-17 16:08:52 -0400 |
---|---|---|
committer | Jesus E <heckyel@riseup.net> | 2023-06-17 16:08:52 -0400 |
commit | e6fd9b40f4c5a789dfa542a3c441eda89695ff7a (patch) | |
tree | ece9e7b1ebf746c505e33f9f08e4a947504dbf90 /youtube/yt_data_extract | |
parent | f322035d4ac6aa17386ac9dd05f9c7a8d6720e99 (diff) | |
download | yt-local-e6fd9b40f4c5a789dfa542a3c441eda89695ff7a.tar.lz yt-local-e6fd9b40f4c5a789dfa542a3c441eda89695ff7a.tar.xz yt-local-e6fd9b40f4c5a789dfa542a3c441eda89695ff7a.zip |
Fix parsing shorts
Add check for extracting duration for shorts
Make short duration extraction stricter
Fix handling shorts with no views
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/common.py | 14 |
1 files changed, 7 insertions, 7 deletions
```diff
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 5680b16..f04ff64 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -323,8 +323,7 @@ def extract_item_info(item, additional_info={}):
 
         # handle case where it is "No views"
         if not info['approx_view_count']:
-            if ('No views' in item.get('shortViewCountText', '')
-                    or 'no views' in accessibility_label.lower()):
+            if ('No views' in extract_str(item.get('viewCountText', ''))):
                 info['view_count'] = 0
                 info['approx_view_count'] = '0'
 
@@ -365,12 +364,13 @@ def extract_item_info(item, additional_info={}):
                 ['accessibility', 'accessibilityData', 'label'],
                 default='')
 
-            duration = re.search(r'(\d+) (second|seconds|minute) - play video',
+            duration = re.search(r'(\d+) (second|seconds|minute) - play video$',
                                  accessibility_label)
-            if duration.group(2) == 'minute':
-                info['duration'] = "1:00"
-            else:
-                info['duration'] = "0:" + duration.group(1).zfill(2)
+            if duration:
+                if duration.group(2) == 'minute':
+                    info['duration'] = '1:00'
+                else:
+                    info['duration'] = '0:' + duration.group(1).zfill(2)
 
     # if it's an item in a playlist, get its index
     if 'index' in item: # url has wrong index on playlist page
```