diff options
author | Jesus E <heckyel@riseup.net> | 2023-06-17 16:05:40 -0400 |
---|---|---|
committer | Jesus E <heckyel@riseup.net> | 2023-06-17 16:05:40 -0400 |
commit | f322035d4ac6aa17386ac9dd05f9c7a8d6720e99 (patch) | |
tree | 865c9ad567525b1d0d5d912a7e2e92b33ee90378 /youtube/yt_data_extract | |
parent | 74907a81835435f881424b41729cc71cb9d50056 (diff) | |
download | yt-local-f322035d4ac6aa17386ac9dd05f9c7a8d6720e99.tar.lz yt-local-f322035d4ac6aa17386ac9dd05f9c7a8d6720e99.tar.xz yt-local-f322035d4ac6aa17386ac9dd05f9c7a8d6720e99.zip |
Add functional but preliminary channel tab support
Add channel tabs to the channel template and script
Update continuation token to request different tabs
Add support for 'reelItemRenderer' format required to extract shorts
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/common.py | 47 | ||||
-rw-r--r-- | youtube/yt_data_extract/everything_else.py | 2 |
2 files changed, 48 insertions, 1 deletion
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index fcefbf7..5680b16 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -249,6 +249,9 @@ def extract_item_info(item, additional_info={}): primary_type = type_parts[-2] if primary_type == 'video': info['type'] = 'video' + elif type_parts[0] == 'reel': # shorts + info['type'] = 'video' + primary_type = 'short' elif primary_type in ('playlist', 'radio', 'show'): info['type'] = 'playlist' info['playlist_type'] = primary_type @@ -343,6 +346,48 @@ def extract_item_info(item, additional_info={}): else: info['index'] = None + elif primary_type == 'short': + info['id'] = item.get('videoId') + if not info['id']: + info['id'] = deep_get(item,'navigationEndpoint', + 'reelWatchEndpoint', 'videoId') + info['approx_view_count'] = extract_approx_int(item.get('viewCountText')) + + # handle case where it is "No views" + if not info['approx_view_count']: + if ('No views' in item.get('shortViewCountText', '') + or 'no views' in accessibility_label.lower()): + info['view_count'] = 0 + info['approx_view_count'] = '0' + + # dig into accessibility data to get duration for shorts + accessibility_label = multi_deep_get(item, + ['accessibility', 'accessibilityData', 'label'], + default='') + + duration = re.search(r'(\d+) (second|seconds|minute) - play video', + accessibility_label) + if duration.group(2) == 'minute': + info['duration'] = "1:00" + else: + info['duration'] = "0:" + duration.group(1).zfill(2) + + # if it's an item in a playlist, get its index + if 'index' in item: # url has wrong index on playlist page + info['index'] = extract_int(item.get('index')) + elif 'indexText' in item: + # Current item in playlist has ▶ instead of the actual index, must + # dig into url + match = re.search(r'index=(\d+)', deep_get(item, + 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', + 'url', default='')) + if match is None: # worth a try then + info['index'] = 
extract_int(item.get('indexText')) + else: + info['index'] = int(match.group(1)) + else: + info['index'] = None + elif primary_type in ('playlist', 'radio'): info['id'] = item.get('playlistId') info['video_count'] = extract_int(item.get('videoCount')) @@ -398,6 +443,8 @@ _item_types = { 'gridVideoRenderer', 'playlistVideoRenderer', + 'reelItemRenderer', + 'playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer', diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py index 9a6e31a..745d08f 100644 --- a/youtube/yt_data_extract/everything_else.py +++ b/youtube/yt_data_extract/everything_else.py @@ -73,7 +73,7 @@ def extract_channel_info(polymer_json, tab, continuation=False): #if 'contents' not in response and 'continuationContents' not in response: # return info - if tab in ('videos', 'playlists', 'search'): + if tab in ('videos', 'shorts', 'streams', 'playlists', 'search'): items, ctoken = extract_items(response) additional_info = { 'author': info['channel_name'], |