about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- youtube/channel.py 39
-rw-r--r-- youtube/templates/channel.html 8
-rw-r--r-- youtube/yt_data_extract/common.py 47
-rw-r--r-- youtube/yt_data_extract/everything_else.py 2
4 files changed, 75 insertions, 21 deletions
diff --git a/youtube/channel.py b/youtube/channel.py
index 4cf6cdf..5c757d3 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -32,16 +32,23 @@ real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
# added an extra nesting under the 2nd base64 compared to v4
+# added tab support
def channel_ctoken_v5(channel_id, page, sort, tab, view=1):
new_sort = (2 if int(sort) == 1 else 1)
offset = str(30*(int(page) - 1))
+ if tab == 'videos':
+ tab = 15
+ elif tab == 'shorts':
+ tab = 10
+ elif tab == 'streams':
+ tab = 14
pointless_nest = proto.string(80226972,
proto.string(2, channel_id)
+ proto.string(3,
proto.percent_b64encode(
proto.string(110,
proto.string(3,
- proto.string(15,
+ proto.string(tab,
proto.string(1,
proto.string(1,
proto.unpadded_b64encode(
@@ -167,7 +174,7 @@ def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
tab = proto.string(2, tab)
sort = proto.uint(3, int(sort))
- # page = proto.string(15, str(page) )
+ #page = proto.string(15, str(page))
shelf_view = proto.uint(4, 0)
view = proto.uint(6, int(view))
@@ -202,7 +209,7 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
message = 'Got channel tab' if print_status else None
if not ctoken:
- if tab == 'videos':
+ if tab in ('videos', 'shorts', 'streams'):
ctoken = channel_ctoken_v5(channel_id, page, sort, tab, view)
else:
ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
@@ -349,11 +356,11 @@ def post_process_channel_info(info):
info['links'][i] = (text, util.prefix_url(url))
-def get_channel_first_page(base_url=None, channel_id=None):
+def get_channel_first_page(base_url=None, channel_id=None, tab='videos'):
if channel_id:
base_url = 'https://www.youtube.com/channel/' + channel_id
- return util.fetch_url(base_url + '/videos?pbj=1&view=0', headers_desktop,
- debug_name='gen_channel_videos')
+ return util.fetch_url(base_url + '/' + tab + '?pbj=1&view=0',
+ headers_desktop, debug_name='gen_channel_' + tab)
playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}
@@ -374,24 +381,25 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
default_params = (page_number == 1 and sort == '3' and view == '1')
continuation = bool(ctoken) # whether or not we're using a continuation
- if tab == 'videos' and channel_id and not default_params:
+ if (tab in ('videos', 'shorts', 'streams') and channel_id and
+ not default_params):
tasks = (
gevent.spawn(get_number_of_videos_channel, channel_id),
gevent.spawn(get_channel_tab, channel_id, page_number, sort,
- 'videos', view, ctoken)
+ tab, view, ctoken)
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
continuation = True
- elif tab == 'videos':
+ elif tab in ('videos', 'shorts', 'streams'):
if channel_id:
num_videos_call = (get_number_of_videos_channel, channel_id)
else:
num_videos_call = (get_number_of_videos_general, base_url)
tasks = (
gevent.spawn(*num_videos_call),
- gevent.spawn(get_channel_first_page, base_url=base_url),
+ gevent.spawn(get_channel_first_page, base_url=base_url, tab=tab),
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
@@ -440,13 +448,13 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
item.update(additional_info)
if info['error'] is not None:
- return flask.render_template('error.html', error_message=info['error'])
+ return flask.render_template('error.html', error_message = info['error'])
- if tab == 'videos':
+ if tab in ('videos', 'shorts', 'streams'):
info['number_of_videos'] = number_of_videos
info['number_of_pages'] = math.ceil(number_of_videos/30)
info['header_playlist_names'] = local_playlist.get_playlist_names()
- if tab in ('videos', 'playlists'):
+ if tab in ('videos', 'shorts', 'streams', 'playlists'):
info['current_sort'] = sort
elif tab == 'search':
info['search_box_value'] = query
@@ -457,9 +465,8 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
post_process_channel_info(info)
- return flask.render_template(
- 'channel.html',
- parameters_dictionary=request.args,
+ return flask.render_template('channel.html',
+ parameters_dictionary = request.args,
**info
)
diff --git a/youtube/templates/channel.html b/youtube/templates/channel.html
index 6266aab..b86cd54 100644
--- a/youtube/templates/channel.html
+++ b/youtube/templates/channel.html
@@ -33,7 +33,7 @@
<hr/>
<nav class="channel-tabs">
- {% for tab_name in ('Videos', 'Playlists', 'About') %}
+ {% for tab_name in ('Videos', 'Shorts', 'Streams', 'Playlists', 'About') %}
{% if tab_name.lower() == current_tab %}
<a class="tab page-button">{{ tab_name }}</a>
{% else %}
@@ -73,7 +73,7 @@
<!-- new-->
<div id="links-metadata">
- {% if current_tab == 'videos' %}
+ {% if current_tab in ('videos', 'shorts', 'streams') %}
{% set sorts = [('1', 'views'), ('2', 'oldest'), ('3', 'newest')] %}
<div id="number-of-results">{{ number_of_videos }} videos</div>
{% elif current_tab == 'playlists' %}
@@ -110,11 +110,11 @@
<hr/>
<footer class="pagination-container">
- {% if current_tab == 'videos' and current_sort.__str__() == '2' %}
+ {% if (current_tab in ('videos', 'shorts', 'streams')) and current_sort.__str__() == '2' %}
<nav class="next-previous-button-row">
{{ common_elements.next_previous_ctoken_buttons(None, ctoken, channel_url + '/' + current_tab, parameters_dictionary) }}
</nav>
- {% elif current_tab == 'videos' %}
+ {% elif current_tab in ('videos', 'shorts', 'streams') %}
<nav class="pagination-list">
{{ common_elements.page_buttons(number_of_pages, channel_url + '/' + current_tab, parameters_dictionary, include_ends=(current_sort.__str__() == '3')) }}
</nav>
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index fcefbf7..5680b16 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -249,6 +249,9 @@ def extract_item_info(item, additional_info={}):
primary_type = type_parts[-2]
if primary_type == 'video':
info['type'] = 'video'
+ elif type_parts[0] == 'reel': # shorts
+ info['type'] = 'video'
+ primary_type = 'short'
elif primary_type in ('playlist', 'radio', 'show'):
info['type'] = 'playlist'
info['playlist_type'] = primary_type
@@ -343,6 +346,48 @@ def extract_item_info(item, additional_info={}):
else:
info['index'] = None
+ elif primary_type == 'short':
+ info['id'] = item.get('videoId')
+ if not info['id']:
+ info['id'] = deep_get(item,'navigationEndpoint',
+ 'reelWatchEndpoint', 'videoId')
+ info['approx_view_count'] = extract_approx_int(item.get('viewCountText'))
+
+ # handle case where it is "No views"
+ if not info['approx_view_count']:
+ if ('No views' in item.get('shortViewCountText', '')
+ or 'no views' in accessibility_label.lower()):
+ info['view_count'] = 0
+ info['approx_view_count'] = '0'
+
+ # dig into accessibility data to get duration for shorts
+ accessibility_label = multi_deep_get(item,
+ ['accessibility', 'accessibilityData', 'label'],
+ default='')
+
+ duration = re.search(r'(\d+) (second|seconds|minute) - play video',
+ accessibility_label)
+ if duration.group(2) == 'minute':
+ info['duration'] = "1:00"
+ else:
+ info['duration'] = "0:" + duration.group(1).zfill(2)
+
+ # if it's an item in a playlist, get its index
+ if 'index' in item: # url has wrong index on playlist page
+ info['index'] = extract_int(item.get('index'))
+ elif 'indexText' in item:
+ # Current item in playlist has ▶ instead of the actual index, must
+ # dig into url
+ match = re.search(r'index=(\d+)', deep_get(item,
+ 'navigationEndpoint', 'commandMetadata', 'webCommandMetadata',
+ 'url', default=''))
+ if match is None: # worth a try then
+ info['index'] = extract_int(item.get('indexText'))
+ else:
+ info['index'] = int(match.group(1))
+ else:
+ info['index'] = None
+
elif primary_type in ('playlist', 'radio'):
info['id'] = item.get('playlistId')
info['video_count'] = extract_int(item.get('videoCount'))
@@ -398,6 +443,8 @@ _item_types = {
'gridVideoRenderer',
'playlistVideoRenderer',
+ 'reelItemRenderer',
+
'playlistRenderer',
'compactPlaylistRenderer',
'gridPlaylistRenderer',
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 9a6e31a..745d08f 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -73,7 +73,7 @@ def extract_channel_info(polymer_json, tab, continuation=False):
#if 'contents' not in response and 'continuationContents' not in response:
# return info
- if tab in ('videos', 'playlists', 'search'):
+ if tab in ('videos', 'shorts', 'streams', 'playlists', 'search'):
items, ctoken = extract_items(response)
additional_info = {
'author': info['channel_name'],