about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- youtube/channel.py 107
-rw-r--r-- youtube/playlist.py 30
-rw-r--r-- youtube/templates/channel.html 10
3 files changed, 100 insertions, 47 deletions
diff --git a/youtube/channel.py b/youtube/channel.py
index 4f0d768..fe72e64 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -1,5 +1,6 @@
import base64
-from youtube import util, yt_data_extract, local_playlist, subscriptions
+from youtube import (util, yt_data_extract, local_playlist, subscriptions,
+ playlist)
from youtube import yt_app
import urllib
@@ -243,7 +244,8 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
# cache entries expire after 30 minutes
-@cachetools.func.ttl_cache(maxsize=128, ttl=30*60)
+number_of_videos_cache = cachetools.TTLCache(128, 30*60)
+@cachetools.cached(number_of_videos_cache)
def get_number_of_videos_channel(channel_id):
if channel_id is None:
return 1000
@@ -268,11 +270,14 @@ def get_number_of_videos_channel(channel_id):
return int(match.group(1).replace(',',''))
else:
return 0
+def set_cached_number_of_videos(channel_id, num_videos):
+    """Store num_videos as the cached video count for channel_id.
+
+    Writes straight into number_of_videos_cache under the key that
+    cachetools.cached builds by default for a single positional argument,
+    so a later get_number_of_videos_channel(channel_id) call returns it.
+    Unlike seeding the cache through a throwaway @cached function, this
+    also replaces an entry that is already present.
+    """
+    key = cachetools.keys.hashkey(channel_id)
+    number_of_videos_cache[key] = num_videos
channel_id_re = re.compile(r'videos\.xml\?channel_id=([a-zA-Z0-9_-]{24})"')
-
-
@cachetools.func.lru_cache(maxsize=128)
def get_channel_id(base_url):
# method that gives the smallest possible response at ~4 kb
@@ -357,7 +362,7 @@ def post_process_channel_info(info):
info['links'][i] = (text, util.prefix_url(url))
-def get_channel_first_page(base_url=None, channel_id=None, tab='videos'):
+def get_channel_first_page(base_url=None, tab='videos', channel_id=None):
if channel_id:
base_url = 'https://www.youtube.com/channel/' + channel_id
return util.fetch_url(base_url + '/' + tab + '?pbj=1&view=0',
@@ -370,8 +375,6 @@ playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}
# youtube.com/user/[username]/[tab]
# youtube.com/c/[custom]/[tab]
# youtube.com/[custom]/[tab]
-
-
def get_channel_page_general_url(base_url, tab, request, channel_id=None):
page_number = int(request.args.get('page', 1))
@@ -379,32 +382,80 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
view = request.args.get('view', '1')
query = request.args.get('query', '')
ctoken = request.args.get('ctoken', '')
- default_params = (page_number == 1 and sort == '3' and view == '1')
+ include_shorts = (sort != '2')
+ default_params = (page_number == 1 and sort in ('2', '3') and view == '1')
continuation = bool(ctoken) # whether or not we're using a continuation
+ page_size = 30
- if (tab in ('videos', 'shorts', 'streams') and channel_id and
- not default_params):
- tasks = (
- gevent.spawn(get_number_of_videos_channel, channel_id),
- gevent.spawn(get_channel_tab, channel_id, page_number, sort,
- tab, view, ctoken)
- )
- gevent.joinall(tasks)
- util.check_gevent_exceptions(*tasks)
- number_of_videos, polymer_json = tasks[0].value, tasks[1].value
- continuation = True
- elif tab in ('videos', 'shorts', 'streams'):
+ # Use the special UU playlist which contains all the channel's uploads
+ playlist_method_failed = False
+ if tab == 'videos':
+ if not channel_id:
+ channel_id = get_channel_id(base_url)
+ if page_number == 1 and include_shorts:
+ tasks = (
+ gevent.spawn(playlist.playlist_first_page,
+ 'UU' + channel_id[2:],
+ report_text='Retrieved channel videos'),
+ gevent.spawn(get_metadata, channel_id),
+ )
+ gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
+
+ # Ignore the metadata for now, it is cached and will be
+ # recalled later
+ pl_json = tasks[0].value
+ pl_info = yt_data_extract.extract_playlist_info(pl_json)
+ number_of_videos = pl_info['metadata']['video_count']
+ if number_of_videos is None:
+ number_of_videos = 1000
+ else:
+ set_cached_number_of_videos(channel_id, number_of_videos)
+ else:
+ tasks = (
+ gevent.spawn(playlist.get_videos, 'UU' + channel_id[2:],
+ page_number, include_shorts=include_shorts),
+ gevent.spawn(get_metadata, channel_id),
+ gevent.spawn(get_number_of_videos_channel, channel_id),
+ )
+ gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
+
+ pl_json = tasks[0].value
+ pl_info = yt_data_extract.extract_playlist_info(pl_json)
+ # tasks[2] is the get_number_of_videos_channel call spawned above
+ number_of_videos = tasks[2].value
+ info = pl_info
+ info['channel_id'] = channel_id
+ info['current_tab'] = 'videos'
+ if info['items']:
+ page_size = 100
+ else:
+ playlist_method_failed = True # Try the first-page method next
+
+ # Use the regular channel API
+ if tab in ('shorts','streams') or tab=='videos' and playlist_method_failed:
if channel_id:
num_videos_call = (get_number_of_videos_channel, channel_id)
else:
num_videos_call = (get_number_of_videos_general, base_url)
+
+ # Use ctoken method, which YouTube changes all the time
+ if channel_id and not default_params:
+ page_call = (get_channel_tab, channel_id, page_number, sort,
+ tab, view, ctoken)
+ # Use the first-page method, which won't break
+ else:
+ page_call = (get_channel_first_page, base_url, tab)
+
tasks = (
gevent.spawn(*num_videos_call),
- gevent.spawn(get_channel_first_page, base_url=base_url, tab=tab),
+ gevent.spawn(*page_call),
)
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
+
elif tab == 'about':
polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
elif tab == 'playlists' and page_number == 1:
@@ -418,12 +469,16 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
elif tab == 'search':
url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
+ elif tab == 'videos':
+ pass
else:
flask.abort(404, 'Unknown channel tab: ' + tab)
+ if tab != 'videos' or playlist_method_failed:
+ info = yt_data_extract.extract_channel_info(
+ json.loads(polymer_json), tab, continuation=continuation
+ )
- info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab,
- continuation=continuation)
if channel_id:
info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id
info['channel_id'] = channel_id
@@ -431,11 +486,11 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
channel_id = info['channel_id']
# Will have microformat present, cache metadata while we have it
- if channel_id and default_params:
+ if channel_id and default_params and tab != 'videos':
metadata = extract_metadata_for_caching(info)
set_cached_metadata(channel_id, metadata)
# Otherwise, populate with our (hopefully cached) metadata
- elif channel_id and info['channel_name'] is None:
+ elif channel_id and info.get('channel_name') is None:
metadata = get_metadata(channel_id)
for key, value in metadata.items():
yt_data_extract.conservative_update(info, key, value)
@@ -453,7 +508,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
if tab in ('videos', 'shorts', 'streams'):
info['number_of_videos'] = number_of_videos
- info['number_of_pages'] = math.ceil(number_of_videos/30)
+ info['number_of_pages'] = math.ceil(number_of_videos/page_size)
info['header_playlist_names'] = local_playlist.get_playlist_names()
if tab in ('videos', 'shorts', 'streams', 'playlists'):
info['current_sort'] = sort
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 7eb4d22..a2ff80f 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -47,23 +47,25 @@ def playlist_first_page(playlist_id, report_text="Retrieved playlist", use_mobil
return content
-def get_videos(playlist_id, page, use_mobile=False):
+def get_videos(playlist_id, page, include_shorts=True, use_mobile=False):
# mobile requests return 20 videos per page
if use_mobile:
- url = "https://m.youtube.com/playlist?ctoken="
- url += playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
- content = util.fetch_url(
- url, util.mobile_xhr_headers,
- report_text="Retrieved playlist", debug_name='playlist_videos'
- )
+ page_size = 20
+ headers = util.mobile_xhr_headers
# desktop requests return 100 videos per page
else:
- url = "https://www.youtube.com/playlist?ctoken="
- url += playlist_ctoken(playlist_id, (int(page)-1)*100) + "&pbj=1"
- content = util.fetch_url(
- url, util.desktop_xhr_headers,
- report_text="Retrieved playlist", debug_name='playlist_videos'
- )
+ page_size = 100
+ headers = util.desktop_xhr_headers
+
+ url = "https://m.youtube.com/playlist?ctoken="
+ url += playlist_ctoken(playlist_id, (int(page)-1)*page_size,
+ include_shorts=include_shorts)
+ url += "&pbj=1"
+ content = util.fetch_url(
+ url, headers, report_text="Retrieved playlist",
+ debug_name='playlist_videos'
+ )
+
info = json.loads(content.decode('utf-8'))
return info
@@ -117,7 +119,7 @@ def get_playlist_page():
'playlist.html',
header_playlist_names=local_playlist.get_playlist_names(),
video_list=info.get('items', []),
- num_pages = math.ceil(video_count/100),
+ num_pages=math.ceil(video_count/100),
parameters_dictionary=request.args,
**info['metadata']
diff --git a/youtube/templates/channel.html b/youtube/templates/channel.html
index b86cd54..5fff9de 100644
--- a/youtube/templates/channel.html
+++ b/youtube/templates/channel.html
@@ -74,7 +74,7 @@
<!-- new-->
<div id="links-metadata">
{% if current_tab in ('videos', 'shorts', 'streams') %}
- {% set sorts = [('1', 'views'), ('2', 'oldest'), ('3', 'newest')] %}
+ {% set sorts = [('1', 'views'), ('2', 'newest - no shorts'), ('3', 'newest')] %}
<div id="number-of-results">{{ number_of_videos }} videos</div>
{% elif current_tab == 'playlists' %}
{% set sorts = [('2', 'oldest'), ('3', 'newest'), ('4', 'last video added')] %}
@@ -110,13 +110,9 @@
<hr/>
<footer class="pagination-container">
- {% if (current_tab in ('videos', 'shorts', 'streams')) and current_sort.__str__() == '2' %}
- <nav class="next-previous-button-row">
- {{ common_elements.next_previous_ctoken_buttons(None, ctoken, channel_url + '/' + current_tab, parameters_dictionary) }}
- </nav>
- {% elif current_tab in ('videos', 'shorts', 'streams') %}
+ {% if current_tab in ('videos', 'shorts', 'streams') %}
<nav class="pagination-list">
- {{ common_elements.page_buttons(number_of_pages, channel_url + '/' + current_tab, parameters_dictionary, include_ends=(current_sort.__str__() == '3')) }}
+ {# Tuple membership: testing against the string '23' would also match '' and '23' #}
+ {{ common_elements.page_buttons(number_of_pages, channel_url + '/' + current_tab, parameters_dictionary, include_ends=(current_sort.__str__() in ('2', '3'))) }}
</nav>
{% elif current_tab == 'playlists' or current_tab == 'search' %}
<nav class="next-previous-button-row">