Finally fix video count on channels accessed through general urls, rather than just channel id.

It was set to a fake value of 1000 previously in order to ensure there would be enough page buttons. This was because two sequential requests are necessary (one to get the channel id corresponding to the custom url, another to get the number of videos from the "all uploaded videos" playlist, the url for which can be generated from the channel id). Since Tor has a high latency, I thought at the time that this would be too slow, but in practice it's not too big of a deal. Introduces cachetools dependency in order to cache the function which gets the number of videos. The get_channel_id function has also been fixed since the ajax api seems to have been removed.
author: James Taylor <user234683@users.noreply.github.com> 2019-12-22 18:29:31 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2019-12-22 18:29:31 -0800
commit: 222117143f7d528efe0362e1b2a68b564393d4c5 (patch)
tree: 1e2e15d83e9230a20071eb72379110ce91a8fcc5 /youtube/channel.py
parent: bafae2837ea2207269d9eea2ea982f4030b37c7f (diff)
download: yt-local-222117143f7d528efe0362e1b2a68b564393d4c5.tar.lz
yt-local-222117143f7d528efe0362e1b2a68b564393d4c5.tar.xz
yt-local-222117143f7d528efe0362e1b2a68b564393d4c5.zip
1 files changed, 34 insertions, 19 deletions
diff --git a/youtube/channel.py b/youtube/channel.py
index c6018bc..891aca7 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -10,7 +10,8 @@ import html
 import math
 import gevent
 import re
-import functools
+import cachetools.func
+import traceback
 
 import flask
 from flask import request
@@ -95,23 +96,25 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_st
 
     return content
 
-def get_number_of_videos(channel_id):
+# cache entries expire after 30 minutes
+@cachetools.func.ttl_cache(maxsize=128, ttl=30*60)
+def get_number_of_videos_channel(channel_id):
+    if channel_id is None:
+        return 1000
+
     # Uploads playlist
     playlist_id = 'UU' + channel_id[2:]
     url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    print("Getting number of videos")
 
-    # Sometimes retrieving playlist info fails with 403 for no discernable reason
     try:
-        response = util.fetch_url(url, util.mobile_ua + headers_pbj, debug_name='number_of_videos')
+        response = util.fetch_url(url, util.mobile_ua + headers_pbj, 
+            debug_name='number_of_videos', report_text='Got number of videos')
     except urllib.error.HTTPError as e:
-        if e.code != 403:
-            raise
+        traceback.print_exc()
         print("Couldn't retrieve number of videos")
         return 1000
 
     response = response.decode('utf-8')
-    print("Got response for number of videos")
 
     match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response)
     if match:
@@ -119,13 +122,21 @@ def get_number_of_videos(channel_id):
     else:
         return 0
 
-@functools.lru_cache(maxsize=128)
-def get_channel_id(username):
-    # method that gives the smallest possible response at ~10 kb
+channel_id_re = re.compile(r'videos\.xml\?channel_id=([a-zA-Z0-9_-]{24})"')
+@cachetools.func.lru_cache(maxsize=128)
+def get_channel_id(base_url):
+    # method that gives the smallest possible response at ~4 kb
     # needs to be as fast as possible
-    url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
-    response = util.fetch_url(url, util.mobile_ua + headers_1).decode('utf-8')
-    return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)
+    base_url = base_url.replace('https://www', 'https://m') # avoid redirect
+    response = util.fetch_url(base_url + '/about?pbj=1', util.mobile_ua + headers_pbj,
+        debug_name='get_channel_id', report_text='Got channel id').decode('utf-8')
+    match = channel_id_re.search(response)
+    if match:
+        return match.group(1)
+    return None
+
+def get_number_of_videos_general(base_url):
+    return get_number_of_videos_channel(get_channel_id(base_url))
 
 def get_channel_search_json(channel_id, query, page):
     params = proto.string(2, 'search') + proto.string(15, str(page))
@@ -164,21 +175,25 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
 
     if tab == 'videos' and channel_id:
         tasks = (
-            gevent.spawn(get_number_of_videos, channel_id ), 
+            gevent.spawn(get_number_of_videos_channel, channel_id), 
             gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
         )
         gevent.joinall(tasks)
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'videos':
-        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
-        number_of_videos = 1000
+        tasks = (
+            gevent.spawn(get_number_of_videos_general, base_url), 
+            gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
+        )
+        gevent.joinall(tasks)
+        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'about':
         polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
     elif tab == 'playlists':
         polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='gen_channel_playlists')
     elif tab == 'search' and channel_id:
         tasks = (
-            gevent.spawn(get_number_of_videos, channel_id ), 
+            gevent.spawn(get_number_of_videos_channel, channel_id ), 
             gevent.spawn(get_channel_search_json, channel_id, query, page_number)
         )
         gevent.joinall(tasks)
@@ -199,11 +214,11 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
     if tab in ('videos', 'search'):
         info['number_of_videos'] = number_of_videos
         info['number_of_pages'] = math.ceil(number_of_videos/30)
-        info['header_playlist_names'] = local_playlist.get_playlist_names()
     if tab in ('videos', 'playlists'):
         info['current_sort'] = sort
     elif tab == 'search':
         info['search_box_value'] = query
+        info['header_playlist_names'] = local_playlist.get_playlist_names()
     info['subscribed'] = subscriptions.is_subscribed(info['channel_id'])
 
     return flask.render_template('channel.html',
author	James Taylor <user234683@users.noreply.github.com>	2019-12-22 18:29:31 -0800
committer	James Taylor <user234683@users.noreply.github.com>	2019-12-22 18:29:31 -0800
commit	222117143f7d528efe0362e1b2a68b564393d4c5 (patch)
tree	1e2e15d83e9230a20071eb72379110ce91a8fcc5 /youtube/channel.py
parent	bafae2837ea2207269d9eea2ea982f4030b37c7f (diff)
download	yt-local-222117143f7d528efe0362e1b2a68b564393d4c5.tar.lz yt-local-222117143f7d528efe0362e1b2a68b564393d4c5.tar.xz yt-local-222117143f7d528efe0362e1b2a68b564393d4c5.zip