author     Jesus E <heckyel@riseup.net>    2023-05-28 21:04:36 -0400
committer  Jesus E <heckyel@riseup.net>    2023-05-28 21:04:36 -0400
commit     68752000f0213f801a7c5e8127b77afeba644f7d (patch)
tree       b86637952fdd892bd41bc2f8de922116b6996c6d /youtube
parent     7b60751e997137f1ce53b94b1a89e9d3dc03c729 (diff)
download   yt-local-68752000f0213f801a7c5e8127b77afeba644f7d.tar.lz
           yt-local-68752000f0213f801a7c5e8127b77afeba644f7d.tar.xz
           yt-local-68752000f0213f801a7c5e8127b77afeba644f7d.zip
Update channel to new ctoken format
Huge thanks to @michaelweiser. Different sortings still don't work for videos and playlists.
Diffstat (limited to 'youtube')
-rw-r--r--  youtube/channel.py                          66
-rw-r--r--  youtube/templates/channel.html               2
-rw-r--r--  youtube/yt_data_extract/common.py            8
-rw-r--r--  youtube/yt_data_extract/everything_else.py  11
4 files changed, 70 insertions, 17 deletions
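For orientation, a rough usage sketch of the new token generator introduced in the channel.py hunks below. The import path and the channel id are placeholders; channel_ctoken_v4 and its arguments come from the diff, and sort=3 corresponds to "newest" on the videos tab per the SORT comments in channel.py.

    from youtube.channel import channel_ctoken_v4  # module path assumed from this repo

    # Page 2 of a channel's videos tab, sorted by newest; the id is a placeholder.
    token = channel_ctoken_v4('UCxxxxxxxxxxxxxxxxxxxxxx', page=2, sort=3, tab='videos')
    # The result is URL-safe base64; get_channel_tab percent-encodes the '=' padding
    # before sending it with the browse request.
    print(token.replace('=', '%3D'))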
diff --git a/youtube/channel.py b/youtube/channel.py
index 4393076..a84cf6a 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -31,6 +31,47 @@ headers_mobile = (
real_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=8XihrAcN1l4'),)
generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
+# https://github.com/user234683/youtube-local/issues/151
+def channel_ctoken_v4(channel_id, page, sort, tab, view=1):
+ new_sort = (2 if sort == 1 else 1)
+ offset = str(30*(int(page) - 1))
+ pointless_nest = proto.string(80226972,
+ proto.string(2, channel_id)
+ + proto.string(3,
+ proto.percent_b64encode(
+ proto.string(110,
+ proto.string(3,
+ proto.string(15,
+ proto.string(1,
+ proto.string(1,
+ proto.unpadded_b64encode(
+ proto.string(1,
+ proto.unpadded_b64encode(
+ proto.string(2,
+ b"ST:"
+ + proto.unpadded_b64encode(
+ proto.string(2, offset)
+ )
+ )
+ )
+ )
+ )
+ )
+ # targetId, just needs to be present but
+ # doesn't need to be correct
+ + proto.string(2, "63faaff0-0000-23fe-80f0-582429d11c38")
+ )
+ # 1 - newest, 2 - popular
+ + proto.uint(3, new_sort)
+ )
+ )
+ )
+ )
+ )
+ )
+
+ return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
+
# SORT:
# videos:
# Popular - 1
@@ -75,11 +116,11 @@ def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
2: 17254859483345278706,
1: 16570086088270825023,
}[int(sort)]
- page_token = proto.string(61, proto.unpadded_b64encode(
- proto.string(1, proto.uint(1, schema_number) + proto.string(
- 2,
- proto.string(1, proto.unpadded_b64encode(proto.uint(1, offset)))
- ))))
+ page_token = proto.string(61, proto.unpadded_b64encode(proto.string(1,
+ proto.uint(1, schema_number) + proto.string(2,
+ proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
+ )
+ )))
tab = proto.string(2, tab)
sort = proto.uint(3, int(sort))
@@ -118,8 +159,9 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
message = 'Got channel tab' if print_status else None
if not ctoken:
- ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
+ ctoken = channel_ctoken_v4(channel_id, page, sort, tab, view)
ctoken = ctoken.replace('=', '%3D')
+
# Not sure what the purpose of the key is or whether it will change
# For now it seems to be constant for the API endpoint, not dependent
# on the browsing session or channel
@@ -182,8 +224,7 @@ def get_channel_id(base_url):
# method that gives the smallest possible response at ~4 kb
# needs to be as fast as possible
base_url = base_url.replace('https://www', 'https://m') # avoid redirect
- response = util.fetch_url(
- base_url + '/about?pbj=1', headers_mobile,
+ response = util.fetch_url(base_url + '/about?pbj=1', headers_mobile,
debug_name='get_channel_id', report_text='Got channel id').decode('utf-8')
match = channel_id_re.search(response)
if match:
@@ -260,6 +301,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
query = request.args.get('query', '')
ctoken = request.args.get('ctoken', '')
default_params = (page_number == 1 and sort == '3' and view == '1')
+ continuation = bool(ctoken) # whether or not we're using a continuation
if tab == 'videos' and channel_id and not default_params:
tasks = (
@@ -270,6 +312,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
gevent.joinall(tasks)
util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
+ continuation = True
elif tab == 'videos':
if channel_id:
num_videos_call = (get_number_of_videos_channel, channel_id)
@@ -289,6 +332,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
elif tab == 'playlists':
polymer_json = get_channel_tab(channel_id, page_number, sort,
'playlists', view)
+ continuation = True
elif tab == 'search' and channel_id:
polymer_json = get_channel_search_json(channel_id, query, page_number)
elif tab == 'search':
@@ -297,7 +341,9 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
else:
flask.abort(404, 'Unknown channel tab: ' + tab)
- info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab)
+
+ info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab,
+ continuation=continuation)
if info['error'] is not None:
return flask.render_template('error.html', error_message=info['error'])
@@ -308,6 +354,8 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
info['header_playlist_names'] = local_playlist.get_playlist_names()
if tab in ('videos', 'playlists'):
info['current_sort'] = sort
+ info['channel_url'] = 'https://www.youtube.com/channel/' + channel_id
+ info['channel_id'] = channel_id
elif tab == 'search':
info['search_box_value'] = query
info['header_playlist_names'] = local_playlist.get_playlist_names()
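The deep nesting in channel_ctoken_v4 is layered protobuf fields plus base64. As a rough illustration of what the proto helpers named in the hunk are assumed to do (standard protobuf wire format: varint fields and length-delimited fields), here is a sketch; it is not a copy of the repository's proto module.

    import base64

    def _varint(n):
        # unsigned LEB128, as used by the protobuf wire format
        out = b''
        while True:
            byte = n & 0x7f
            n >>= 7
            out += bytes((byte | (0x80 if n else 0),))
            if not n:
                return out

    def uint(field_number, value):
        # wire type 0: varint field
        return _varint(field_number << 3 | 0) + _varint(value)

    def string(field_number, data):
        # wire type 2: length-delimited field (strings, bytes, nested messages)
        data = data.encode('utf-8') if isinstance(data, str) else data
        return _varint(field_number << 3 | 2) + _varint(len(data)) + data

    def unpadded_b64encode(data):
        # URL-safe base64 with the '=' padding stripped
        data = data.encode('utf-8') if isinstance(data, str) else data
        return base64.urlsafe_b64encode(data).replace(b'=', b'')

    def percent_b64encode(data):
        # URL-safe base64 with the '=' padding percent-encoded (an assumption,
        # inferred from the helper's name and the token's use in URLs)
        data = data.encode('utf-8') if isinstance(data, str) else data
        return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')

Each proto.string(...) wrapper in channel_ctoken_v4 therefore adds one level of nesting, and the base64 layers in between are what make the resulting ctoken opaque.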
diff --git a/youtube/templates/channel.html b/youtube/templates/channel.html
index 5a567c0..6266aab 100644
--- a/youtube/templates/channel.html
+++ b/youtube/templates/channel.html
@@ -1,7 +1,7 @@
{% if current_tab == 'search' %}
{% set page_title = search_box_value + ' - Page ' + page_number|string %}
{% else %}
- {% set page_title = channel_name + ' - Channel' %}
+ {% set page_title = channel_name|string + ' - Channel' %}
{% endif %}
{% extends "base.html" %}
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index f97597c..2e59109 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -542,8 +542,12 @@ def extract_items(response, item_types=_item_types,
item_types=item_types)
if items:
break
- elif 'onResponseReceivedEndpoints' in response:
- for endpoint in response.get('onResponseReceivedEndpoints', []):
+ elif ('onResponseReceivedEndpoints' in response
+ or 'onResponseReceivedActions' in response):
+ for endpoint in multi_get(response,
+ 'onResponseReceivedEndpoints',
+ 'onResponseReceivedActions',
+ []):
items, ctoken = extract_items_from_renderer_list(
multi_deep_get(
endpoint,
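The rewritten branch relies on multi_get picking whichever of the two keys is present in the response. The helper below is only a sketch of that assumed behavior, written to match the call pattern in the hunk (keys first, a trailing [] as the fallback); the project's real multi_get in common.py may be implemented differently.

    def multi_get(dictionary, *keys_and_default):
        # try the keys in order; the last positional argument is the fallback
        *keys, default = keys_and_default
        for key in keys:
            if key in dictionary:
                return dictionary[key]
        return default

    # placeholder payload, only to show which key gets picked
    response = {'onResponseReceivedActions': [{'placeholderEndpoint': True}]}
    assert multi_get(response,
                     'onResponseReceivedEndpoints',
                     'onResponseReceivedActions',
                     []) == [{'placeholderEndpoint': True}]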
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 56a2e68..9a6e31a 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -9,7 +9,7 @@ import re
import urllib
from math import ceil
-def extract_channel_info(polymer_json, tab):
+def extract_channel_info(polymer_json, tab, continuation=False):
response, err = extract_response(polymer_json)
if err:
return {'error': err}
@@ -23,7 +23,8 @@ def extract_channel_info(polymer_json, tab):
# channel doesn't exist or was terminated
# example terminated channel: https://www.youtube.com/channel/UCnKJeK_r90jDdIuzHXC0Org
- if not metadata:
+ # metadata and microformat are not present for continuation requests
+ if not metadata and not continuation:
if response.get('alerts'):
error_string = ' '.join(
extract_str(deep_get(alert, 'alertRenderer', 'text'), default='')
@@ -44,7 +45,7 @@ def extract_channel_info(polymer_json, tab):
info['approx_subscriber_count'] = extract_approx_int(deep_get(response,
'header', 'c4TabbedHeaderRenderer', 'subscriberCountText'))
- # stuff from microformat (info given by youtube for every page on channel)
+ # stuff from microformat (info given by youtube for first page on channel)
info['short_description'] = metadata.get('description')
if info['short_description'] and len(info['short_description']) > 730:
info['short_description'] = info['short_description'][0:730] + '...'
@@ -69,8 +70,8 @@ def extract_channel_info(polymer_json, tab):
info['ctoken'] = None
# empty channel
- if 'contents' not in response and 'continuationContents' not in response:
- return info
+ #if 'contents' not in response and 'continuationContents' not in response:
+ # return info
if tab in ('videos', 'playlists', 'search'):
items, ctoken = extract_items(response)
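Putting the pieces together, a rough sketch of the new flow for a non-default channel page, mirroring get_channel_page_general_url: the tab is fetched through the v4 ctoken and extracted with continuation=True, since such responses carry no channel metadata or microformat. The channel id is a placeholder and the import paths are assumed from this repository's layout.

    import json

    from youtube import yt_data_extract
    from youtube.channel import get_channel_tab

    channel_id = 'UCxxxxxxxxxxxxxxxxxxxxxx'  # placeholder 24-character channel id
    # Page 2 of the videos tab; get_channel_tab now builds the v4 ctoken internally.
    polymer_json = get_channel_tab(channel_id, page='2', sort=3, tab='videos')
    info = yt_data_extract.extract_channel_info(
        json.loads(polymer_json), 'videos', continuation=True)
    if info['error'] is None:
        print(info['ctoken'])  # continuation token for the following page, or None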