author     Astound <kirito@disroot.org>  2024-01-22 05:21:12 +0800
committer  Astound <kirito@disroot.org>  2024-01-22 05:21:12 +0800
commit     d591956baafa6a16b956f814b69ecbf1f5e23aba
tree       200366bc4e2d8a5a55b21914c776cef1fe7670dd
parent     6011a08cdff6a42b7f0f5ce46a7631b0831fa098
ylist: show 100 videos per page instead of 20
Also add an option to the internal playlist ctoken function
for filtering out shorts, to be used in future anti-shorts features
-rw-r--r--  youtube/playlist.py                         | 81
-rw-r--r--  youtube/yt_data_extract/everything_else.py  | 17
2 files changed, 54 insertions, 44 deletions
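
Before the patch itself, a minimal usage sketch (not part of the commit) of the two entry points it touches. The playlist ID is the sample one from the old comments in playlist.py, and the snippet assumes it runs inside the yt-local tree so that youtube.playlist is importable; note that get_videos() performs a real network request via util.fetch_url.

from youtube.playlist import playlist_ctoken, get_videos

playlist_id = 'PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ'
page = 2

# Desktop paging is now 100 videos per page, so page 2 starts at offset 100.
offset = (int(page) - 1) * 100

# include_shorts defaults to True, so this token matches the old behaviour
# apart from the larger offset.
token = playlist_ctoken(playlist_id, offset)

# Same token with shorts filtered out (protobuf field 104 appended to the
# offset blob), intended for the future anti-shorts features the message
# mentions.
token_no_shorts = playlist_ctoken(playlist_id, offset, include_shorts=False)

# get_videos() builds the token itself: desktop requests fetch 100 videos
# per page, while use_mobile=True falls back to the 20-per-page mobile
# endpoint.
info = get_videos(playlist_id, page)
info_mobile = get_videos(playlist_id, page, use_mobile=True)
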
diff --git a/youtube/playlist.py b/youtube/playlist.py
index f4096dc..7eb4d22 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -12,12 +12,13 @@ from flask import request
 import flask
 
 
-def playlist_ctoken(playlist_id, offset):
+def playlist_ctoken(playlist_id, offset, include_shorts=True):
 
     offset = proto.uint(1, offset)
-    # this is just obfuscation as far as I can tell. It doesn't even follow protobuf
     offset = b'PT:' + proto.unpadded_b64encode(offset)
     offset = proto.string(15, offset)
+    if not include_shorts:
+        offset += proto.string(104, proto.uint(2, 1))
 
     continuation_info = proto.string(3, proto.percent_b64encode(offset))
 
@@ -26,48 +27,43 @@ def playlist_ctoken(playlist_id, offset):
 
     return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
 
-# initial request types:
-# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
-# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418
-
-# continuation request types:
-# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
-# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]
-
-
-headers_1 = (
-    ('Accept', '*/*'),
-    ('Accept-Language', 'en-US,en;q=0.5'),
-    ('X-YouTube-Client-Name', '2'),
-    ('X-YouTube-Client-Version', '2.20180614'),
-)
-
-
-def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
-    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
-    content = json.loads(content.decode('utf-8'))
+def playlist_first_page(playlist_id, report_text="Retrieved playlist", use_mobile=False):
+    if use_mobile:
+        url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
+        content = util.fetch_url(
+            url, util.mobile_xhr_headers,
+            report_text=report_text, debug_name='playlist_first_page'
+        )
+        content = json.loads(content.decode('utf-8'))
+    else:
+        url = 'https://www.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
+        content = util.fetch_url(
+            url, util.desktop_xhr_headers,
+            report_text=report_text, debug_name='playlist_first_page'
+        )
+        content = json.loads(content.decode('utf-8'))
 
     return content
 
 
 
-#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
-def get_videos(playlist_id, page):
-
-    url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
-    headers = {
-        'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
-        'Accept': '*/*',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'X-YouTube-Client-Name': '2',
-        'X-YouTube-Client-Version': '2.20180508',
-    }
-
-    content = util.fetch_url(
-        url, headers,
-        report_text="Retrieved playlist", debug_name='playlist_videos')
-
+def get_videos(playlist_id, page, use_mobile=False):
+    # mobile requests return 20 videos per page
+    if use_mobile:
+        url = "https://m.youtube.com/playlist?ctoken="
+        url += playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
+        content = util.fetch_url(
+            url, util.mobile_xhr_headers,
+            report_text="Retrieved playlist", debug_name='playlist_videos'
+        )
+    # desktop requests return 100 videos per page
+    else:
+        url = "https://www.youtube.com/playlist?ctoken="
+        url += playlist_ctoken(playlist_id, (int(page)-1)*100) + "&pbj=1"
+        content = util.fetch_url(
+            url, util.desktop_xhr_headers,
+            report_text="Retrieved playlist", debug_name='playlist_videos'
+        )
 
     info = json.loads(content.decode('utf-8'))
     return info
@@ -85,7 +81,10 @@ def get_playlist_page():
         this_page_json = first_page_json
     else:
         tasks = (
-            gevent.spawn(playlist_first_page, playlist_id, report_text="Retrieved playlist info" ),
+            gevent.spawn(
+                playlist_first_page, playlist_id,
+                report_text="Retrieved playlist info", use_mobile=True
+            ),
             gevent.spawn(get_videos, playlist_id, page)
         )
         gevent.joinall(tasks)
@@ -118,7 +117,7 @@ def get_playlist_page():
         'playlist.html',
         header_playlist_names=local_playlist.get_playlist_names(),
         video_list=info.get('items', []),
-        num_pages=math.ceil(video_count/20),
+        num_pages = math.ceil(video_count/100),
         parameters_dictionary=request.args,
 
         **info['metadata']
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 281bc03..7740c67 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -191,6 +191,19 @@ def extract_playlist_metadata(polymer_json):
         elif 'updated' in text:
             metadata['time_published'] = extract_date(text)
 
+    microformat = deep_get(response, 'microformat', 'microformatDataRenderer',
+                           default={})
+    conservative_update(
+        metadata, 'title', extract_str(microformat.get('title'))
+    )
+    conservative_update(
+        metadata, 'description', extract_str(microformat.get('description'))
+    )
+    conservative_update(
+        metadata, 'thumbnail', deep_get(microformat, 'thumbnail',
+                                        'thumbnails', -1, 'url')
+    )
+
     return metadata
 
 def extract_playlist_info(polymer_json):
@@ -198,13 +211,11 @@ def extract_playlist_info(polymer_json):
     if err:
         return {'error': err}
     info = {'error': None}
-    first_page = 'continuationContents' not in response
 
     video_list, _ = extract_items(response)
     info['items'] = [extract_item_info(renderer) for renderer in video_list]
 
-    if first_page:
-        info['metadata'] = extract_playlist_metadata(polymer_json)
+    info['metadata'] = extract_playlist_metadata(polymer_json)
 
     return info
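
A note on the yt_data_extract change: the naming suggests conservative_update() only fills metadata fields the header-based extraction left empty, so the microformat data acts as a fallback rather than overriding existing values. Below is a standalone sketch of that pattern with stand-in versions of conservative_update and deep_get (assumed semantics, not the repo's own implementations; extract_str is omitted and the response dict is made-up example data).

def conservative_update(obj, key, value):
    # Assumed semantics: only set the key if it is currently missing or None.
    if obj.get(key) is None:
        obj[key] = value

def deep_get(obj, *keys, default=None):
    # Walk nested dicts/lists, returning `default` on any missing step.
    for key in keys:
        try:
            obj = obj[key]
        except (KeyError, IndexError, TypeError):
            return default
    return obj

response = {
    'microformat': {'microformatDataRenderer': {
        'title': 'Title from microformat',
        'description': 'Description only present in microformat',
        'thumbnail': {'thumbnails': [{'url': 'https://example.com/thumbnail.jpg'}]},
    }}
}

# 'title' was already extracted from the header renderer, 'description' was not.
metadata = {'title': 'Title from header', 'description': None}

microformat = deep_get(response, 'microformat', 'microformatDataRenderer',
                       default={})
conservative_update(metadata, 'title', microformat.get('title'))
conservative_update(metadata, 'description', microformat.get('description'))
conservative_update(metadata, 'thumbnail',
                    deep_get(microformat, 'thumbnail', 'thumbnails', -1, 'url'))

print(metadata)
# {'title': 'Title from header',
#  'description': 'Description only present in microformat',
#  'thumbnail': 'https://example.com/thumbnail.jpg'}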