author     Astound <kirito@disroot.org>  2024-01-22 05:21:12 +0800
committer  Astound <kirito@disroot.org>  2024-01-22 05:21:12 +0800
commit     d591956baafa6a16b956f814b69ecbf1f5e23aba
tree       200366bc4e2d8a5a55b21914c776cef1fe7670dd
parent     6011a08cdff6a42b7f0f5ce46a7631b0831fa098
ylist: show 100 videos per page instead of 20
Also add an option to the internal playlist ctoken function
for filtering out shorts, to be used in future anti-shorts features
-rw-r--r--  youtube/playlist.py                         | 81
-rw-r--r--  youtube/yt_data_extract/everything_else.py  | 17
2 files changed, 54 insertions, 44 deletions
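
Before the patch itself, a minimal usage sketch (not part of the commit) of the two entry points it touches. The playlist ID is the sample one from the old comments in playlist.py, and the snippet assumes it runs inside the yt-local tree so that youtube.playlist is importable; note that get_videos() performs a real network request via util.fetch_url.

from youtube.playlist import playlist_ctoken, get_videos

playlist_id = 'PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ'
page = 2

# Desktop paging is now 100 videos per page, so page 2 starts at offset 100.
offset = (int(page) - 1) * 100

# include_shorts defaults to True, so this token matches the old behaviour
# apart from the larger offset.
token = playlist_ctoken(playlist_id, offset)

# Same token with shorts filtered out (protobuf field 104 appended to the
# offset blob), intended for the future anti-shorts features the message
# mentions.
token_no_shorts = playlist_ctoken(playlist_id, offset, include_shorts=False)

# get_videos() builds the token itself: desktop requests fetch 100 videos
# per page, while use_mobile=True falls back to the 20-per-page mobile
# endpoint.
info = get_videos(playlist_id, page)
info_mobile = get_videos(playlist_id, page, use_mobile=True)
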
diff --git a/youtube/playlist.py b/youtube/playlist.py
index f4096dc..7eb4d22 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -12,12 +12,13 @@ from flask import request
 import flask
 
 
-def playlist_ctoken(playlist_id, offset):
+def playlist_ctoken(playlist_id, offset, include_shorts=True):
 
     offset = proto.uint(1, offset)
-    # this is just obfuscation as far as I can tell. It doesn't even follow protobuf
     offset = b'PT:' + proto.unpadded_b64encode(offset)
     offset = proto.string(15, offset)
+    if not include_shorts:
+        offset += proto.string(104, proto.uint(2, 1))
 
     continuation_info = proto.string(3, proto.percent_b64encode(offset))
 
@@ -26,48 +27,43 @@ def playlist_ctoken(playlist_id, offset):
 
     return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
 
-# initial request types:
-# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
-# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418
-
-# continuation request types:
-# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
-# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]
-
-
-headers_1 = (
-    ('Accept', '*/*'),
-    ('Accept-Language', 'en-US,en;q=0.5'),
-    ('X-YouTube-Client-Name', '2'),
-    ('X-YouTube-Client-Version', '2.20180614'),
-)
-
-
-def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
-    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
-    content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text, debug_name='playlist_first_page')
-    content = json.loads(content.decode('utf-8'))
+def playlist_first_page(playlist_id, report_text="Retrieved playlist", use_mobile=False):
+    if use_mobile:
+        url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
+        content = util.fetch_url(
+            url, util.mobile_xhr_headers,
+            report_text=report_text, debug_name='playlist_first_page'
+        )
+        content = json.loads(content.decode('utf-8'))
+    else:
+        url = 'https://www.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
+        content = util.fetch_url(
+            url, util.desktop_xhr_headers,
+            report_text=report_text, debug_name='playlist_first_page'
+        )
+        content = json.loads(content.decode('utf-8'))
 
     return content
 
 
 
-#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
-def get_videos(playlist_id, page):
-
-    url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
-    headers = {
-        'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
-        'Accept': '*/*',
-        'Accept-Language': 'en-US,en;q=0.5',
-        'X-YouTube-Client-Name': '2',
-        'X-YouTube-Client-Version': '2.20180508',
-    }
-
-    content = util.fetch_url(
-        url, headers,
-        report_text="Retrieved playlist", debug_name='playlist_videos')
-
+def get_videos(playlist_id, page, use_mobile=False):
+    # mobile requests return 20 videos per page
+    if use_mobile:
+        url = "https://m.youtube.com/playlist?ctoken="
+        url += playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
+        content = util.fetch_url(
+            url, util.mobile_xhr_headers,
+            report_text="Retrieved playlist", debug_name='playlist_videos'
+        )
+    # desktop requests return 100 videos per page
+    else:
+        url = "https://www.youtube.com/playlist?ctoken="
+        url += playlist_ctoken(playlist_id, (int(page)-1)*100) + "&pbj=1"
+        content = util.fetch_url(
+            url, util.desktop_xhr_headers,
+            report_text="Retrieved playlist", debug_name='playlist_videos'
+        )
 
     info = json.loads(content.decode('utf-8'))
     return info
@@ -85,7 +81,10 @@ def get_playlist_page():
         this_page_json = first_page_json
     else:
         tasks = (
-            gevent.spawn(playlist_first_page, playlist_id, report_text="Retrieved playlist info" ),
+            gevent.spawn(
+                playlist_first_page, playlist_id,
+                report_text="Retrieved playlist info", use_mobile=True
+            ),
             gevent.spawn(get_videos, playlist_id, page)
         )
         gevent.joinall(tasks)
@@ -118,7 +117,7 @@ def get_playlist_page():
         'playlist.html',
         header_playlist_names=local_playlist.get_playlist_names(),
         video_list=info.get('items', []),
-        num_pages=math.ceil(video_count/20),
+        num_pages = math.ceil(video_count/100),
         parameters_dictionary=request.args,
 
         **info['metadata']
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 281bc03..7740c67 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -191,6 +191,19 @@ def extract_playlist_metadata(polymer_json):
         elif 'updated' in text:
             metadata['time_published'] = extract_date(text)
 
+    microformat = deep_get(response, 'microformat', 'microformatDataRenderer',
+                           default={})
+    conservative_update(
+        metadata, 'title', extract_str(microformat.get('title'))
+    )
+    conservative_update(
+        metadata, 'description', extract_str(microformat.get('description'))
+    )
+    conservative_update(
+        metadata, 'thumbnail', deep_get(microformat, 'thumbnail',
+                                        'thumbnails', -1, 'url')
+    )
+
     return metadata
 
 def extract_playlist_info(polymer_json):
@@ -198,13 +211,11 @@ def extract_playlist_info(polymer_json):
     if err:
         return {'error': err}
     info = {'error': None}
-    first_page = 'continuationContents' not in response
 
     video_list, _ = extract_items(response)
     info['items'] = [extract_item_info(renderer) for renderer in video_list]
 
-    if first_page:
-        info['metadata'] = extract_playlist_metadata(polymer_json)
+    info['metadata'] = extract_playlist_metadata(polymer_json)
 
     return info
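
A note on the yt_data_extract change: the naming suggests conservative_update() only fills metadata fields the header-based extraction left empty, so the microformat data acts as a fallback rather than overriding existing values. Below is a standalone sketch of that pattern with stand-in versions of conservative_update and deep_get (assumed semantics, not the repo's own implementations; extract_str is omitted and the response dict is made-up example data).

def conservative_update(obj, key, value):
    # Assumed semantics: only set the key if it is currently missing or None.
    if obj.get(key) is None:
        obj[key] = value

def deep_get(obj, *keys, default=None):
    # Walk nested dicts/lists, returning `default` on any missing step.
    for key in keys:
        try:
            obj = obj[key]
        except (KeyError, IndexError, TypeError):
            return default
    return obj

response = {
    'microformat': {'microformatDataRenderer': {
        'title': 'Title from microformat',
        'description': 'Description only present in microformat',
        'thumbnail': {'thumbnails': [{'url': 'https://example.com/thumbnail.jpg'}]},
    }}
}

# 'title' was already extracted from the header renderer, 'description' was not.
metadata = {'title': 'Title from header', 'description': None}

microformat = deep_get(response, 'microformat', 'microformatDataRenderer',
                       default={})
conservative_update(metadata, 'title', microformat.get('title'))
conservative_update(metadata, 'description', microformat.get('description'))
conservative_update(metadata, 'thumbnail',
                    deep_get(microformat, 'thumbnail', 'thumbnails', -1, 'url'))

print(metadata)
# {'title': 'Title from header',
#  'description': 'Description only present in microformat',
#  'thumbnail': 'https://example.com/thumbnail.jpg'}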