youtube/playlist.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

from youtube import util, yt_data_extract, html_common, template, proto

import base64
import urllib
import json
import string
import gevent
import math

# Page template for the playlist view, read once when the module is imported.
# The path is relative to the process's current working directory.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default; the rendered page is emitted as UTF-8, so the
# template is presumably stored as UTF-8 too (confirm against the file).
with open("yt_playlist_template.html", "r", encoding="utf-8") as file:
    yt_playlist_template = template.Template(file.read())


def playlist_ctoken(playlist_id, offset):
    """Build the continuation token (ctoken) for a playlist request.

    Args:
        playlist_id: playlist id; it is prefixed with 'VL' before encoding.
        offset: integer stored in field 1 of the innermost message
            (get_videos passes (page - 1) * 20).

    Returns:
        The token as an ASCII str (URL-safe base64 of the outer message).
    """
    offset_field = proto.uint(1, offset)
    # This 'PT:' + base64 layer is just obfuscation as far as anyone can
    # tell; it does not even follow protobuf.
    tagged_offset = proto.string(15, b'PT:' + proto.unpadded_b64encode(offset_field))
    continuation_field = proto.string(3, proto.percent_b64encode(tagged_offset))

    id_field = proto.string(2, 'VL' + playlist_id)
    # Both fields are nested inside one more (seemingly pointless) message.
    wrapper = proto.string(80226972, id_field + continuation_field)

    token_bytes = base64.urlsafe_b64encode(wrapper)
    return token_bytes.decode('ascii')

# initial request types:
#   polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
#   ajax json:    https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418


# continuation request types:
#   polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
#   ajax json:    https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]


# Extra request headers for the first-page playlist request, kept as a tuple
# of (name, value) pairs so it can be concatenated onto util.mobile_ua.
headers_1 = tuple({
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'X-YouTube-Client-Name': '2',
    'X-YouTube-Client-Version': '2.20180614',
}.items())

def playlist_first_page(playlist_id, report_text = "Retrieved playlist"):
    """Fetch the first page of a playlist from m.youtube.com and parse it.

    Args:
        playlist_id: playlist id placed in the 'list' query parameter.
        report_text: passed through to util.fetch_url as report_text.

    Returns:
        The decoded JSON response.
    """
    request_headers = util.mobile_ua + headers_1
    raw = util.fetch_url(
        'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1',
        request_headers,
        report_text=report_text,
    )
    # Debugging aid: dump the raw response.
    # with open('debug/playlist_debug', 'wb') as f:
    #     f.write(raw)
    text = util.uppercase_escape(raw.decode('utf-8'))
    return json.loads(text)
    

# Example continuation request URL: https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
def get_videos(playlist_id, page):
    """Fetch one page of playlist videos through a continuation request.

    Args:
        playlist_id: playlist id used to build the continuation token.
        page: 1-based page number (int or numeric str); each page covers
            20 videos, so the token offset is (page - 1) * 20.

    Returns:
        The decoded JSON response.
    """
    first_index = (int(page) - 1) * 20
    ctoken = playlist_ctoken(playlist_id, first_index)
    request_headers = {
        'User-Agent': '  Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180508',
    }

    raw = util.fetch_url(
        "https://m.youtube.com/playlist?ctoken=" + ctoken + "&pbj=1",
        request_headers,
        report_text="Retrieved playlist",
    )
    # Debugging aid: dump the raw response.
    # with open('debug/playlist_debug', 'wb') as f:
    #     f.write(raw)

    text = util.uppercase_escape(raw.decode('utf-8'))
    return json.loads(text)


# HTML snippet for a single playlist statistic; $stat is the text to display.
playlist_stat_template = string.Template('\n<div>$stat</div>')
def get_playlist_page(env, start_response):
    """Render one page of a playlist as an HTML document.

    Args:
        env: request environment. Reads env['parameters'] (query parameters
            whose values are indexed with [0]; requires 'list', the playlist
            id, and optionally 'page') and env['QUERY_STRING'].
        start_response: callable invoked with the status line and headers
            before any data is fetched.

    Returns:
        The rendered page as UTF-8 encoded bytes.

    Raises:
        KeyError: if a required parameter or an expected key in the
            fetched JSON is missing.
        Exception: any error raised by a background fetch is re-raised here.
    """
    start_response('200 OK', [('Content-type','text/html'),])
    parameters = env['parameters']
    playlist_id = parameters['list'][0]
    # The default "1" is indexed just like a one-element list: "1"[0] == "1".
    page = parameters.get("page", "1")[0]
    if page == "1":
        first_page_json = playlist_first_page(playlist_id)
        this_page_json = first_page_json
    else:
        # The first page is still needed for the playlist header metadata,
        # so both requests are issued concurrently.
        tasks = (
            gevent.spawn(playlist_first_page, playlist_id, report_text="Retrieved playlist info" ),
            gevent.spawn(get_videos, playlist_id, page)
        )
        gevent.joinall(tasks)
        # Greenlet.get() re-raises an exception raised inside the greenlet;
        # reading .value would yield None for a failed fetch and only fail
        # later with an unrelated TypeError.
        first_page_json = tasks[0].get()
        this_page_json = tasks[1].get()

    try:    # first page
        video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents']
    except KeyError:    # other pages
        video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents']

    # Build all item snippets, then join once instead of repeated +=.
    videos_html = ''.join(
        html_common.video_item_html(
            yt_data_extract.renderer_info(video_json['playlistVideoRenderer']),
            html_common.small_video_item_template,
        )
        for video_json in video_list
    )

    metadata = yt_data_extract.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer'])
    # 'size' is text with thousands separators (commas are stripped here).
    video_count = int(metadata['size'].replace(',', ''))
    # 20 videos per page, matching the offset step used by get_videos.
    page_buttons = html_common.page_buttons_html(int(page), math.ceil(video_count/20), util.URL_ORIGIN + "/playlist", env['QUERY_STRING'])

    html_ready = html_common.get_html_ready(metadata)
    html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page)

    stats = (
        playlist_stat_template.substitute(stat=html_ready['size'] + ' videos')
        + playlist_stat_template.substitute(stat=html_ready['views'])
    )
    return yt_playlist_template.substitute(
        header          = html_common.get_header(),
        videos          = videos_html,
        page_buttons    = page_buttons,
        stats = stats,
        **html_ready
    ).encode('utf-8')