diff options
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/channel.py | 503 | ||||
-rw-r--r-- | youtube/comments.css | 116 | ||||
-rw-r--r-- | youtube/comments.py | 332 | ||||
-rw-r--r-- | youtube/common.py | 1278 | ||||
-rw-r--r-- | youtube/opensearch.xml | 20 | ||||
-rw-r--r-- | youtube/playlist.py | 484 | ||||
-rw-r--r-- | youtube/proto.py | 128 | ||||
-rw-r--r-- | youtube/search.py | 460 | ||||
-rw-r--r-- | youtube/shared.css | 540 | ||||
-rw-r--r-- | youtube/subscriptions.py | 36 | ||||
-rw-r--r-- | youtube/template.py | 262 | ||||
-rw-r--r-- | youtube/watch.py | 586 | ||||
-rw-r--r-- | youtube/watch_later.py | 20 | ||||
-rw-r--r-- | youtube/youtube.py | 118 |
14 files changed, 2442 insertions, 2441 deletions
diff --git a/youtube/channel.py b/youtube/channel.py index d993d3b..b7a4462 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -1,252 +1,253 @@ -import base64
-import youtube.common as common
-from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id
-import urllib
-import json
-from string import Template
-import youtube.proto as proto
-import html
-import math
-import gevent
-import re
-import functools
-
-with open("yt_channel_items_template.html", "r") as file:
- yt_channel_items_template = Template(file.read())
-
-with open("yt_channel_about_template.html", "r") as file:
- yt_channel_about_template = Template(file.read())
-
-'''continuation = Proto(
- Field('optional', 'continuation', 80226972, Proto(
- Field('optional', 'browse_id', 2, String),
- Field('optional', 'params', 3, Base64(Proto(
- Field('optional', 'channel_tab', 2, String),
- Field('optional', 'sort', 3, ENUM
- Field('optional', 'page', 15, String),
- )))
- ))
-)'''
-
-
-'''channel_continuation = Proto(
- Field('optional', 'pointless_nest', 80226972, Proto(
- Field('optional', 'channel_id', 2, String),
- Field('optional', 'continuation_info', 3, Base64(Proto(
- Field('optional', 'channel_tab', 2, String),
- Field('optional', 'sort', 3, ENUM
- Field('optional', 'page', 15, String),
- )))
- ))
-)'''
-
-headers_1 = (
- ('Accept', '*/*'),
- ('Accept-Language', 'en-US,en;q=0.5'),
- ('X-YouTube-Client-Name', '1'),
- ('X-YouTube-Client-Version', '2.20180614'),
-)
-# https://www.youtube.com/browse_ajax?action_continuation=1&direct_render=1&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D
-# https://www.youtube.com/browse_ajax?ctoken=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&itct=CDsQybcCIhMIhZi1krTc2wIVjMicCh2HXQnhKJsc
-
-# grid view: 4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA
-# list view: 4qmFsgJCEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJkVnWjJhV1JsYjNNWUF5QUFNQUk0QVdBQmFnQjZBVEs0QVFBJTNE
-# SORT:
-# Popular - 1
-# Oldest - 2
-# Newest - 3
-
-# view:
-# grid: 0 or 1
-# list: 2
-def channel_ctoken(channel_id, page, sort, tab, view=1):
-
- tab = proto.string(2, tab )
- sort = proto.uint(3, int(sort))
- page = proto.string(15, str(page) )
- view = proto.uint(6, int(view))
- continuation_info = proto.string( 3, proto.percent_b64encode(tab + view + sort + page) )
-
- channel_id = proto.string(2, channel_id )
- pointless_nest = proto.string(80226972, channel_id + continuation_info)
-
- return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
-
-def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
- ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
- url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
-
- print("Sending channel tab ajax request")
- content = common.fetch_url(url, headers_1)
- print("Finished recieving channel tab response")
-
- info = json.loads(content)
- return info
-
-
-grid_video_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <span class="views">$views</span>
- <time datetime="$datetime">Uploaded $published</time>
-
- </div>
- <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
- </div>
-''')
-
-def grid_video_item_info(grid_video_renderer, author):
- renderer = grid_video_renderer
- return {
- "title": renderer['title']['simpleText'],
- "id": renderer['videoId'],
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "author": author,
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''),
- }
-
-def grid_video_item_html(item):
- video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
- return grid_video_item_template.substitute(
- title = html.escape(item["title"]),
- views = item["views"],
- duration = item["duration"],
- url = URL_ORIGIN + "/watch?v=" + item["id"],
- thumbnail = get_thumbnail_url(item['id']),
- video_info = html.escape(json.dumps(video_info)),
- published = item["published"],
- datetime = '', # TODO
- )
-
-def get_number_of_videos(channel_id):
- # Uploads playlist
- playlist_id = 'UU' + channel_id[2:]
- url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
- print("Getting number of videos")
- response = common.fetch_url(url, common.mobile_ua + headers_1)
- with open('playlist_debug_metadata', 'wb') as f:
- f.write(response)
- response = response.decode('utf-8')
- print("Got response for number of videos")
- return int(re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response).group(1).replace(',',''))
-
-@functools.lru_cache(maxsize=128)
-def get_channel_id(username):
- # method that gives the smallest possible response at ~10 kb
- # needs to be as fast as possible
- url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
- response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8')
- return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)
-
-
-def channel_videos_html(polymer_json, current_page=1, number_of_videos = 1000, current_query_string=''):
- microformat = polymer_json[1]['response']['microformat']['microformatDataRenderer']
- channel_url = microformat['urlCanonical'].rstrip('/')
- channel_id = channel_url[channel_url.rfind('/')+1:]
- try:
- items = polymer_json[1]['response']['continuationContents']['gridContinuation']['items']
- except KeyError:
- items = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer']['items']
- items_html = ''
- for video in items:
- items_html += grid_video_item_html(grid_video_item_info(video['gridVideoRenderer'], microformat['title']))
-
- return yt_channel_items_template.substitute(
- channel_title = microformat['title'],
- channel_about_url = URL_ORIGIN + "/channel/" + channel_id + "/about",
- avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
- page_title = microformat['title'] + ' - Channel',
- items = items_html,
- page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string)
- )
-
-channel_link_template = Template('''
-<a href="$url">$text</a>''')
-stat_template = Template('''
-<li>$stat_value</li>''')
-def channel_about_page(polymer_json):
- avatar = '/' + polymer_json[1]['response']['microformat']['microformatDataRenderer']['thumbnail']['thumbnails'][0]['url']
- # my goodness...
- channel_metadata = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][5]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']
- channel_links = ''
- for link_json in channel_metadata['primaryLinks']:
- channel_links += channel_link_template.substitute(
- url = html.escape(link_json['navigationEndpoint']['urlEndpoint']['url']),
- text = common.get_plain_text(link_json['title']),
- )
-
- stats = ''
- for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'):
- try:
- stat_value = common.get_plain_text(channel_metadata[stat_name])
- except KeyError:
- continue
- else:
- stats += stat_template.substitute(stat_value=stat_value)
- try:
- description = common.format_text_runs(common.get_formatted_text(channel_metadata['description']))
- except KeyError:
- description = ''
- return yt_channel_about_template.substitute(
- page_title = common.get_plain_text(channel_metadata['title']) + ' - About',
- channel_title = common.get_plain_text(channel_metadata['title']),
- avatar = html.escape(avatar),
- description = description,
- links = channel_links,
- stats = stats,
- channel_videos_url = common.URL_ORIGIN + '/channel/' + channel_metadata['channelId'] + '/videos',
- )
-
-def get_channel_page(url, query_string=''):
- path_components = url.rstrip('/').lstrip('/').split('/')
- channel_id = path_components[0]
- try:
- tab = path_components[1]
- except IndexError:
- tab = 'videos'
-
- parameters = urllib.parse.parse_qs(query_string)
- page_number = int(common.default_multi_get(parameters, 'page', 0, default='1'))
- sort = common.default_multi_get(parameters, 'sort', 0, default='3')
- view = common.default_multi_get(parameters, 'view', 0, default='1')
-
- if tab == 'videos':
- tasks = (
- gevent.spawn(get_number_of_videos, channel_id ),
- gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
- )
- gevent.joinall(tasks)
- number_of_videos, polymer_json = tasks[0].value, tasks[1].value
-
- return channel_videos_html(polymer_json, page_number, number_of_videos, query_string)
- elif tab == 'about':
- polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', headers_1)
- polymer_json = json.loads(polymer_json)
- return channel_about_page(polymer_json)
- else:
- raise ValueError('Unknown channel tab: ' + tab)
-
-def get_user_page(url, query_string=''):
- path_components = url.rstrip('/').lstrip('/').split('/')
- username = path_components[0]
- try:
- page = path_components[1]
- except IndexError:
- page = 'videos'
- if page == 'videos':
- polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/videos?pbj=1', headers_1)
- polymer_json = json.loads(polymer_json)
- return channel_videos_html(polymer_json)
- elif page == 'about':
- polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/about?pbj=1', headers_1)
- polymer_json = json.loads(polymer_json)
- return channel_about_page(polymer_json)
- else:
+import base64 +import youtube.common as common +from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id +import urllib +import json +from string import Template +import youtube.proto as proto +import html +import math +import gevent +import re +import functools + +with open("yt_channel_items_template.html", "r") as file: + yt_channel_items_template = Template(file.read()) + +with open("yt_channel_about_template.html", "r") as file: + yt_channel_about_template = Template(file.read()) + +'''continuation = Proto( + Field('optional', 'continuation', 80226972, Proto( + Field('optional', 'browse_id', 2, String), + Field('optional', 'params', 3, Base64(Proto( + Field('optional', 'channel_tab', 2, String), + Field('optional', 'sort', 3, ENUM + Field('optional', 'page', 15, String), + ))) + )) +)''' + + +'''channel_continuation = Proto( + Field('optional', 'pointless_nest', 80226972, Proto( + Field('optional', 'channel_id', 2, String), + Field('optional', 'continuation_info', 3, Base64(Proto( + Field('optional', 'channel_tab', 2, String), + Field('optional', 'sort', 3, ENUM + Field('optional', 'page', 15, String), + ))) + )) +)''' + +headers_1 = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '1'), + ('X-YouTube-Client-Version', '2.20180614'), +) +# https://www.youtube.com/browse_ajax?action_continuation=1&direct_render=1&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D +# https://www.youtube.com/browse_ajax?ctoken=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&itct=CDsQybcCIhMIhZi1krTc2wIVjMicCh2HXQnhKJsc + +# grid view: 4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA +# list view: 4qmFsgJCEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJkVnWjJhV1JsYjNNWUF5QUFNQUk0QVdBQmFnQjZBVEs0QVFBJTNE +# SORT: +# Popular - 1 +# Oldest - 2 +# Newest - 3 + +# view: +# grid: 0 or 1 +# list: 2 +def channel_ctoken(channel_id, page, sort, tab, view=1): + + tab = proto.string(2, tab ) + sort = proto.uint(3, int(sort)) + page = proto.string(15, str(page) ) + view = proto.uint(6, int(view)) + continuation_info = proto.string( 3, proto.percent_b64encode(tab + view + sort + page) ) + + channel_id = proto.string(2, channel_id ) + pointless_nest = proto.string(80226972, channel_id + continuation_info) + + return base64.urlsafe_b64encode(pointless_nest).decode('ascii') + +def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1): + ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D') + url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken + + print("Sending channel tab ajax request") + content = common.fetch_url(url, headers_1) + print("Finished recieving channel tab response") + + info = json.loads(content) + return info + + +grid_video_item_template = Template(''' + <div class="small-item-box"> + <div class="small-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + <a class="title" href="$url" title="$title">$title</a> + + <span class="views">$views</span> + <time datetime="$datetime">Uploaded $published</time> + + </div> + <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add"> + </div> +''') + +def grid_video_item_info(grid_video_renderer, author): + renderer = grid_video_renderer + return { + "title": renderer['title']['simpleText'], + "id": renderer['videoId'], + "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'], + "author": author, + "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''), + } + +def grid_video_item_html(item): + video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')}) + return grid_video_item_template.substitute( + title = html.escape(item["title"]), + views = item["views"], + duration = item["duration"], + url = URL_ORIGIN + "/watch?v=" + item["id"], + thumbnail = get_thumbnail_url(item['id']), + video_info = html.escape(json.dumps(video_info)), + published = item["published"], + datetime = '', # TODO + ) + +def get_number_of_videos(channel_id): + # Uploads playlist + playlist_id = 'UU' + channel_id[2:] + url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true' + print("Getting number of videos") + response = common.fetch_url(url, common.mobile_ua + headers_1) + with open('playlist_debug_metadata', 'wb') as f: + f.write(response) + response = response.decode('utf-8') + print("Got response for number of videos") + return int(re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response).group(1).replace(',','')) + +@functools.lru_cache(maxsize=128) +def get_channel_id(username): + # method that gives the smallest possible response at ~10 kb + # needs to be as fast as possible + url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true' + response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8') + return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1) + + +def channel_videos_html(polymer_json, current_page=1, number_of_videos = 1000, current_query_string=''): + microformat = polymer_json[1]['response']['microformat']['microformatDataRenderer'] + channel_url = microformat['urlCanonical'].rstrip('/') + channel_id = channel_url[channel_url.rfind('/')+1:] + try: + items = polymer_json[1]['response']['continuationContents']['gridContinuation']['items'] + except KeyError: + items = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer']['items'] + items_html = '' + for video in items: + items_html += grid_video_item_html(grid_video_item_info(video['gridVideoRenderer'], microformat['title'])) + + return yt_channel_items_template.substitute( + channel_title = microformat['title'], + channel_about_url = URL_ORIGIN + "/channel/" + channel_id + "/about", + avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], + page_title = microformat['title'] + ' - Channel', + items = items_html, + page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string), + number_of_results = '{:,}'.format(number_of_videos) + " videos", + ) + +channel_link_template = Template(''' +<a href="$url">$text</a>''') +stat_template = Template(''' +<li>$stat_value</li>''') +def channel_about_page(polymer_json): + avatar = '/' + polymer_json[1]['response']['microformat']['microformatDataRenderer']['thumbnail']['thumbnails'][0]['url'] + # my goodness... + channel_metadata = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][5]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'] + channel_links = '' + for link_json in channel_metadata['primaryLinks']: + channel_links += channel_link_template.substitute( + url = html.escape(link_json['navigationEndpoint']['urlEndpoint']['url']), + text = common.get_plain_text(link_json['title']), + ) + + stats = '' + for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'): + try: + stat_value = common.get_plain_text(channel_metadata[stat_name]) + except KeyError: + continue + else: + stats += stat_template.substitute(stat_value=stat_value) + try: + description = common.format_text_runs(common.get_formatted_text(channel_metadata['description'])) + except KeyError: + description = '' + return yt_channel_about_template.substitute( + page_title = common.get_plain_text(channel_metadata['title']) + ' - About', + channel_title = common.get_plain_text(channel_metadata['title']), + avatar = html.escape(avatar), + description = description, + links = channel_links, + stats = stats, + channel_videos_url = common.URL_ORIGIN + '/channel/' + channel_metadata['channelId'] + '/videos', + ) + +def get_channel_page(url, query_string=''): + path_components = url.rstrip('/').lstrip('/').split('/') + channel_id = path_components[0] + try: + tab = path_components[1] + except IndexError: + tab = 'videos' + + parameters = urllib.parse.parse_qs(query_string) + page_number = int(common.default_multi_get(parameters, 'page', 0, default='1')) + sort = common.default_multi_get(parameters, 'sort', 0, default='3') + view = common.default_multi_get(parameters, 'view', 0, default='1') + + if tab == 'videos': + tasks = ( + gevent.spawn(get_number_of_videos, channel_id ), + gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view) + ) + gevent.joinall(tasks) + number_of_videos, polymer_json = tasks[0].value, tasks[1].value + + return channel_videos_html(polymer_json, page_number, number_of_videos, query_string) + elif tab == 'about': + polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', headers_1) + polymer_json = json.loads(polymer_json) + return channel_about_page(polymer_json) + else: + raise ValueError('Unknown channel tab: ' + tab) + +def get_user_page(url, query_string=''): + path_components = url.rstrip('/').lstrip('/').split('/') + username = path_components[0] + try: + page = path_components[1] + except IndexError: + page = 'videos' + if page == 'videos': + polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/videos?pbj=1', headers_1) + polymer_json = json.loads(polymer_json) + return channel_videos_html(polymer_json) + elif page == 'about': + polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/about?pbj=1', headers_1) + polymer_json = json.loads(polymer_json) + return channel_about_page(polymer_json) + else: raise ValueError('Unknown channel page: ' + page)
\ No newline at end of file diff --git a/youtube/comments.css b/youtube/comments.css index 93a6495..325a433 100644 --- a/youtube/comments.css +++ b/youtube/comments.css @@ -1,59 +1,59 @@ -.comments{
- grid-row-gap: 10px;
- display: grid;
- align-content:start;
-}
-
-.comment{
- display:grid;
- grid-template-columns: 0fr 0fr 1fr;
- grid-template-rows: 0fr 0fr 0fr 0fr;
- background-color: #dadada;
-}
-
-.comment .author-avatar{
- grid-column: 1;
- grid-row: 1 / span 3;
- align-self: start;
- margin-right: 5px;
-}
-
-.comment address{
- grid-column: 2;
- grid-row: 1;
- margin-right:15px;
- white-space: nowrap;
-}
-
-.comment .text{
- grid-column: 2 / span 2;
- grid-row: 2;
- white-space: pre-line;
- min-width: 0;
-}
-
-.comment time{
- grid-column: 3;
- grid-row: 1;
- white-space: nowrap;
-
-}
-
-
-.comment .likes{
- grid-column:2;
- grid-row:3;
- font-weight:bold;
- white-space: nowrap;
-}
-
-.comment .replies{
- grid-column:2 / span 2;
- grid-row:4;
- justify-self:start;
-}
-
-.more-comments{
- justify-self:center;
-
+.comments{ + grid-row-gap: 10px; + display: grid; + align-content:start; +} + +.comment{ + display:grid; + grid-template-columns: 0fr 0fr 1fr; + grid-template-rows: 0fr 0fr 0fr 0fr; + background-color: #dadada; +} + +.comment .author-avatar{ + grid-column: 1; + grid-row: 1 / span 3; + align-self: start; + margin-right: 5px; +} + +.comment address{ + grid-column: 2; + grid-row: 1; + margin-right:15px; + white-space: nowrap; +} + +.comment .text{ + grid-column: 2 / span 2; + grid-row: 2; + white-space: pre-line; + min-width: 0; +} + +.comment time{ + grid-column: 3; + grid-row: 1; + white-space: nowrap; + +} + + +.comment .likes{ + grid-column:2; + grid-row:3; + font-weight:bold; + white-space: nowrap; +} + +.comment .replies{ + grid-column:2 / span 2; + grid-row:4; + justify-self:start; +} + +.more-comments{ + justify-self:center; + }
\ No newline at end of file diff --git a/youtube/comments.py b/youtube/comments.py index 4b30a48..3f44758 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -1,166 +1,166 @@ -import json
-import youtube.proto as proto
-import base64
-from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url
-from string import Template
-import urllib.request
-import urllib
-import html
-comment_template = Template('''
- <div class="comment-container">
- <div class="comment">
- <a class="author-avatar" href="$author_url" title="$author">
- <img class="author-avatar-img" src="$author_avatar">
- </a>
- <address>
- <a class="author" href="$author_url" title="$author">$author</a>
- </address>
- <span class="text">$text</span>
- <time datetime="$datetime">$published</time>
- <span class="likes">$likes</span>
-$replies
- </div>
-
- </div>
-''')
-reply_link_template = Template('''
- <a href="$url" class="replies">View replies</a>
-''')
-with open("yt_comments_template.html", "r") as file:
- yt_comments_template = Template(file.read())
-
-
-# <a class="replies-link" href="$replies_url">$replies_link_text</a>
-
-
-# Here's what I know about the secret key (starting with ASJN_i)
-# *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers):
-# -Video id
-# -Offset
-# -Sort
-# *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely.
-# *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3")
-# *The encoded data is not valid protobuf
-# *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests
-# *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two.
-# *Old ASJN's continue to work, and start at the same comment even if new comments have been posted since
-# *The ASJN has no relation with any of the data in the response it came from
-
-def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''):
- video_id = proto.as_bytes(video_id)
- secret_key = proto.as_bytes(secret_key)
-
-
- page_info = proto.string(4,video_id) + proto.uint(6, sort)
- offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
- if secret_key:
- offset_information = proto.string(1, secret_key) + offset_information
-
- result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information)
- return base64.urlsafe_b64encode(result).decode('ascii')
-
-mobile_headers = {
- 'Host': 'm.youtube.com',
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '2',
- 'X-YouTube-Client-Version': '1.20180613',
-}
-def request_comments(ctoken, replies=False):
- if replies: # let's make it use different urls for no reason despite all the data being encoded
- base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
- else:
- base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
- url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
- print("Sending comments ajax request")
- for i in range(0,8): # don't retry more than 8 times
- content = fetch_url(url, headers=mobile_headers)
- if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
- content = content[4:]
- elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
- content = b''
- print("got <!DOCTYPE>, retrying")
- continue
- break
- '''with open('comments_debug', 'wb') as f:
- f.write(content)'''
- return content
-
-def parse_comments(content, replies=False):
- try:
- content = json.loads(uppercase_escape(content.decode('utf-8')))
- #print(content)
- comments_raw = content['content']['continuation_contents']['contents']
- ctoken = default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')
-
- comments = []
- for comment_raw in comments_raw:
- replies_url = ''
- if not replies:
- if comment_raw['replies'] is not None:
- ctoken = comment_raw['replies']['continuations'][0]['continuation']
- replies_url = URL_ORIGIN + '/comments?ctoken=' + ctoken + "&replies=1"
- comment_raw = comment_raw['comment']
- comment = {
- 'author': comment_raw['author']['runs'][0]['text'],
- 'author_url': comment_raw['author_endpoint']['url'],
- 'author_avatar': comment_raw['author_thumbnail']['url'],
- 'likes': comment_raw['like_count'],
- 'published': comment_raw['published_time']['runs'][0]['text'],
- 'text': comment_raw['content']['runs'],
- 'reply_count': '',
- 'replies_url': replies_url,
- }
- comments.append(comment)
- except Exception as e:
- print('Error parsing comments: ' + str(e))
- comments = ()
- ctoken = ''
- else:
- print("Finished getting and parsing comments")
- return {'ctoken': ctoken, 'comments': comments}
-
-def get_comments_html(result):
- html_result = ''
- for comment in result['comments']:
- replies = ''
- if comment['replies_url']:
- replies = reply_link_template.substitute(url=comment['replies_url'])
- html_result += comment_template.substitute(
- author=html.escape(comment['author']),
- author_url = URL_ORIGIN + comment['author_url'],
- author_avatar = '/' + comment['author_avatar'],
- likes = str(comment['likes']) + ' likes' if str(comment['likes']) != '0' else '',
- published = comment['published'],
- text = format_text_runs(comment['text']),
- datetime = '', #TODO
- replies=replies,
- #replies='',
- )
- return html_result, result['ctoken']
-
-def video_comments(video_id, sort=0, offset=0, secret_key=''):
- result = parse_comments(request_comments(make_comment_ctoken(video_id, sort, offset, secret_key)))
- return get_comments_html(result)
-
-more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''')
-
-def get_comments_page(query_string):
- parameters = urllib.parse.parse_qs(query_string)
- ctoken = parameters['ctoken'][0]
- replies = default_multi_get(parameters, 'replies', 0, default="0") == "1"
-
- result = parse_comments(request_comments(ctoken, replies), replies)
- comments_html, ctoken = get_comments_html(result)
- if ctoken == '':
- more_comments_button = ''
- else:
- more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
-
- return yt_comments_template.substitute(
- comments = comments_html,
- page_title = 'Comments',
- more_comments_button=more_comments_button,
- )
-
+import json +import youtube.proto as proto +import base64 +from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url +from string import Template +import urllib.request +import urllib +import html +comment_template = Template(''' + <div class="comment-container"> + <div class="comment"> + <a class="author-avatar" href="$author_url" title="$author"> + <img class="author-avatar-img" src="$author_avatar"> + </a> + <address> + <a class="author" href="$author_url" title="$author">$author</a> + </address> + <span class="text">$text</span> + <time datetime="$datetime">$published</time> + <span class="likes">$likes</span> +$replies + </div> + + </div> +''') +reply_link_template = Template(''' + <a href="$url" class="replies">View replies</a> +''') +with open("yt_comments_template.html", "r") as file: + yt_comments_template = Template(file.read()) + + +# <a class="replies-link" href="$replies_url">$replies_link_text</a> + + +# Here's what I know about the secret key (starting with ASJN_i) +# *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers): +# -Video id +# -Offset +# -Sort +# *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely. +# *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3") +# *The encoded data is not valid protobuf +# *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests +# *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two. +# *Old ASJN's continue to work, and start at the same comment even if new comments have been posted since +# *The ASJN has no relation with any of the data in the response it came from + +def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''): + video_id = proto.as_bytes(video_id) + secret_key = proto.as_bytes(secret_key) + + + page_info = proto.string(4,video_id) + proto.uint(6, sort) + offset_information = proto.nested(4, page_info) + proto.uint(5, offset) + if secret_key: + offset_information = proto.string(1, secret_key) + offset_information + + result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information) + return base64.urlsafe_b64encode(result).decode('ascii') + +mobile_headers = { + 'Host': 'm.youtube.com', + 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', + 'Accept': '*/*', + 'Accept-Language': 'en-US,en;q=0.5', + 'X-YouTube-Client-Name': '2', + 'X-YouTube-Client-Version': '1.20180613', +} +def request_comments(ctoken, replies=False): + if replies: # let's make it use different urls for no reason despite all the data being encoded + base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken=" + else: + base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken=" + url = base_url + ctoken.replace("=", "%3D") + "&pbj=1" + print("Sending comments ajax request") + for i in range(0,8): # don't retry more than 8 times + content = fetch_url(url, headers=mobile_headers) + if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason + content = content[4:] + elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason + content = b'' + print("got <!DOCTYPE>, retrying") + continue + break + '''with open('comments_debug', 'wb') as f: + f.write(content)''' + return content + +def parse_comments(content, replies=False): + try: + content = json.loads(uppercase_escape(content.decode('utf-8'))) + #print(content) + comments_raw = content['content']['continuation_contents']['contents'] + ctoken = default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='') + + comments = [] + for comment_raw in comments_raw: + replies_url = '' + if not replies: + if comment_raw['replies'] is not None: + ctoken = comment_raw['replies']['continuations'][0]['continuation'] + replies_url = URL_ORIGIN + '/comments?ctoken=' + ctoken + "&replies=1" + comment_raw = comment_raw['comment'] + comment = { + 'author': comment_raw['author']['runs'][0]['text'], + 'author_url': comment_raw['author_endpoint']['url'], + 'author_avatar': comment_raw['author_thumbnail']['url'], + 'likes': comment_raw['like_count'], + 'published': comment_raw['published_time']['runs'][0]['text'], + 'text': comment_raw['content']['runs'], + 'reply_count': '', + 'replies_url': replies_url, + } + comments.append(comment) + except Exception as e: + print('Error parsing comments: ' + str(e)) + comments = () + ctoken = '' + else: + print("Finished getting and parsing comments") + return {'ctoken': ctoken, 'comments': comments} + +def get_comments_html(result): + html_result = '' + for comment in result['comments']: + replies = '' + if comment['replies_url']: + replies = reply_link_template.substitute(url=comment['replies_url']) + html_result += comment_template.substitute( + author=html.escape(comment['author']), + author_url = URL_ORIGIN + comment['author_url'], + author_avatar = '/' + comment['author_avatar'], + likes = str(comment['likes']) + ' likes' if str(comment['likes']) != '0' else '', + published = comment['published'], + text = format_text_runs(comment['text']), + datetime = '', #TODO + replies=replies, + #replies='', + ) + return html_result, result['ctoken'] + +def video_comments(video_id, sort=0, offset=0, secret_key=''): + result = parse_comments(request_comments(make_comment_ctoken(video_id, sort, offset, secret_key))) + return get_comments_html(result) + +more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''') + +def get_comments_page(query_string): + parameters = urllib.parse.parse_qs(query_string) + ctoken = parameters['ctoken'][0] + replies = default_multi_get(parameters, 'replies', 0, default="0") == "1" + + result = parse_comments(request_comments(ctoken, replies), replies) + comments_html, ctoken = get_comments_html(result) + if ctoken == '': + more_comments_button = '' + else: + more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken) + + return yt_comments_template.substitute( + comments = comments_html, + page_title = 'Comments', + more_comments_button=more_comments_button, + ) + diff --git a/youtube/common.py b/youtube/common.py index 67bd81f..3133fed 100644 --- a/youtube/common.py +++ b/youtube/common.py @@ -1,639 +1,639 @@ -from youtube.template import Template
-import html
-import json
-import re
-import urllib.parse
-import gzip
-import brotli
-import time
-
-
-URL_ORIGIN = "/https://www.youtube.com"
-
-
-# videos (all of type str):
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# published
-# duration
-# likes
-# dislikes
-# views
-# playlist_index
-
-# playlists:
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# updated
-# size
-# first_video_id
-
-
-
-
-
-
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="current-page-button">$page</a>''')
-
-medium_playlist_item_template = Template('''
- <div class="medium-item">
- <a class="playlist-thumbnail-box" href="$url" title="$title">
- <img class="playlist-thumbnail-img" src="$thumbnail">
- <div class="playlist-thumbnail-info">
- <span>$size</span>
- </div>
- </a>
-
- <a class="title" href="$url" title=$title>$title</a>
-
- <address><a href="$author_url">$author</a></address>
- </div>
-''')
-medium_video_item_template = Template('''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
-
- <a class="title" href="$url">$title</a>
-
- <div class="stats">$stats</div>
- <!--
- <address><a href="$author_url">$author</a></address>
- <span class="views">$views</span>
- <time datetime="$datetime">Uploaded $published</time>-->
-
- <span class="description">$description</span>
- <span class="badges">$badges</span>
- </div>
-''')
-
-small_video_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <address>$author</address>
- <span class="views">$views</span>
-
- </div>
- <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
- </div>
-''')
-
-small_playlist_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="playlist-thumbnail-box" href="$url" title="$title">
- <img class="playlist-thumbnail-img" src="$thumbnail">
- <div class="playlist-thumbnail-info">
- <span>$size</span>
- </div>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <address>$author</address>
- </div>
- </div>
-''')
-
-medium_channel_item_template = Template('''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
-
- <a class="title" href="$url">$title</a>
-
- <span>$subscriber_count</span>
- <span>$size</span>
-
- <span class="description">$description</span>
- </div>
-''')
-
-
-def fetch_url(url, headers=(), timeout=5, report_text=None):
- if isinstance(headers, list):
- headers += [('Accept-Encoding', 'gzip, br')]
- headers = dict(headers)
- elif isinstance(headers, tuple):
- headers += (('Accept-Encoding', 'gzip, br'),)
- headers = dict(headers)
- else:
- headers = headers.copy()
- headers['Accept-Encoding'] = 'gzip, br'
-
- start_time = time.time()
-
- req = urllib.request.Request(url, headers=headers)
- response = urllib.request.urlopen(req, timeout=timeout)
- response_time = time.time()
-
- content = response.read()
- read_finish = time.time()
- if report_text:
- print(report_text, 'Latency:', response_time - start_time, ' Read time:', read_finish - response_time)
- encodings = response.getheader('Content-Encoding', default='identity').replace(' ', '').split(',')
- for encoding in reversed(encodings):
- if encoding == 'identity':
- continue
- if encoding == 'br':
- content = brotli.decompress(content)
- elif encoding == 'gzip':
- content = gzip.decompress(content)
- return content
-
-mobile_ua = (('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'),)
-
-def dict_add(*dicts):
- for dictionary in dicts[1:]:
- dicts[0].update(dictionary)
- return dicts[0]
-
-def video_id(url):
- url_parts = urllib.parse.urlparse(url)
- return urllib.parse.parse_qs(url_parts.query)['v'][0]
-
-def uppercase_escape(s):
- return re.sub(
- r'\\U([0-9a-fA-F]{8})',
- lambda m: chr(int(m.group(1), base=16)), s)
-
-def default_multi_get(object, *keys, default):
- ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
- try:
- for key in keys:
- object = object[key]
- return object
- except (IndexError, KeyError):
- return default
-
-def get_plain_text(node):
- try:
- return html.escape(node['simpleText'])
- except KeyError:
- return unformmated_text_runs(node['runs'])
-
-def unformmated_text_runs(runs):
- result = ''
- for text_run in runs:
- result += html.escape(text_run["text"])
- return result
-
-def format_text_runs(runs):
- if isinstance(runs, str):
- return runs
- result = ''
- for text_run in runs:
- if text_run.get("bold", False):
- result += "<b>" + html.escape(text_run["text"]) + "</b>"
- elif text_run.get('italics', False):
- result += "<i>" + html.escape(text_run["text"]) + "</i>"
- else:
- result += html.escape(text_run["text"])
- return result
-
-# default, sddefault, mqdefault, hqdefault, hq720
-def get_thumbnail_url(video_id):
- return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
-
-def seconds_to_timestamp(seconds):
- seconds = int(seconds)
- hours, seconds = divmod(seconds,3600)
- minutes, seconds = divmod(seconds,60)
- if hours != 0:
- timestamp = str(hours) + ":"
- timestamp += str(minutes).zfill(2) # zfill pads with zeros
- else:
- timestamp = str(minutes)
-
- timestamp += ":" + str(seconds).zfill(2)
- return timestamp
-
-# playlists:
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# updated
-# size
-# first_video_id
-def medium_playlist_item_info(playlist_renderer):
- renderer = playlist_renderer
- try:
- author_url = URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- except KeyError: # radioRenderer
- author_url = ''
- try:
- thumbnail = renderer['thumbnails'][0]['thumbnails'][0]['url']
- except KeyError:
- thumbnail = renderer['thumbnail']['thumbnails'][0]['url']
- return {
- "title": renderer["title"]["simpleText"],
- 'id': renderer["playlistId"],
- 'size': renderer.get('videoCount', '50+'),
- "author": default_multi_get(renderer,'longBylineText','runs',0,'text', default='Youtube'),
- "author_url": author_url,
- 'thumbnail': thumbnail,
- }
-
-def medium_video_item_info(video_renderer):
- renderer = video_renderer
- try:
- return {
- "title": renderer["title"]["simpleText"],
- "id": renderer["videoId"],
- "description": renderer.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text
- "thumbnail": get_thumbnail_url(renderer["videoId"]),
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "author": renderer['longBylineText']['runs'][0]['text'],
- "author_url": URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''),
- }
- except KeyError:
- print(renderer)
- raise
-
-def small_video_item_info(compact_video_renderer):
- renderer = compact_video_renderer
- return {
- "title": renderer['title']['simpleText'],
- "id": renderer['videoId'],
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "author": renderer['longBylineText']['runs'][0]['text'],
- "author_url": renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- }
-
-
-# -----
-# HTML
-# -----
-
-def small_video_item_html(item):
- video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
- return small_video_item_template.substitute(
- title = html.escape(item["title"]),
- views = item["views"],
- author = html.escape(item["author"]),
- duration = item["duration"],
- url = URL_ORIGIN + "/watch?v=" + item["id"],
- thumbnail = get_thumbnail_url(item['id']),
- video_info = html.escape(json.dumps(video_info)),
- )
-
-def small_playlist_item_html(item):
- return small_playlist_item_template.substitute(
- title=html.escape(item["title"]),
- size = item['size'],
- author="",
- url = URL_ORIGIN + "/playlist?list=" + item["id"],
- thumbnail= get_thumbnail_url(item['first_video_id']),
- )
-
-def medium_playlist_item_html(item):
- return medium_playlist_item_template.substitute(
- title=html.escape(item["title"]),
- size = item['size'],
- author=item['author'],
- author_url= URL_ORIGIN + item['author_url'],
- url = URL_ORIGIN + "/playlist?list=" + item["id"],
- thumbnail= item['thumbnail'],
- )
-
-def medium_video_item_html(medium_video_info):
- info = medium_video_info
-
- return medium_video_item_template.substitute(
- title=html.escape(info["title"]),
- views=info["views"],
- published = info["published"],
- description = format_text_runs(info["description"]),
- author=html.escape(info["author"]),
- author_url=info["author_url"],
- duration=info["duration"],
- url = URL_ORIGIN + "/watch?v=" + info["id"],
- thumbnail=info['thumbnail'],
- datetime='', # TODO
- )
-
-html_functions = {
- 'compactVideoRenderer': lambda x: small_video_item_html(small_video_item_info(x)),
- 'videoRenderer': lambda x: medium_video_item_html(medium_video_item_info(x)),
- 'compactPlaylistRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
- 'playlistRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
- 'channelRenderer': lambda x: '',
- 'radioRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
- 'compactRadioRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
- 'didYouMeanRenderer': lambda x: '',
-}
-
-
-
-
-
-
-
-def get_url(node):
- try:
- return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- except KeyError:
- return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
-
-
-def get_text(node):
- try:
- return node['simpleText']
- except KeyError:
- return node['runs'][0]['text']
-
-def get_formatted_text(node):
- try:
- return node['runs']
- except KeyError:
- return node['simpleText']
-
-def get_badges(node):
- badges = []
- for badge_node in node:
- badge = badge_node['metadataBadgeRenderer']['label']
- if badge.lower() != 'new':
- badges.append(badge)
- return badges
-
-def get_thumbnail(node):
- try:
- return node['thumbnails'][0]['url'] # polymer format
- except KeyError:
- return node['url'] # ajax format
-
-dispatch = {
-
-# polymer format
- 'title': ('title', get_text),
- 'publishedTimeText': ('published', get_text),
- 'videoId': ('id', lambda node: node),
- 'descriptionSnippet': ('description', get_formatted_text),
- 'lengthText': ('duration', get_text),
- 'thumbnail': ('thumbnail', get_thumbnail),
- 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
-
- 'videoCountText': ('size', get_text),
- 'playlistId': ('id', lambda node: node),
-
- 'subscriberCountText': ('subscriber_count', get_text),
- 'channelId': ('id', lambda node: node),
- 'badges': ('badges', get_badges),
-
-# ajax format
- 'view_count_text': ('views', get_text),
- 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
- 'owner_text': ('author', get_text),
- 'owner_endpoint': ('author_url', lambda node: node['url']),
- 'description': ('description', get_formatted_text),
- 'index': ('playlist_index', get_text),
- 'short_byline': ('author', get_text),
- 'length': ('duration', get_text),
- 'video_id': ('id', lambda node: node),
-
-}
-
-def renderer_info(renderer):
- try:
- info = {}
- if 'viewCountText' in renderer: # prefer this one as it contains all the digits
- info['views'] = get_text(renderer['viewCountText'])
- elif 'shortViewCountText' in renderer:
- info['views'] = get_text(renderer['shortViewCountText'])
-
- for key, node in renderer.items():
- if key in ('longBylineText', 'shortBylineText'):
- info['author'] = get_text(node)
- try:
- info['author_url'] = get_url(node)
- except KeyError:
- pass
-
- continue
-
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(renderer)
- raise
-
-def ajax_info(item_json):
- try:
- info = {}
- for key, node in item_json.items():
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(item_json)
- raise
-
-def badges_html(badges):
- return ' | '.join(map(html.escape, badges))
-
-
-
-
-
-html_transform_dispatch = {
- 'title': html.escape,
- 'published': html.escape,
- 'id': html.escape,
- 'description': format_text_runs,
- 'duration': html.escape,
- 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
- 'size': html.escape,
- 'author': html.escape,
- 'author_url': lambda url: html.escape(URL_ORIGIN + url),
- 'views': html.escape,
- 'subscriber_count': html.escape,
- 'badges': badges_html,
- 'playlist_index': html.escape,
-}
-
-def get_html_ready(item):
- html_ready = {}
- for key, value in item.items():
- try:
- function = html_transform_dispatch[key]
- except KeyError:
- continue
- html_ready[key] = function(value)
- return html_ready
-
-
-author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''')
-author_template = Template('''<address>By $author</address>''')
-stat_templates = (
- Template('''<span class="views">$views</span>'''),
- Template('''<time datetime="$datetime">$published</time>'''),
-)
-def get_video_stats(html_ready):
- stats = []
- if 'author' in html_ready:
- if 'author_url' in html_ready:
- stats.append(author_template_url.substitute(html_ready))
- else:
- stats.append(author_template.substitute(html_ready))
- for stat in stat_templates:
- try:
- stats.append(stat.strict_substitute(html_ready))
- except KeyError:
- pass
- return ' | '.join(stats)
-
-def video_item_html(item, template):
- html_ready = get_html_ready(item)
- video_info = {}
- for key in ('id', 'title', 'author'):
- try:
- video_info[key] = html_ready[key]
- except KeyError:
- video_info[key] = ''
- try:
- video_info['duration'] = html_ready['duration']
- except KeyError:
- video_info['duration'] = 'Live' # livestreams don't have a duration
-
- html_ready['video_info'] = html.escape(json.dumps(video_info) )
- html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id']
- html_ready['datetime'] = '' #TODO
-
- html_ready['stats'] = get_video_stats(html_ready)
-
- return template.substitute(html_ready)
-
-
-def playlist_item_html(item, template):
- html_ready = get_html_ready(item)
-
- html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id']
- html_ready['datetime'] = '' #TODO
- return template.substitute(html_ready)
-
-
-
-
-
-
-def make_query_string(query_string):
- return '&'.join(key + '=' + ','.join(values) for key,values in query_string.items())
-
-def update_query_string(query_string, items):
- parameters = urllib.parse.parse_qs(query_string)
- parameters.update(items)
- return make_query_string(parameters)
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="page-button">$page</div>''')
-
-def page_buttons_html(current_page, estimated_pages, url, current_query_string):
- if current_page <= 5:
- page_start = 1
- page_end = min(9, estimated_pages)
- else:
- page_start = current_page - 4
- page_end = min(current_page + 4, estimated_pages)
-
- result = ""
- for page in range(page_start, page_end+1):
- if page == current_page:
- template = current_page_button_template
- else:
- template = page_button_template
- result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) )
- return result
-
-
-
-
-
-
-
-showing_results_for = Template('''
- <div class="showing-results-for">
- <div>Showing results for <a>$corrected_query</a></div>
- <div>Search instead for <a href="$original_query_url">$original_query</a></div>
- </div>
-''')
-
-did_you_mean = Template('''
- <div class="did-you-mean">
- <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
- </div>
-''')
-
-def renderer_html(renderer, additional_info={}, current_query_string=''):
- type = list(renderer.keys())[0]
- renderer = renderer[type]
- if type in ('videoRenderer', 'playlistRenderer', 'radioRenderer', 'compactVideoRenderer', 'compactPlaylistRenderer', 'compactRadioRenderer', 'gridVideoRenderer', 'gridPlaylistRenderer', 'gridRadioRenderer'):
- info = renderer_info(renderer)
- info.update(additional_info)
- if type == 'compactVideoRenderer':
- return video_item_html(info, small_video_item_template)
- if type in ('compactPlaylistRenderer', 'compactRadioRenderer'):
- return playlist_item_html(info, small_playlist_item_template)
- if type in ('videoRenderer', 'gridVideoRenderer'):
- return video_item_html(info, medium_video_item_template)
- if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer'):
- return playlist_item_html(info, medium_playlist_item_template)
-
- if type == 'channelRenderer':
- info = renderer_info(renderer)
- html_ready = get_html_ready(info)
- html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id']
- return medium_channel_item_template.substitute(html_ready)
-
- if type == 'movieRenderer':
- return ''
- print(renderer)
- raise NotImplementedError('Unknown renderer type: ' + type)
-
-
-'videoRenderer'
-'playlistRenderer'
-'channelRenderer'
-'radioRenderer'
-'gridVideoRenderer'
-'gridPlaylistRenderer'
-
-'didYouMeanRenderer'
-'showingResultsForRenderer'
+from youtube.template import Template +import html +import json +import re +import urllib.parse +import gzip +import brotli +import time + + +URL_ORIGIN = "/https://www.youtube.com" + + +# videos (all of type str): + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# published +# duration +# likes +# dislikes +# views +# playlist_index + +# playlists: + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# updated +# size +# first_video_id + + + + + + + +page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') +current_page_button_template = Template('''<div class="current-page-button">$page</a>''') + +medium_playlist_item_template = Template(''' + <div class="medium-item"> + <a class="playlist-thumbnail-box" href="$url" title="$title"> + <img class="playlist-thumbnail-img" src="$thumbnail"> + <div class="playlist-thumbnail-info"> + <span>$size</span> + </div> + </a> + + <a class="title" href="$url" title=$title>$title</a> + + <address><a href="$author_url">$author</a></address> + </div> +''') +medium_video_item_template = Template(''' + <div class="medium-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + + <a class="title" href="$url">$title</a> + + <div class="stats">$stats</div> + <!-- + <address><a href="$author_url">$author</a></address> + <span class="views">$views</span> + <time datetime="$datetime">Uploaded $published</time>--> + + <span class="description">$description</span> + <span class="badges">$badges</span> + </div> +''') + +small_video_item_template = Template(''' + <div class="small-item-box"> + <div class="small-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + <a class="title" href="$url" title="$title">$title</a> + + <address>$author</address> + <span class="views">$views</span> + + </div> + <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add"> + </div> +''') + +small_playlist_item_template = Template(''' + <div class="small-item-box"> + <div class="small-item"> + <a class="playlist-thumbnail-box" href="$url" title="$title"> + <img class="playlist-thumbnail-img" src="$thumbnail"> + <div class="playlist-thumbnail-info"> + <span>$size</span> + </div> + </a> + <a class="title" href="$url" title="$title">$title</a> + + <address>$author</address> + </div> + </div> +''') + +medium_channel_item_template = Template(''' + <div class="medium-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + + <a class="title" href="$url">$title</a> + + <span>$subscriber_count</span> + <span>$size</span> + + <span class="description">$description</span> + </div> +''') + + +def fetch_url(url, headers=(), timeout=5, report_text=None): + if isinstance(headers, list): + headers += [('Accept-Encoding', 'gzip, br')] + headers = dict(headers) + elif isinstance(headers, tuple): + headers += (('Accept-Encoding', 'gzip, br'),) + headers = dict(headers) + else: + headers = headers.copy() + headers['Accept-Encoding'] = 'gzip, br' + + start_time = time.time() + + req = urllib.request.Request(url, headers=headers) + response = urllib.request.urlopen(req, timeout=timeout) + response_time = time.time() + + content = response.read() + read_finish = time.time() + if report_text: + print(report_text, 'Latency:', response_time - start_time, ' Read time:', read_finish - response_time) + encodings = response.getheader('Content-Encoding', default='identity').replace(' ', '').split(',') + for encoding in reversed(encodings): + if encoding == 'identity': + continue + if encoding == 'br': + content = brotli.decompress(content) + elif encoding == 'gzip': + content = gzip.decompress(content) + return content + +mobile_ua = (('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'),) + +def dict_add(*dicts): + for dictionary in dicts[1:]: + dicts[0].update(dictionary) + return dicts[0] + +def video_id(url): + url_parts = urllib.parse.urlparse(url) + return urllib.parse.parse_qs(url_parts.query)['v'][0] + +def uppercase_escape(s): + return re.sub( + r'\\U([0-9a-fA-F]{8})', + lambda m: chr(int(m.group(1), base=16)), s) + +def default_multi_get(object, *keys, default): + ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' + try: + for key in keys: + object = object[key] + return object + except (IndexError, KeyError): + return default + +def get_plain_text(node): + try: + return html.escape(node['simpleText']) + except KeyError: + return unformmated_text_runs(node['runs']) + +def unformmated_text_runs(runs): + result = '' + for text_run in runs: + result += html.escape(text_run["text"]) + return result + +def format_text_runs(runs): + if isinstance(runs, str): + return runs + result = '' + for text_run in runs: + if text_run.get("bold", False): + result += "<b>" + html.escape(text_run["text"]) + "</b>" + elif text_run.get('italics', False): + result += "<i>" + html.escape(text_run["text"]) + "</i>" + else: + result += html.escape(text_run["text"]) + return result + +# default, sddefault, mqdefault, hqdefault, hq720 +def get_thumbnail_url(video_id): + return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + +def seconds_to_timestamp(seconds): + seconds = int(seconds) + hours, seconds = divmod(seconds,3600) + minutes, seconds = divmod(seconds,60) + if hours != 0: + timestamp = str(hours) + ":" + timestamp += str(minutes).zfill(2) # zfill pads with zeros + else: + timestamp = str(minutes) + + timestamp += ":" + str(seconds).zfill(2) + return timestamp + +# playlists: + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# updated +# size +# first_video_id +def medium_playlist_item_info(playlist_renderer): + renderer = playlist_renderer + try: + author_url = URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + except KeyError: # radioRenderer + author_url = '' + try: + thumbnail = renderer['thumbnails'][0]['thumbnails'][0]['url'] + except KeyError: + thumbnail = renderer['thumbnail']['thumbnails'][0]['url'] + return { + "title": renderer["title"]["simpleText"], + 'id': renderer["playlistId"], + 'size': renderer.get('videoCount', '50+'), + "author": default_multi_get(renderer,'longBylineText','runs',0,'text', default='Youtube'), + "author_url": author_url, + 'thumbnail': thumbnail, + } + +def medium_video_item_info(video_renderer): + renderer = video_renderer + try: + return { + "title": renderer["title"]["simpleText"], + "id": renderer["videoId"], + "description": renderer.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text + "thumbnail": get_thumbnail_url(renderer["videoId"]), + "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'], + "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "author": renderer['longBylineText']['runs'][0]['text'], + "author_url": URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''), + } + except KeyError: + print(renderer) + raise + +def small_video_item_info(compact_video_renderer): + renderer = compact_video_renderer + return { + "title": renderer['title']['simpleText'], + "id": renderer['videoId'], + "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'], + "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "author": renderer['longBylineText']['runs'][0]['text'], + "author_url": renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + } + + +# ----- +# HTML +# ----- + +def small_video_item_html(item): + video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')}) + return small_video_item_template.substitute( + title = html.escape(item["title"]), + views = item["views"], + author = html.escape(item["author"]), + duration = item["duration"], + url = URL_ORIGIN + "/watch?v=" + item["id"], + thumbnail = get_thumbnail_url(item['id']), + video_info = html.escape(json.dumps(video_info)), + ) + +def small_playlist_item_html(item): + return small_playlist_item_template.substitute( + title=html.escape(item["title"]), + size = item['size'], + author="", + url = URL_ORIGIN + "/playlist?list=" + item["id"], + thumbnail= get_thumbnail_url(item['first_video_id']), + ) + +def medium_playlist_item_html(item): + return medium_playlist_item_template.substitute( + title=html.escape(item["title"]), + size = item['size'], + author=item['author'], + author_url= URL_ORIGIN + item['author_url'], + url = URL_ORIGIN + "/playlist?list=" + item["id"], + thumbnail= item['thumbnail'], + ) + +def medium_video_item_html(medium_video_info): + info = medium_video_info + + return medium_video_item_template.substitute( + title=html.escape(info["title"]), + views=info["views"], + published = info["published"], + description = format_text_runs(info["description"]), + author=html.escape(info["author"]), + author_url=info["author_url"], + duration=info["duration"], + url = URL_ORIGIN + "/watch?v=" + info["id"], + thumbnail=info['thumbnail'], + datetime='', # TODO + ) + +html_functions = { + 'compactVideoRenderer': lambda x: small_video_item_html(small_video_item_info(x)), + 'videoRenderer': lambda x: medium_video_item_html(medium_video_item_info(x)), + 'compactPlaylistRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)), + 'playlistRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)), + 'channelRenderer': lambda x: '', + 'radioRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)), + 'compactRadioRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)), + 'didYouMeanRenderer': lambda x: '', +} + + + + + + + +def get_url(node): + try: + return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + except KeyError: + return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + + +def get_text(node): + try: + return node['simpleText'] + except KeyError: + return node['runs'][0]['text'] + +def get_formatted_text(node): + try: + return node['runs'] + except KeyError: + return node['simpleText'] + +def get_badges(node): + badges = [] + for badge_node in node: + badge = badge_node['metadataBadgeRenderer']['label'] + if badge.lower() != 'new': + badges.append(badge) + return badges + +def get_thumbnail(node): + try: + return node['thumbnails'][0]['url'] # polymer format + except KeyError: + return node['url'] # ajax format + +dispatch = { + +# polymer format + 'title': ('title', get_text), + 'publishedTimeText': ('published', get_text), + 'videoId': ('id', lambda node: node), + 'descriptionSnippet': ('description', get_formatted_text), + 'lengthText': ('duration', get_text), + 'thumbnail': ('thumbnail', get_thumbnail), + 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']), + + 'videoCountText': ('size', get_text), + 'playlistId': ('id', lambda node: node), + + 'subscriberCountText': ('subscriber_count', get_text), + 'channelId': ('id', lambda node: node), + 'badges': ('badges', get_badges), + +# ajax format + 'view_count_text': ('views', get_text), + 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]), + 'owner_text': ('author', get_text), + 'owner_endpoint': ('author_url', lambda node: node['url']), + 'description': ('description', get_formatted_text), + 'index': ('playlist_index', get_text), + 'short_byline': ('author', get_text), + 'length': ('duration', get_text), + 'video_id': ('id', lambda node: node), + +} + +def renderer_info(renderer): + try: + info = {} + if 'viewCountText' in renderer: # prefer this one as it contains all the digits + info['views'] = get_text(renderer['viewCountText']) + elif 'shortViewCountText' in renderer: + info['views'] = get_text(renderer['shortViewCountText']) + + for key, node in renderer.items(): + if key in ('longBylineText', 'shortBylineText'): + info['author'] = get_text(node) + try: + info['author_url'] = get_url(node) + except KeyError: + pass + + continue + + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(renderer) + raise + +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + +def badges_html(badges): + return ' | '.join(map(html.escape, badges)) + + + + + +html_transform_dispatch = { + 'title': html.escape, + 'published': html.escape, + 'id': html.escape, + 'description': format_text_runs, + 'duration': html.escape, + 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')), + 'size': html.escape, + 'author': html.escape, + 'author_url': lambda url: html.escape(URL_ORIGIN + url), + 'views': html.escape, + 'subscriber_count': html.escape, + 'badges': badges_html, + 'playlist_index': html.escape, +} + +def get_html_ready(item): + html_ready = {} + for key, value in item.items(): + try: + function = html_transform_dispatch[key] + except KeyError: + continue + html_ready[key] = function(value) + return html_ready + + +author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''') +author_template = Template('''<address>By $author</address>''') +stat_templates = ( + Template('''<span class="views">$views</span>'''), + Template('''<time datetime="$datetime">$published</time>'''), +) +def get_video_stats(html_ready): + stats = [] + if 'author' in html_ready: + if 'author_url' in html_ready: + stats.append(author_template_url.substitute(html_ready)) + else: + stats.append(author_template.substitute(html_ready)) + for stat in stat_templates: + try: + stats.append(stat.strict_substitute(html_ready)) + except KeyError: + pass + return ' | '.join(stats) + +def video_item_html(item, template): + html_ready = get_html_ready(item) + video_info = {} + for key in ('id', 'title', 'author'): + try: + video_info[key] = html_ready[key] + except KeyError: + video_info[key] = '' + try: + video_info['duration'] = html_ready['duration'] + except KeyError: + video_info['duration'] = 'Live' # livestreams don't have a duration + + html_ready['video_info'] = html.escape(json.dumps(video_info) ) + html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id'] + html_ready['datetime'] = '' #TODO + + html_ready['stats'] = get_video_stats(html_ready) + + return template.substitute(html_ready) + + +def playlist_item_html(item, template): + html_ready = get_html_ready(item) + + html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id'] + html_ready['datetime'] = '' #TODO + return template.substitute(html_ready) + + + + + + +def make_query_string(query_string): + return '&'.join(key + '=' + ','.join(values) for key,values in query_string.items()) + +def update_query_string(query_string, items): + parameters = urllib.parse.parse_qs(query_string) + parameters.update(items) + return make_query_string(parameters) + +page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') +current_page_button_template = Template('''<div class="page-button">$page</div>''') + +def page_buttons_html(current_page, estimated_pages, url, current_query_string): + if current_page <= 5: + page_start = 1 + page_end = min(9, estimated_pages) + else: + page_start = current_page - 4 + page_end = min(current_page + 4, estimated_pages) + + result = "" + for page in range(page_start, page_end+1): + if page == current_page: + template = current_page_button_template + else: + template = page_button_template + result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) ) + return result + + + + + + + +showing_results_for = Template(''' + <div class="showing-results-for"> + <div>Showing results for <a>$corrected_query</a></div> + <div>Search instead for <a href="$original_query_url">$original_query</a></div> + </div> +''') + +did_you_mean = Template(''' + <div class="did-you-mean"> + <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> + </div> +''') + +def renderer_html(renderer, additional_info={}, current_query_string=''): + type = list(renderer.keys())[0] + renderer = renderer[type] + if type in ('videoRenderer', 'playlistRenderer', 'radioRenderer', 'compactVideoRenderer', 'compactPlaylistRenderer', 'compactRadioRenderer', 'gridVideoRenderer', 'gridPlaylistRenderer', 'gridRadioRenderer'): + info = renderer_info(renderer) + info.update(additional_info) + if type == 'compactVideoRenderer': + return video_item_html(info, small_video_item_template) + if type in ('compactPlaylistRenderer', 'compactRadioRenderer'): + return playlist_item_html(info, small_playlist_item_template) + if type in ('videoRenderer', 'gridVideoRenderer'): + return video_item_html(info, medium_video_item_template) + if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer'): + return playlist_item_html(info, medium_playlist_item_template) + + if type == 'channelRenderer': + info = renderer_info(renderer) + html_ready = get_html_ready(info) + html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id'] + return medium_channel_item_template.substitute(html_ready) + + if type == 'movieRenderer': + return '' + print(renderer) + raise NotImplementedError('Unknown renderer type: ' + type) + + +'videoRenderer' +'playlistRenderer' +'channelRenderer' +'radioRenderer' +'gridVideoRenderer' +'gridPlaylistRenderer' + +'didYouMeanRenderer' +'showingResultsForRenderer' diff --git a/youtube/opensearch.xml b/youtube/opensearch.xml index 1764138..c9de40c 100644 --- a/youtube/opensearch.xml +++ b/youtube/opensearch.xml @@ -1,11 +1,11 @@ -<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/">
-<ShortName>Youtube local</ShortName>
-<Description>no CIA shit in the background</Description>
-<InputEncoding>UTF-8</InputEncoding>
-<Image width="16" height="16"></Image>
-
-<Url type="text/html" method="GET" template="http://localhost/youtube.com/search">
- <Param name="query" value="{searchTerms}"/>
-</Url>
-<SearchForm>http://localhost/youtube.com/search</SearchForm>
+<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/"> +<ShortName>Youtube local</ShortName> +<Description>no CIA shit in the background</Description> +<InputEncoding>UTF-8</InputEncoding> +<Image width="16" height="16"></Image> + +<Url type="text/html" method="GET" template="http://localhost/youtube.com/search"> + <Param name="query" value="{searchTerms}"/> +</Url> +<SearchForm>http://localhost/youtube.com/search</SearchForm> </SearchPlugin>
\ No newline at end of file diff --git a/youtube/playlist.py b/youtube/playlist.py index fc09191..592d1b4 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -1,243 +1,243 @@ -import base64
-import youtube.common as common
-import urllib
-import json
-from string import Template
-import youtube.proto as proto
-import gevent
-import math
-
-with open("yt_playlist_template.html", "r") as file:
- yt_playlist_template = Template(file.read())
-
-
-
-
-
-
-def youtube_obfuscated_endian(offset):
- if offset < 128:
- return bytes((offset,))
- first_byte = 255 & offset
- second_byte = 255 & (offset >> 7)
- second_byte = second_byte | 1
-
- # The next 2 bytes encode the offset in little endian order,
- # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part
- # of the offset. Instead, to get the number which the two bytes encode, that LSB
- # of the second byte is combined with the most significant bit (MSB) of the first byte
- # in a logical AND. Replace the two bits with the result of the AND to get the two little endian
- # bytes that represent the offset.
-
- return bytes((first_byte, second_byte))
-
-
-
-# just some garbage that's required, don't know what it means, if it means anything.
-ctoken_header = b'\xe2\xa9\x85\xb2\x02' # e2 a9 85 b2 02
-
-def byte(x):
- return bytes((x,))
-
-# TL;DR: the offset is hidden inside 3 nested base 64 encodes with random junk data added on the side periodically
-def create_ctoken(playlist_id, offset):
- obfuscated_offset = b'\x08' + youtube_obfuscated_endian(offset) # 0x08 slapped on for no apparent reason
- obfuscated_offset = b'PT:' + base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'')
- obfuscated_offset = b'z' + byte(len(obfuscated_offset)) + obfuscated_offset
- obfuscated_offset = base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'%3D')
-
- playlist_bytes = b'VL' + bytes(playlist_id, 'ascii')
- main_info = b'\x12' + byte(len(playlist_bytes)) + playlist_bytes + b'\x1a' + byte(len(obfuscated_offset)) + obfuscated_offset
-
- ctoken = base64.urlsafe_b64encode(ctoken_header + byte(len(main_info)) + main_info)
-
- return ctoken.decode('ascii')
-
-def playlist_ctoken(playlist_id, offset):
-
- offset = proto.uint(1, offset)
- # this is just obfuscation as far as I can tell. It doesn't even follow protobuf
- offset = b'PT:' + proto.unpadded_b64encode(offset)
- offset = proto.string(15, offset)
-
- continuation_info = proto.string( 3, proto.percent_b64encode(offset) )
-
- playlist_id = proto.string(2, 'VL' + playlist_id )
- pointless_nest = proto.string(80226972, playlist_id + continuation_info)
-
- return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
-
-# initial request types:
-# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
-# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418
-
-
-# continuation request types:
-# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
-# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]
-
-
-headers_1 = (
- ('Accept', '*/*'),
- ('Accept-Language', 'en-US,en;q=0.5'),
- ('X-YouTube-Client-Name', '1'),
- ('X-YouTube-Client-Version', '2.20180614'),
-)
-
-def playlist_first_page(playlist_id):
- url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
- content = common.fetch_url(url, common.mobile_ua + headers_1)
- if content[0:4] == b")]}'":
- content = content[4:]
- content = json.loads(common.uppercase_escape(content.decode('utf-8')))
- return content
-
-ajax_info_dispatch = {
- 'view_count_text': ('views', common.get_text),
- 'num_videos_text': ('size', lambda node: common.get_text(node).split(' ')[0]),
- 'thumbnail': ('thumbnail', lambda node: node.url),
- 'title': ('title', common.get_text),
- 'owner_text': ('author', common.get_text),
- 'owner_endpoint': ('author_url', lambda node: node.url),
- 'description': ('description', common.get_formatted_text),
-
-}
-def metadata_info(ajax_json):
- info = {}
- try:
- for key, node in ajax_json.items():
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except (KeyError,IndexError):
- print(ajax_json)
- raise
-
-
-
-
-#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
-def get_videos_ajax(playlist_id, page):
-
- url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20)
- headers = {
- 'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '2',
- 'X-YouTube-Client-Version': '1.20180508',
- }
- print("Sending playlist ajax request")
- content = common.fetch_url(url, headers)
- with open('playlist_debug', 'wb') as f:
- f.write(content)
- content = content[4:]
- print("Finished recieving playlist response")
-
- info = json.loads(common.uppercase_escape(content.decode('utf-8')))
- return info
-
-def get_playlist_videos(ajax_json):
- videos = []
- #info = get_bloated_playlist_videos(playlist_id, page)
- #print(info)
- video_list = ajax_json['content']['continuation_contents']['contents']
-
-
- for video_json_crap in video_list:
- try:
- videos.append({
- "title": video_json_crap["title"]['runs'][0]['text'],
- "id": video_json_crap["video_id"],
- "views": "",
- "duration": common.default_multi_get(video_json_crap, 'length', 'runs', 0, 'text', default=''), # livestreams dont have a length
- "author": video_json_crap['short_byline']['runs'][0]['text'],
- "author_url": '',
- "published": '',
- 'playlist_index': '',
-
- })
- except (KeyError, IndexError):
- print(video_json_crap)
- raise
- return videos
-
-def get_playlist_videos_format2(playlist_id, page):
- videos = []
- info = get_bloated_playlist_videos(playlist_id, page)
- video_list = info['response']['continuationContents']['playlistVideoListContinuation']['contents']
-
- for video_json_crap in video_list:
-
- video_json_crap = video_json_crap['videoRenderer']
-
- try:
- videos.append({
- "title": video_json_crap["title"]['runs'][0]['text'],
- "video_id": video_json_crap["videoId"],
- "views": "",
- "duration": common.default_multi_get(video_json_crap, 'lengthText', 'runs', 0, 'text', default=''), # livestreams dont have a length
- "uploader": video_json_crap['shortBylineText']['runs'][0]['text'],
- "uploader_url": common.ORIGIN_URL + video_json_crap['shortBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published": common.default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
- 'playlist_index': video_json_crap['index']['runs'][0]['text'],
-
- })
- except (KeyError, IndexError):
- print(video_json_crap)
- raise
- return videos
-
-
-def playlist_videos_html(ajax_json):
- result = ''
- for info in get_playlist_videos(ajax_json):
- result += common.small_video_item_html(info)
- return result
-
-playlist_stat_template = Template('''
-<div>$stat</div>''')
-def get_playlist_page(query_string):
- parameters = urllib.parse.parse_qs(query_string)
- playlist_id = parameters['list'][0]
- page = parameters.get("page", "1")[0]
- if page == "1":
- first_page_json = playlist_first_page(playlist_id)
- this_page_json = first_page_json
- else:
- tasks = (
- gevent.spawn(playlist_first_page, playlist_id ),
- gevent.spawn(get_videos_ajax, playlist_id, page)
- )
- gevent.joinall(tasks)
- first_page_json, this_page_json = tasks[0].value, tasks[1].value
-
- try:
- video_list = this_page_json['content']['section_list']['contents'][0]['contents'][0]['contents']
- except KeyError:
- video_list = this_page_json['content']['continuation_contents']['contents']
- videos_html = ''
- for video_json in video_list:
- info = common.ajax_info(video_json)
- videos_html += common.video_item_html(info, common.small_video_item_template)
-
-
- metadata = common.ajax_info(first_page_json['content']['playlist_header'])
- video_count = int(metadata['size'].replace(',', ''))
- page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", query_string)
-
- html_ready = common.get_html_ready(metadata)
- html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page)
-
- stats = ''
- stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos')
- stats += playlist_stat_template.substitute(stat=html_ready['views'])
- return yt_playlist_template.substitute(
- videos = videos_html,
- page_buttons = page_buttons,
- stats = stats,
- **html_ready
+import base64 +import youtube.common as common +import urllib +import json +from string import Template +import youtube.proto as proto +import gevent +import math + +with open("yt_playlist_template.html", "r") as file: + yt_playlist_template = Template(file.read()) + + + + + + +def youtube_obfuscated_endian(offset): + if offset < 128: + return bytes((offset,)) + first_byte = 255 & offset + second_byte = 255 & (offset >> 7) + second_byte = second_byte | 1 + + # The next 2 bytes encode the offset in little endian order, + # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part + # of the offset. Instead, to get the number which the two bytes encode, that LSB + # of the second byte is combined with the most significant bit (MSB) of the first byte + # in a logical AND. Replace the two bits with the result of the AND to get the two little endian + # bytes that represent the offset. + + return bytes((first_byte, second_byte)) + + + +# just some garbage that's required, don't know what it means, if it means anything. +ctoken_header = b'\xe2\xa9\x85\xb2\x02' # e2 a9 85 b2 02 + +def byte(x): + return bytes((x,)) + +# TL;DR: the offset is hidden inside 3 nested base 64 encodes with random junk data added on the side periodically +def create_ctoken(playlist_id, offset): + obfuscated_offset = b'\x08' + youtube_obfuscated_endian(offset) # 0x08 slapped on for no apparent reason + obfuscated_offset = b'PT:' + base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'') + obfuscated_offset = b'z' + byte(len(obfuscated_offset)) + obfuscated_offset + obfuscated_offset = base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'%3D') + + playlist_bytes = b'VL' + bytes(playlist_id, 'ascii') + main_info = b'\x12' + byte(len(playlist_bytes)) + playlist_bytes + b'\x1a' + byte(len(obfuscated_offset)) + obfuscated_offset + + ctoken = base64.urlsafe_b64encode(ctoken_header + byte(len(main_info)) + main_info) + + return ctoken.decode('ascii') + +def playlist_ctoken(playlist_id, offset): + + offset = proto.uint(1, offset) + # this is just obfuscation as far as I can tell. It doesn't even follow protobuf + offset = b'PT:' + proto.unpadded_b64encode(offset) + offset = proto.string(15, offset) + + continuation_info = proto.string( 3, proto.percent_b64encode(offset) ) + + playlist_id = proto.string(2, 'VL' + playlist_id ) + pointless_nest = proto.string(80226972, playlist_id + continuation_info) + + return base64.urlsafe_b64encode(pointless_nest).decode('ascii') + +# initial request types: +# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 +# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418 + + +# continuation request types: +# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1 +# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...] + + +headers_1 = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '1'), + ('X-YouTube-Client-Version', '2.20180614'), +) + +def playlist_first_page(playlist_id): + url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true' + content = common.fetch_url(url, common.mobile_ua + headers_1) + if content[0:4] == b")]}'": + content = content[4:] + content = json.loads(common.uppercase_escape(content.decode('utf-8'))) + return content + +ajax_info_dispatch = { + 'view_count_text': ('views', common.get_text), + 'num_videos_text': ('size', lambda node: common.get_text(node).split(' ')[0]), + 'thumbnail': ('thumbnail', lambda node: node.url), + 'title': ('title', common.get_text), + 'owner_text': ('author', common.get_text), + 'owner_endpoint': ('author_url', lambda node: node.url), + 'description': ('description', common.get_formatted_text), + +} +def metadata_info(ajax_json): + info = {} + try: + for key, node in ajax_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except (KeyError,IndexError): + print(ajax_json) + raise + + + + +#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1 +def get_videos_ajax(playlist_id, page): + + url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + headers = { + 'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', + 'Accept': '*/*', + 'Accept-Language': 'en-US,en;q=0.5', + 'X-YouTube-Client-Name': '2', + 'X-YouTube-Client-Version': '1.20180508', + } + print("Sending playlist ajax request") + content = common.fetch_url(url, headers) + with open('playlist_debug', 'wb') as f: + f.write(content) + content = content[4:] + print("Finished recieving playlist response") + + info = json.loads(common.uppercase_escape(content.decode('utf-8'))) + return info + +def get_playlist_videos(ajax_json): + videos = [] + #info = get_bloated_playlist_videos(playlist_id, page) + #print(info) + video_list = ajax_json['content']['continuation_contents']['contents'] + + + for video_json_crap in video_list: + try: + videos.append({ + "title": video_json_crap["title"]['runs'][0]['text'], + "id": video_json_crap["video_id"], + "views": "", + "duration": common.default_multi_get(video_json_crap, 'length', 'runs', 0, 'text', default=''), # livestreams dont have a length + "author": video_json_crap['short_byline']['runs'][0]['text'], + "author_url": '', + "published": '', + 'playlist_index': '', + + }) + except (KeyError, IndexError): + print(video_json_crap) + raise + return videos + +def get_playlist_videos_format2(playlist_id, page): + videos = [] + info = get_bloated_playlist_videos(playlist_id, page) + video_list = info['response']['continuationContents']['playlistVideoListContinuation']['contents'] + + for video_json_crap in video_list: + + video_json_crap = video_json_crap['videoRenderer'] + + try: + videos.append({ + "title": video_json_crap["title"]['runs'][0]['text'], + "video_id": video_json_crap["videoId"], + "views": "", + "duration": common.default_multi_get(video_json_crap, 'lengthText', 'runs', 0, 'text', default=''), # livestreams dont have a length + "uploader": video_json_crap['shortBylineText']['runs'][0]['text'], + "uploader_url": common.ORIGIN_URL + video_json_crap['shortBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + "published": common.default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''), + 'playlist_index': video_json_crap['index']['runs'][0]['text'], + + }) + except (KeyError, IndexError): + print(video_json_crap) + raise + return videos + + +def playlist_videos_html(ajax_json): + result = '' + for info in get_playlist_videos(ajax_json): + result += common.small_video_item_html(info) + return result + +playlist_stat_template = Template(''' +<div>$stat</div>''') +def get_playlist_page(query_string): + parameters = urllib.parse.parse_qs(query_string) + playlist_id = parameters['list'][0] + page = parameters.get("page", "1")[0] + if page == "1": + first_page_json = playlist_first_page(playlist_id) + this_page_json = first_page_json + else: + tasks = ( + gevent.spawn(playlist_first_page, playlist_id ), + gevent.spawn(get_videos_ajax, playlist_id, page) + ) + gevent.joinall(tasks) + first_page_json, this_page_json = tasks[0].value, tasks[1].value + + try: + video_list = this_page_json['content']['section_list']['contents'][0]['contents'][0]['contents'] + except KeyError: + video_list = this_page_json['content']['continuation_contents']['contents'] + videos_html = '' + for video_json in video_list: + info = common.ajax_info(video_json) + videos_html += common.video_item_html(info, common.small_video_item_template) + + + metadata = common.ajax_info(first_page_json['content']['playlist_header']) + video_count = int(metadata['size'].replace(',', '')) + page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", query_string) + + html_ready = common.get_html_ready(metadata) + html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page) + + stats = '' + stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos') + stats += playlist_stat_template.substitute(stat=html_ready['views']) + return yt_playlist_template.substitute( + videos = videos_html, + page_buttons = page_buttons, + stats = stats, + **html_ready )
\ No newline at end of file diff --git a/youtube/proto.py b/youtube/proto.py index 9f9dbcc..6230e51 100644 --- a/youtube/proto.py +++ b/youtube/proto.py @@ -1,65 +1,65 @@ -from math import ceil
-import base64
-
-def byte(n):
- return bytes((n,))
-
-
-def varint_encode(offset):
- '''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one.
- The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
- aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
- 1ccccccc 1bbbbbbb 0aaaaaaa
-
- This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
- See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
- needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
- encoded_bytes = bytearray(needed_bytes)
- for i in range(0, needed_bytes - 1):
- encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
- offset = offset >> 7
- encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte
-
- return bytes(encoded_bytes)
-
-
-def varint_decode(encoded):
- decoded = 0
- for i, byte in enumerate(encoded):
- decoded |= (byte & 127) << 7*i
-
- if not (byte & 128):
- break
- return decoded
-
-
-def string(field_number, data):
- data = as_bytes(data)
- return _proto_field(2, field_number, varint_encode(len(data)) + data)
-nested = string
-
-def uint(field_number, value):
- return _proto_field(0, field_number, varint_encode(value))
-
-
-
-
-def _proto_field(wire_type, field_number, data):
- ''' See https://developers.google.com/protocol-buffers/docs/encoding#structure '''
- return varint_encode( (field_number << 3) | wire_type) + data
-
-
-
-def percent_b64encode(data):
- return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')
-
-
-def unpadded_b64encode(data):
- return base64.urlsafe_b64encode(data).replace(b'=', b'')
-
-def as_bytes(value):
- if isinstance(value, str):
- return value.encode('ascii')
- return value
-
+from math import ceil +import base64 + +def byte(n): + return bytes((n,)) + + +def varint_encode(offset): + '''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one. + The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is + aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as: + 1ccccccc 1bbbbbbb 0aaaaaaa + + This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data. + See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.''' + needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case. + encoded_bytes = bytearray(needed_bytes) + for i in range(0, needed_bytes - 1): + encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits + offset = offset >> 7 + encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte + + return bytes(encoded_bytes) + + +def varint_decode(encoded): + decoded = 0 + for i, byte in enumerate(encoded): + decoded |= (byte & 127) << 7*i + + if not (byte & 128): + break + return decoded + + +def string(field_number, data): + data = as_bytes(data) + return _proto_field(2, field_number, varint_encode(len(data)) + data) +nested = string + +def uint(field_number, value): + return _proto_field(0, field_number, varint_encode(value)) + + + + +def _proto_field(wire_type, field_number, data): + ''' See https://developers.google.com/protocol-buffers/docs/encoding#structure ''' + return varint_encode( (field_number << 3) | wire_type) + data + + + +def percent_b64encode(data): + return base64.urlsafe_b64encode(data).replace(b'=', b'%3D') + + +def unpadded_b64encode(data): + return base64.urlsafe_b64encode(data).replace(b'=', b'') + +def as_bytes(value): + if isinstance(value, str): + return value.encode('ascii') + return value +
\ No newline at end of file diff --git a/youtube/search.py b/youtube/search.py index 5268dbe..5982d9b 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -1,231 +1,231 @@ -import json
-import urllib
-import html
-from string import Template
-import base64
-from math import ceil
-from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
-import youtube.common as common
-
-with open("yt_search_results_template.html", "r") as file:
- yt_search_results_template = file.read()
-
-with open("yt_search_template.html", "r") as file:
- yt_search_template = file.read()
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="page-button">$page</div>''')
-video_result_template = '''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$video_url" title="$video_title">
- <img class="video-thumbnail-img" src="$thumbnail_url">
- <span class="video-duration">$length</span>
- </a>
-
- <a class="title" href="$video_url">$video_title</a>
-
- <address>Uploaded by <a href="$uploader_channel_url">$uploader</a></address>
- <span class="views">$views</span>
-
-
- <time datetime="$datetime">Uploaded $upload_date</time>
-
- <span class="description">$description</span>
- </div>
-'''
-
-
-
-# Sort: 1
- # Upload date: 2
- # View count: 3
- # Rating: 1
-# Offset: 9
-# Filters: 2
- # Upload date: 1
- # Type: 2
- # Duration: 3
-
-
-features = {
- '4k': 14,
- 'hd': 4,
- 'hdr': 25,
- 'subtitles': 5,
- 'creative_commons': 6,
- '3d': 7,
- 'live': 8,
- 'purchased': 9,
- '360': 15,
- 'location': 23,
-}
-
-def page_number_to_sp_parameter(page):
- offset = (int(page) - 1)*20 # 20 results per page
- first_byte = 255 & offset
- second_byte = 255 & (offset >> 7)
- second_byte = second_byte | 1
-
- # 0b01001000 is required, and is always the same.
- # The next 2 bytes encode the offset in little endian order,
- # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part
- # of the offset. Instead, to get the number which the two bytes encode, that LSB
- # of the second byte is combined with the most significant bit (MSB) of the first byte
- # in a logical AND. Replace the two bits with the result of the AND to get the two little endian
- # bytes that represent the offset.
- # I figured this out by trial and error on the sp parameter. I don't know why it's done like this;
- # perhaps it's just obfuscation.
- param_bytes = bytes((0b01001000, first_byte, second_byte))
- param_encoded = urllib.parse.quote(base64.urlsafe_b64encode(param_bytes))
- return param_encoded
-
-def get_search_json(query, page):
- url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
- headers = {
- 'Host': 'www.youtube.com',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '1',
- 'X-YouTube-Client-Version': '2.20180418',
- }
- url += "&pbj=1&sp=" + page_number_to_sp_parameter(page)
- content = common.fetch_url(url, headers=headers)
- info = json.loads(content)
- return info
-
-"""def get_search_info(query, page):
- result_info = dict()
- info = get_bloated_search_info(query, page)
-
- estimated_results = int(info[1]['response']['estimatedResults'])
- estimated_pages = ceil(estimated_results/20)
- result_info['estimated_results'] = estimated_results
- result_info['estimated_pages'] = estimated_pages
-
- result_info['results'] = []
- # this is what you get when you hire H-1B's
- video_list = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
-
-
- for video_json_crap in video_list:
- # they have a dictionary whose only content is another dictionary...
- try:
- type = list(video_json_crap.keys())[0]
- except KeyError:
- continue #channelRenderer or playlistRenderer
- '''description = ""
- for text_run in video_json_crap["descriptionSnippet"]["runs"]:
- if text_run.get("bold", False):
- description += "<b>" + html.escape'''
- try:
- result_info['results'].append({
- "title": video_json_crap["title"]["simpleText"],
- "video_id": video_json_crap["videoId"],
- "description": video_json_crap.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text
- "thumbnail": get_thumbnail_url(video_json_crap["videoId"]),
- "views_text": video_json_crap['viewCountText'].get('simpleText', None) or video_json_crap['viewCountText']['runs'][0]['text'],
- "length_text": default_multi_get(video_json_crap, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "uploader": video_json_crap['longBylineText']['runs'][0]['text'],
- "uploader_url": URL_ORIGIN + video_json_crap['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published_time_text": default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
-
- })
- except KeyError:
- print(video_json_crap)
- raise
- return result_info"""
-
-
-def page_buttons_html(page_start, page_end, current_page, query):
- result = ""
- for page in range(page_start, page_end+1):
- if page == current_page:
- template = current_page_button_template
- else:
- template = page_button_template
- result += template.substitute(page=page, href=URL_ORIGIN + "/search?query=" + urllib.parse.quote_plus(query) + "&page=" + str(page))
- return result
-
-showing_results_for = Template('''
- <div>Showing results for <a>$corrected_query</a></div>
- <div>Search instead for <a href="$original_query_url">$original_query</a></div>
-''')
-did_you_mean = Template('''
- <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
-''')
-def get_search_page(query_string, parameters=()):
- qs_query = urllib.parse.parse_qs(query_string)
- if len(qs_query) == 0:
- return yt_search_template
- query = qs_query["query"][0]
- page = qs_query.get("page", "1")[0]
-
- info = get_search_json(query, page)
-
- estimated_results = int(info[1]['response']['estimatedResults'])
- estimated_pages = ceil(estimated_results/20)
- results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
-
- corrections = ''
- result_list_html = ""
- for renderer in results:
- type = list(renderer.keys())[0]
- if type == 'shelfRenderer':
- continue
- if type == 'didYouMeanRenderer':
- renderer = renderer[type]
- corrected_query_string = urllib.parse.parse_qs(query_string)
- corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
- corrected_query_url = URL_ORIGIN + '/search?' + common.make_query_string(corrected_query_string)
- corrections = did_you_mean.substitute(
- corrected_query_url = corrected_query_url,
- corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
- )
- continue
- if type == 'showingResultsForRenderer':
- renderer = renderer[type]
- no_autocorrect_query_string = urllib.parse.parse_qs(query_string)
- no_autocorrect_query_string['autocorrect'] = ['0']
- no_autocorrect_query_url = URL_ORIGIN + '/search?' + common.make_query_string(no_autocorrect_query_string)
- corrections = showing_results_for.substitute(
- corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
- original_query_url = no_autocorrect_query_url,
- original_query = html.escape(renderer['originalQuery']['simpleText']),
- )
- continue
- result_list_html += common.renderer_html(renderer, current_query_string=query_string)
- '''type = list(result.keys())[0]
- result = result[type]
- if type == "showingResultsForRenderer":
- url = URL_ORIGIN + "/search"
- if len(parameters) > 0:
- url += ';' + ';'.join(parameters)
- url += '?' + '&'.join(key + '=' + ','.join(values) for key,values in qs_query.items())
-
- result_list_html += showing_results_for_template.substitute(
- corrected_query=common.format_text_runs(result['correctedQuery']['runs']),
-
- )
- else:
- result_list_html += common.html_functions[type](result)'''
-
- page = int(page)
- if page <= 5:
- page_start = 1
- page_end = min(9, estimated_pages)
- else:
- page_start = page - 4
- page_end = min(page + 4, estimated_pages)
-
-
- result = Template(yt_search_results_template).substitute(
- results = result_list_html,
- page_title = query + " - Search",
- search_box_value = html.escape(query),
- number_of_results = '{:,}'.format(estimated_results),
- number_of_pages = '{:,}'.format(estimated_pages),
- page_buttons = page_buttons_html(page_start, page_end, page, query),
- corrections = corrections
- )
+import json +import urllib +import html +from string import Template +import base64 +from math import ceil +from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN +import youtube.common as common + +with open("yt_search_results_template.html", "r") as file: + yt_search_results_template = file.read() + +with open("yt_search_template.html", "r") as file: + yt_search_template = file.read() + +page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') +current_page_button_template = Template('''<div class="page-button">$page</div>''') +video_result_template = ''' + <div class="medium-item"> + <a class="video-thumbnail-box" href="$video_url" title="$video_title"> + <img class="video-thumbnail-img" src="$thumbnail_url"> + <span class="video-duration">$length</span> + </a> + + <a class="title" href="$video_url">$video_title</a> + + <address>Uploaded by <a href="$uploader_channel_url">$uploader</a></address> + <span class="views">$views</span> + + + <time datetime="$datetime">Uploaded $upload_date</time> + + <span class="description">$description</span> + </div> +''' + + + +# Sort: 1 + # Upload date: 2 + # View count: 3 + # Rating: 1 +# Offset: 9 +# Filters: 2 + # Upload date: 1 + # Type: 2 + # Duration: 3 + + +features = { + '4k': 14, + 'hd': 4, + 'hdr': 25, + 'subtitles': 5, + 'creative_commons': 6, + '3d': 7, + 'live': 8, + 'purchased': 9, + '360': 15, + 'location': 23, +} + +def page_number_to_sp_parameter(page): + offset = (int(page) - 1)*20 # 20 results per page + first_byte = 255 & offset + second_byte = 255 & (offset >> 7) + second_byte = second_byte | 1 + + # 0b01001000 is required, and is always the same. + # The next 2 bytes encode the offset in little endian order, + # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part + # of the offset. Instead, to get the number which the two bytes encode, that LSB + # of the second byte is combined with the most significant bit (MSB) of the first byte + # in a logical AND. Replace the two bits with the result of the AND to get the two little endian + # bytes that represent the offset. + # I figured this out by trial and error on the sp parameter. I don't know why it's done like this; + # perhaps it's just obfuscation. + param_bytes = bytes((0b01001000, first_byte, second_byte)) + param_encoded = urllib.parse.quote(base64.urlsafe_b64encode(param_bytes)) + return param_encoded + +def get_search_json(query, page): + url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query) + headers = { + 'Host': 'www.youtube.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)', + 'Accept': '*/*', + 'Accept-Language': 'en-US,en;q=0.5', + 'X-YouTube-Client-Name': '1', + 'X-YouTube-Client-Version': '2.20180418', + } + url += "&pbj=1&sp=" + page_number_to_sp_parameter(page) + content = common.fetch_url(url, headers=headers) + info = json.loads(content) + return info + +"""def get_search_info(query, page): + result_info = dict() + info = get_bloated_search_info(query, page) + + estimated_results = int(info[1]['response']['estimatedResults']) + estimated_pages = ceil(estimated_results/20) + result_info['estimated_results'] = estimated_results + result_info['estimated_pages'] = estimated_pages + + result_info['results'] = [] + # this is what you get when you hire H-1B's + video_list = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] + + + for video_json_crap in video_list: + # they have a dictionary whose only content is another dictionary... + try: + type = list(video_json_crap.keys())[0] + except KeyError: + continue #channelRenderer or playlistRenderer + '''description = "" + for text_run in video_json_crap["descriptionSnippet"]["runs"]: + if text_run.get("bold", False): + description += "<b>" + html.escape''' + try: + result_info['results'].append({ + "title": video_json_crap["title"]["simpleText"], + "video_id": video_json_crap["videoId"], + "description": video_json_crap.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text + "thumbnail": get_thumbnail_url(video_json_crap["videoId"]), + "views_text": video_json_crap['viewCountText'].get('simpleText', None) or video_json_crap['viewCountText']['runs'][0]['text'], + "length_text": default_multi_get(video_json_crap, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "uploader": video_json_crap['longBylineText']['runs'][0]['text'], + "uploader_url": URL_ORIGIN + video_json_crap['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + "published_time_text": default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''), + + }) + except KeyError: + print(video_json_crap) + raise + return result_info""" + + +def page_buttons_html(page_start, page_end, current_page, query): + result = "" + for page in range(page_start, page_end+1): + if page == current_page: + template = current_page_button_template + else: + template = page_button_template + result += template.substitute(page=page, href=URL_ORIGIN + "/search?query=" + urllib.parse.quote_plus(query) + "&page=" + str(page)) + return result + +showing_results_for = Template(''' + <div>Showing results for <a>$corrected_query</a></div> + <div>Search instead for <a href="$original_query_url">$original_query</a></div> +''') +did_you_mean = Template(''' + <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> +''') +def get_search_page(query_string, parameters=()): + qs_query = urllib.parse.parse_qs(query_string) + if len(qs_query) == 0: + return yt_search_template + query = qs_query["query"][0] + page = qs_query.get("page", "1")[0] + + info = get_search_json(query, page) + + estimated_results = int(info[1]['response']['estimatedResults']) + estimated_pages = ceil(estimated_results/20) + results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] + + corrections = '' + result_list_html = "" + for renderer in results: + type = list(renderer.keys())[0] + if type == 'shelfRenderer': + continue + if type == 'didYouMeanRenderer': + renderer = renderer[type] + corrected_query_string = urllib.parse.parse_qs(query_string) + corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] + corrected_query_url = URL_ORIGIN + '/search?' + common.make_query_string(corrected_query_string) + corrections = did_you_mean.substitute( + corrected_query_url = corrected_query_url, + corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']), + ) + continue + if type == 'showingResultsForRenderer': + renderer = renderer[type] + no_autocorrect_query_string = urllib.parse.parse_qs(query_string) + no_autocorrect_query_string['autocorrect'] = ['0'] + no_autocorrect_query_url = URL_ORIGIN + '/search?' + common.make_query_string(no_autocorrect_query_string) + corrections = showing_results_for.substitute( + corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']), + original_query_url = no_autocorrect_query_url, + original_query = html.escape(renderer['originalQuery']['simpleText']), + ) + continue + result_list_html += common.renderer_html(renderer, current_query_string=query_string) + '''type = list(result.keys())[0] + result = result[type] + if type == "showingResultsForRenderer": + url = URL_ORIGIN + "/search" + if len(parameters) > 0: + url += ';' + ';'.join(parameters) + url += '?' + '&'.join(key + '=' + ','.join(values) for key,values in qs_query.items()) + + result_list_html += showing_results_for_template.substitute( + corrected_query=common.format_text_runs(result['correctedQuery']['runs']), + + ) + else: + result_list_html += common.html_functions[type](result)''' + + page = int(page) + if page <= 5: + page_start = 1 + page_end = min(9, estimated_pages) + else: + page_start = page - 4 + page_end = min(page + 4, estimated_pages) + + + result = Template(yt_search_results_template).substitute( + results = result_list_html, + page_title = query + " - Search", + search_box_value = html.escape(query), + number_of_results = '{:,}'.format(estimated_results), + number_of_pages = '{:,}'.format(estimated_pages), + page_buttons = page_buttons_html(page_start, page_end, page, query), + corrections = corrections + ) return result
\ No newline at end of file diff --git a/youtube/shared.css b/youtube/shared.css index 39e76f4..2ea511a 100644 --- a/youtube/shared.css +++ b/youtube/shared.css @@ -1,271 +1,271 @@ -h1, h2, h3, h4, h5, h6, div{
- margin:0;
- padding:0;
-
-}
-
-
-body{
- margin:0;
- padding: 0;
- color:#222;
-
-
- background-color:#cccccc;
-
- min-height:100vh;
-
- display:grid;
- grid-template-rows: 50px 1fr;
-}
-
- header{
- background-color:#333333;
-
- grid-row: 1;
- }
-
- main{
- grid-row: 2;
- }
-
-button{
- padding:0; /* Fuck browser-specific styling. Fix your shit mozilla */
-}
-address{
- font-style:normal;
-}
-#site-search{
- display: grid;
- grid-template-columns: 1fr 0fr;
-
-}
-
- #site-search .search-box{
- align-self:center;
- height:25px;
- border:0;
-
- grid-column: 1;
- }
- #site-search .search-button{
- grid-column: 2;
- align-self:center;
- height:25px;
-
- border-style:solid;
- border-width:1px;
- }
-
-
-.full-item{
- display: grid;
- grid-template-rows: 0fr 0fr 0fr 0fr 0fr;
- grid-template-columns: 1fr 1fr;
-
-}
- .full-item video{
- grid-column: 1 / span 2;
- grid-row: 1;
- }
- .full-item .title{
- grid-column: 1 / span 2;
- grid-row:2;
- min-width: 0;
- }
- .full-item address{
- grid-column: 1;
- grid-row: 3;
- justify-self: start;
- }
- .full-item .views{
- grid-column: 2;
- grid-row: 3;
- justify-self:end;
- }
- .full-item time{
- grid-column: 1;
- grid-row: 4;
- justify-self:start;
- }
- .full-item .likes-dislikes{
- grid-column: 2;
- grid-row: 4;
- justify-self:end;
- }
- .full-item .description{
- background-color:#d0d0d0;
- margin-top:8px;
- white-space: pre-line;
- min-width: 0;
-
- grid-column: 1 / span 2;
- grid-row: 5;
- }
-
-.medium-item{
- background-color:#bcbcbc;
- display: grid;
- align-content: start;
- grid-template-columns: 246px 1fr 0fr;
- grid-template-rows: 0fr 0fr 0fr 0fr 0fr 1fr;
-}
- .medium-item .title{
- grid-column:2 / span 2;
- grid-row:1;
- min-width: 0;
- }
- .medium-item address{
- display:inline;
- }
- /*.medium-item .views{
- grid-column: 3;
- grid-row: 2;
- justify-self:end;
- }
- .medium-item time{
- grid-column: 2;
- grid-row: 3;
- justify-self:start;
- }*/
- .medium-item .stats{
- grid-column: 2 / span 2;
- grid-row: 2;
- }
-
- .medium-item .description{
- grid-column: 2 / span 2;
- grid-row: 4;
- }
- .medium-item .badges{
- grid-column: 2 / span 2;
- grid-row: 5;
- }
- /* thumbnail size */
- .medium-item img{
- /*height:138px;
- width:246px;*/
- height:100%;
- justify-self:center;
- }
-
-.small-item-box{
- color: #767676;
- font-size: 12px;
-
- display:grid;
- grid-template-columns: 1fr 0fr;
- grid-template-rows: 94px;
-}
-
-.small-item{
- background-color:#bcbcbc;
- align-content: start;
- text-decoration:none;
-
- display: grid;
- grid-template-columns: 168px 1fr;
- grid-column-gap: 5px;
- grid-template-rows: 0fr 0fr 0fr 1fr;
-}
- .small-item .title{
- grid-column:2;
- grid-row:1;
- margin:0;
-
- color: #333;
- font-size: 16px;
- font-weight: 500;
- text-decoration:initial;
- min-width: 0;
- }
- .small-item address{
- grid-column: 2;
- grid-row: 2;
- justify-self: start;
- }
-
- .small-item .views{
- grid-column: 2;
- grid-row: 3;
- justify-self:start;
- }
- /* thumbnail size */
- .small-item img{
- /*height:94px;
- width:168px;*/
- height:100%;
- justify-self:center;
- }
-
-.item-checkbox{
- justify-self:start;
- align-self:center;
- height:30px;
- width:30px;
-
- grid-column: 2;
-}
-
-/* ---Thumbnails for videos---- */
-.video-thumbnail-box{
- grid-column:1;
- grid-row:1 / span 6;
-
- display:grid;
- grid-template-columns: 1fr 0fr;
-}
- .video-thumbnail-img{
- grid-column:1 / span 2;
- grid-row:1;
- }
- .video-duration{
- grid-column: 2;
- grid-row: 1;
- align-self: end;
- opacity: .8;
- color: #ffffff;
- font-size: 12px;
- background-color: #000000;
- }
-
-/* ---Thumbnails for playlists---- */
-.playlist-thumbnail-box{
- grid-column:1;
- grid-row:1 / span 5;
-
- display:grid;
- grid-template-columns: 3fr 2fr;
-}
- .playlist-thumbnail-img{
- grid-column:1 / span 2;
- grid-row:1;
- }
- .playlist-thumbnail-info{
- grid-column:2;
- grid-row:1;
-
- display: grid;
- align-items:center;
-
- text-align:center;
- white-space: pre-line;
- opacity: .8;
- color: #cfcfcf;
- background-color: #000000;
- }
-
-.page-button-row{
- justify-self:center;
- display: grid;
- grid-auto-columns: 40px;
- grid-auto-flow: column;
- height: 40px;
-}
- .page-button{
- background-color: #e9e9e9;
- border-style: outset;
- border-width: 2px;
- font-weight: bold;
- text-align: center;
+h1, h2, h3, h4, h5, h6, div{ + margin:0; + padding:0; + +} + + +body{ + margin:0; + padding: 0; + color:#222; + + + background-color:#cccccc; + + min-height:100vh; + + display:grid; + grid-template-rows: 50px 1fr; +} + + header{ + background-color:#333333; + + grid-row: 1; + } + + main{ + grid-row: 2; + } + +button{ + padding:0; /* Fuck browser-specific styling. Fix your shit mozilla */ +} +address{ + font-style:normal; +} +#site-search{ + display: grid; + grid-template-columns: 1fr 0fr; + +} + + #site-search .search-box{ + align-self:center; + height:25px; + border:0; + + grid-column: 1; + } + #site-search .search-button{ + grid-column: 2; + align-self:center; + height:25px; + + border-style:solid; + border-width:1px; + } + + +.full-item{ + display: grid; + grid-template-rows: 0fr 0fr 0fr 0fr 0fr; + grid-template-columns: 1fr 1fr; + +} + .full-item video{ + grid-column: 1 / span 2; + grid-row: 1; + } + .full-item .title{ + grid-column: 1 / span 2; + grid-row:2; + min-width: 0; + } + .full-item address{ + grid-column: 1; + grid-row: 3; + justify-self: start; + } + .full-item .views{ + grid-column: 2; + grid-row: 3; + justify-self:end; + } + .full-item time{ + grid-column: 1; + grid-row: 4; + justify-self:start; + } + .full-item .likes-dislikes{ + grid-column: 2; + grid-row: 4; + justify-self:end; + } + .full-item .description{ + background-color:#d0d0d0; + margin-top:8px; + white-space: pre-line; + min-width: 0; + + grid-column: 1 / span 2; + grid-row: 5; + } + +.medium-item{ + background-color:#bcbcbc; + display: grid; + align-content: start; + grid-template-columns: 246px 1fr 0fr; + grid-template-rows: 0fr 0fr 0fr 0fr 0fr 1fr; +} + .medium-item .title{ + grid-column:2 / span 2; + grid-row:1; + min-width: 0; + } + .medium-item address{ + display:inline; + } + /*.medium-item .views{ + grid-column: 3; + grid-row: 2; + justify-self:end; + } + .medium-item time{ + grid-column: 2; + grid-row: 3; + justify-self:start; + }*/ + .medium-item .stats{ + grid-column: 2 / span 2; + grid-row: 2; + } + + .medium-item .description{ + grid-column: 2 / span 2; + grid-row: 4; + } + .medium-item .badges{ + grid-column: 2 / span 2; + grid-row: 5; + } + /* thumbnail size */ + .medium-item img{ + /*height:138px; + width:246px;*/ + height:100%; + justify-self:center; + } + +.small-item-box{ + color: #767676; + font-size: 12px; + + display:grid; + grid-template-columns: 1fr 0fr; + grid-template-rows: 94px; +} + +.small-item{ + background-color:#bcbcbc; + align-content: start; + text-decoration:none; + + display: grid; + grid-template-columns: 168px 1fr; + grid-column-gap: 5px; + grid-template-rows: 0fr 0fr 0fr 1fr; +} + .small-item .title{ + grid-column:2; + grid-row:1; + margin:0; + + color: #333; + font-size: 16px; + font-weight: 500; + text-decoration:initial; + min-width: 0; + } + .small-item address{ + grid-column: 2; + grid-row: 2; + justify-self: start; + } + + .small-item .views{ + grid-column: 2; + grid-row: 3; + justify-self:start; + } + /* thumbnail size */ + .small-item img{ + /*height:94px; + width:168px;*/ + height:100%; + justify-self:center; + } + +.item-checkbox{ + justify-self:start; + align-self:center; + height:30px; + width:30px; + + grid-column: 2; +} + +/* ---Thumbnails for videos---- */ +.video-thumbnail-box{ + grid-column:1; + grid-row:1 / span 6; + + display:grid; + grid-template-columns: 1fr 0fr; +} + .video-thumbnail-img{ + grid-column:1 / span 2; + grid-row:1; + } + .video-duration{ + grid-column: 2; + grid-row: 1; + align-self: end; + opacity: .8; + color: #ffffff; + font-size: 12px; + background-color: #000000; + } + +/* ---Thumbnails for playlists---- */ +.playlist-thumbnail-box{ + grid-column:1; + grid-row:1 / span 5; + + display:grid; + grid-template-columns: 3fr 2fr; +} + .playlist-thumbnail-img{ + grid-column:1 / span 2; + grid-row:1; + } + .playlist-thumbnail-info{ + grid-column:2; + grid-row:1; + + display: grid; + align-items:center; + + text-align:center; + white-space: pre-line; + opacity: .8; + color: #cfcfcf; + background-color: #000000; + } + +.page-button-row{ + justify-self:center; + display: grid; + grid-auto-columns: 40px; + grid-auto-flow: column; + height: 40px; +} + .page-button{ + background-color: #e9e9e9; + border-style: outset; + border-width: 2px; + font-weight: bold; + text-align: center; }
\ No newline at end of file diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 5edf6fc..47f1ea3 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -1,18 +1,18 @@ -import urllib
-
-with open("subscriptions.txt", 'r', encoding='utf-8') as file:
- subscriptions = file.read()
-
-# Line format: "channel_id channel_name"
-# Example:
-# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown
-
-subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines())
-
-def get_new_videos():
- for channel_id, channel_name in subscriptions:
-
-
-
-
-def get_subscriptions_page():
+import urllib + +with open("subscriptions.txt", 'r', encoding='utf-8') as file: + subscriptions = file.read() + +# Line format: "channel_id channel_name" +# Example: +# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown + +subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines()) + +def get_new_videos(): + for channel_id, channel_name in subscriptions: + + + + +def get_subscriptions_page(): diff --git a/youtube/template.py b/youtube/template.py index 7f13415..b6df1ef 100644 --- a/youtube/template.py +++ b/youtube/template.py @@ -1,132 +1,132 @@ -
-import re as _re
-from collections import ChainMap as _ChainMap
-
-class _TemplateMetaclass(type):
- pattern = r"""
- %(delim)s(?:
- (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
- (?P<named>%(id)s) | # delimiter and a Python identifier
- {(?P<braced>%(id)s)} | # delimiter and a braced identifier
- (?P<invalid>) # Other ill-formed delimiter exprs
- )
- """
-
- def __init__(cls, name, bases, dct):
- super(_TemplateMetaclass, cls).__init__(name, bases, dct)
- if 'pattern' in dct:
- pattern = cls.pattern
- else:
- pattern = _TemplateMetaclass.pattern % {
- 'delim' : _re.escape(cls.delimiter),
- 'id' : cls.idpattern,
- }
- cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
-
-
-class Template(metaclass=_TemplateMetaclass):
- """A string class for supporting $-substitutions."""
-
- delimiter = '$'
- idpattern = r'[_a-z][_a-z0-9]*'
- flags = _re.IGNORECASE
-
- def __init__(self, template):
- self.template = template
-
- # Search for $$, $identifier, ${identifier}, and any bare $'s
-
- def _invalid(self, mo):
- i = mo.start('invalid')
- lines = self.template[:i].splitlines(keepends=True)
- if not lines:
- colno = 1
- lineno = 1
- else:
- colno = i - len(''.join(lines[:-1]))
- lineno = len(lines)
- raise ValueError('Invalid placeholder in string: line %d, col %d' %
- (lineno, colno))
-
- def substitute(*args, **kws):
- if not args:
- raise TypeError("descriptor 'substitute' of 'Template' object "
- "needs an argument")
- self, *args = args # allow the "self" keyword be passed
- if len(args) > 1:
- raise TypeError('Too many positional arguments')
- if not args:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, args[0])
- else:
- mapping = args[0]
- # Helper function for .sub()
- def convert(mo):
- # Check the most common path first.
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- return str(mapping.get(named,''))
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- self._invalid(mo)
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
- return self.pattern.sub(convert, self.template)
-
- def strict_substitute(*args, **kws):
- if not args:
- raise TypeError("descriptor 'substitute' of 'Template' object "
- "needs an argument")
- self, *args = args # allow the "self" keyword be passed
- if len(args) > 1:
- raise TypeError('Too many positional arguments')
- if not args:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, args[0])
- else:
- mapping = args[0]
- # Helper function for .sub()
- def convert(mo):
- # Check the most common path first.
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- return str(mapping[named])
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- self._invalid(mo)
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
- return self.pattern.sub(convert, self.template)
-
- def safe_substitute(*args, **kws):
- if not args:
- raise TypeError("descriptor 'safe_substitute' of 'Template' object "
- "needs an argument")
- self, *args = args # allow the "self" keyword be passed
- if len(args) > 1:
- raise TypeError('Too many positional arguments')
- if not args:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, args[0])
- else:
- mapping = args[0]
- # Helper function for .sub()
- def convert(mo):
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- try:
- return str(mapping[named])
- except KeyError:
- return mo.group()
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- return mo.group()
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
+ +import re as _re +from collections import ChainMap as _ChainMap + +class _TemplateMetaclass(type): + pattern = r""" + %(delim)s(?: + (?P<escaped>%(delim)s) | # Escape sequence of two delimiters + (?P<named>%(id)s) | # delimiter and a Python identifier + {(?P<braced>%(id)s)} | # delimiter and a braced identifier + (?P<invalid>) # Other ill-formed delimiter exprs + ) + """ + + def __init__(cls, name, bases, dct): + super(_TemplateMetaclass, cls).__init__(name, bases, dct) + if 'pattern' in dct: + pattern = cls.pattern + else: + pattern = _TemplateMetaclass.pattern % { + 'delim' : _re.escape(cls.delimiter), + 'id' : cls.idpattern, + } + cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE) + + +class Template(metaclass=_TemplateMetaclass): + """A string class for supporting $-substitutions.""" + + delimiter = '$' + idpattern = r'[_a-z][_a-z0-9]*' + flags = _re.IGNORECASE + + def __init__(self, template): + self.template = template + + # Search for $$, $identifier, ${identifier}, and any bare $'s + + def _invalid(self, mo): + i = mo.start('invalid') + lines = self.template[:i].splitlines(keepends=True) + if not lines: + colno = 1 + lineno = 1 + else: + colno = i - len(''.join(lines[:-1])) + lineno = len(lines) + raise ValueError('Invalid placeholder in string: line %d, col %d' % + (lineno, colno)) + + def substitute(*args, **kws): + if not args: + raise TypeError("descriptor 'substitute' of 'Template' object " + "needs an argument") + self, *args = args # allow the "self" keyword be passed + if len(args) > 1: + raise TypeError('Too many positional arguments') + if not args: + mapping = kws + elif kws: + mapping = _ChainMap(kws, args[0]) + else: + mapping = args[0] + # Helper function for .sub() + def convert(mo): + # Check the most common path first. + named = mo.group('named') or mo.group('braced') + if named is not None: + return str(mapping.get(named,'')) + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + self._invalid(mo) + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return self.pattern.sub(convert, self.template) + + def strict_substitute(*args, **kws): + if not args: + raise TypeError("descriptor 'substitute' of 'Template' object " + "needs an argument") + self, *args = args # allow the "self" keyword be passed + if len(args) > 1: + raise TypeError('Too many positional arguments') + if not args: + mapping = kws + elif kws: + mapping = _ChainMap(kws, args[0]) + else: + mapping = args[0] + # Helper function for .sub() + def convert(mo): + # Check the most common path first. + named = mo.group('named') or mo.group('braced') + if named is not None: + return str(mapping[named]) + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + self._invalid(mo) + raise ValueError('Unrecognized named group in pattern', + self.pattern) + return self.pattern.sub(convert, self.template) + + def safe_substitute(*args, **kws): + if not args: + raise TypeError("descriptor 'safe_substitute' of 'Template' object " + "needs an argument") + self, *args = args # allow the "self" keyword be passed + if len(args) > 1: + raise TypeError('Too many positional arguments') + if not args: + mapping = kws + elif kws: + mapping = _ChainMap(kws, args[0]) + else: + mapping = args[0] + # Helper function for .sub() + def convert(mo): + named = mo.group('named') or mo.group('braced') + if named is not None: + try: + return str(mapping[named]) + except KeyError: + return mo.group() + if mo.group('escaped') is not None: + return self.delimiter + if mo.group('invalid') is not None: + return mo.group() + raise ValueError('Unrecognized named group in pattern', + self.pattern) return self.pattern.sub(convert, self.template)
\ No newline at end of file diff --git a/youtube/watch.py b/youtube/watch.py index b8aa17d..6e1efbc 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -1,294 +1,294 @@ -from youtube_dl.YoutubeDL import YoutubeDL
-import json
-import urllib
-from string import Template
-import html
-import youtube.common as common
-from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN
-import youtube.comments as comments
-import gevent
-
-video_height_priority = (360, 480, 240, 720, 1080)
-
-
-_formats = {
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-}
-
-
-
-
-source_tag_template = Template('''
-<source src="$src" type="$type">''')
-
-with open("yt_watch_template.html", "r") as file:
- yt_watch_template = Template(file.read())
-
-
-
-# example:
-#https://www.youtube.com/related_ajax?ctoken=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&continuation=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&itct=CCkQybcCIhMIg8PShInX2gIVgdvBCh15WA0ZKPgd
-def get_bloated_more_related_videos(video_url, related_videos_token, id_token):
- related_videos_token = urllib.parse.quote(related_videos_token)
- url = "https://www.youtube.com/related_ajax?ctoken=" + related_videos_token + "&continuation=" + related_videos_token
- headers = {
- 'Host': 'www.youtube.com',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'Referer': video_url,
- 'X-YouTube-Client-Name': '1',
- 'X-YouTube-Client-Version': '2.20180418',
- 'X-Youtube-Identity-Token': id_token,
-
- }
- #print(url)
- req = urllib.request.Request(url, headers=headers)
- response = urllib.request.urlopen(req, timeout = 5)
- content = response.read()
- info = json.loads(content)
- return info
-
-def get_more_related_videos_info(video_url, related_videos_token, id_token):
- results = []
- info = get_bloated_more_related_videos(video_url, related_videos_token, id_token)
- bloated_results = info[1]['response']['continuationContents']['watchNextSecondaryResultsContinuation']['results']
- for bloated_result in bloated_results:
- bloated_result = bloated_result['compactVideoRenderer']
- results.append({
- "title": bloated_result['title']['simpleText'],
- "video_id": bloated_result['videoId'],
- "views_text": bloated_result['viewCountText']['simpleText'],
- "length_text": default_multi_get(bloated_result, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "length_text": bloated_result['lengthText']['simpleText'],
- "uploader_name": bloated_result['longBylineText']['runs'][0]['text'],
- "uploader_url": bloated_result['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- })
- return results
-
-def more_related_videos_html(video_info):
- related_videos = get_related_videos(url, 1, video_info['related_videos_token'], video_info['id_token'])
-
- related_videos_html = ""
- for video in related_videos:
- related_videos_html += Template(video_related_template).substitute(
- video_title=html.escape(video["title"]),
- views=video["views_text"],
- uploader=html.escape(video["uploader_name"]),
- uploader_channel_url=video["uploader_url"],
- length=video["length_text"],
- video_url = "/youtube.com/watch?v=" + video["video_id"],
- thumbnail_url= get_thumbnail_url(video['video_id']),
- )
- return related_videos_html
-
-
-
-def get_related_items_html(info):
- result = ""
- for item in info['related_vids']:
- if 'list' in item: # playlist:
- result += common.small_playlist_item_html(watch_page_related_playlist_info(item))
- else:
- result += common.small_video_item_html(watch_page_related_video_info(item))
- return result
-
-
-# json of related items retrieved directly from the watch page has different names for everything
-# converts these to standard names
-def watch_page_related_video_info(item):
- result = {key: item[key] for key in ('id', 'title', 'author')}
- result['duration'] = common.seconds_to_timestamp(item['length_seconds'])
- try:
- result['views'] = item['short_view_count_text']
- except KeyError:
- result['views'] = ''
- return result
-
-def watch_page_related_playlist_info(item):
- return {
- 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+",
- 'title': item['playlist_title'],
- 'id': item['list'],
- 'first_video_id': item['video_id'],
- }
-
-
-def sort_formats(info):
- info['formats'].sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0))
- for index, format in enumerate(info['formats']):
- if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
- break
- info['formats'] = info['formats'][index:] + info['formats'][0:index]
- info['formats'] = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none']
-
-def formats_html(info):
- result = ''
- for format in info['formats']:
- result += source_tag_template.substitute(
- src=format['url'],
- type='audio/' + format['ext'] if format['vcodec'] == "none" else 'video/' + format['ext'],
- )
- return result
-
-def choose_format(info):
- suitable_formats = []
- with open('teste.txt', 'w', encoding='utf-8') as f:
- f.write(json.dumps(info['formats']))
- for format in info['formats']:
- if (format["ext"] in ("mp4", "webm")
- and format["acodec"] != "none"
- and format["vcodec"] != "none"
- and format.get("height","none") in video_height_priority):
- suitable_formats.append(format)
-
- current_best = (suitable_formats[0],video_height_priority.index(suitable_formats[0]["height"]))
- for format in suitable_formats:
- video_priority_index = video_height_priority.index(format["height"])
- if video_priority_index < current_best[1]:
- current_best = (format, video_priority_index)
- return current_best[0]
-
-more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''')
-def get_watch_page(query_string):
- id = urllib.parse.parse_qs(query_string)['v'][0]
- tasks = (
- gevent.spawn(comments.video_comments, id ),
- gevent.spawn(YoutubeDL(params={'youtube_include_dash_manifest':False}).extract_info, "https://www.youtube.com/watch?v=" + id, download=False)
- )
- gevent.joinall(tasks)
- comments_info, info = tasks[0].value, tasks[1].value
- comments_html, ctoken = comments_info
-
- if ctoken == '':
- more_comments_button = ''
- else:
- more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
- #comments_html = comments.comments_html(video_id(url))
- #info = YoutubeDL().extract_info(url, download=False)
-
- #chosen_format = choose_format(info)
- sort_formats(info)
-
-
-
- upload_year = info["upload_date"][0:4]
- upload_month = info["upload_date"][4:6]
- upload_day = info["upload_date"][6:8]
- upload_date = upload_month + "/" + upload_day + "/" + upload_year
-
- related_videos_html = get_related_items_html(info)
-
- page = yt_watch_template.substitute(
- video_title=html.escape(info["title"]),
- page_title=html.escape(info["title"]),
- uploader=html.escape(info["uploader"]),
- uploader_channel_url='/' + info["uploader_url"],
- #upload_date=datetime.datetime.fromtimestamp(info["timestamp"]).strftime("%d %b %Y %H:%M:%S"),
- upload_date = upload_date,
- views='{:,}'.format(info["view_count"]),
- likes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["like_count"]),
- dislikes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["dislike_count"]),
- description=html.escape(info["description"]),
- video_sources=formats_html(info),
- related = related_videos_html,
- comments=comments_html,
- more_comments_button = more_comments_button,
- )
+from youtube_dl.YoutubeDL import YoutubeDL +import json +import urllib +from string import Template +import html +import youtube.common as common +from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN +import youtube.comments as comments +import gevent + +video_height_priority = (360, 480, 240, 720, 1080) + + +_formats = { + '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, + '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, + '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'}, + '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'}, + '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well + '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'}, + '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'}, + '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'}, + '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'}, + '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'}, + + + # 3D videos + '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20}, + '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20}, + '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20}, + '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, + + # Apple HTTP Live Streaming + '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, + '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10}, + '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, + '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10}, + + # DASH mp4 video + '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559) + '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'}, + + # Dash mp4 audio + '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'}, + '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'}, + '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'}, + '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'}, + '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'}, + + # Dash webm + '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60}, + + # Dash webm audio + '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128}, + '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256}, + + # Dash webm audio with opus inside + '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50}, + '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70}, + '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160}, + + # RTMP (unnamed) + '_rtmp': {'protocol': 'rtmp'}, +} + + + + +source_tag_template = Template(''' +<source src="$src" type="$type">''') + +with open("yt_watch_template.html", "r") as file: + yt_watch_template = Template(file.read()) + + + +# example: +#https://www.youtube.com/related_ajax?ctoken=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&continuation=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&itct=CCkQybcCIhMIg8PShInX2gIVgdvBCh15WA0ZKPgd +def get_bloated_more_related_videos(video_url, related_videos_token, id_token): + related_videos_token = urllib.parse.quote(related_videos_token) + url = "https://www.youtube.com/related_ajax?ctoken=" + related_videos_token + "&continuation=" + related_videos_token + headers = { + 'Host': 'www.youtube.com', + 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)', + 'Accept': '*/*', + 'Accept-Language': 'en-US,en;q=0.5', + 'Referer': video_url, + 'X-YouTube-Client-Name': '1', + 'X-YouTube-Client-Version': '2.20180418', + 'X-Youtube-Identity-Token': id_token, + + } + #print(url) + req = urllib.request.Request(url, headers=headers) + response = urllib.request.urlopen(req, timeout = 5) + content = response.read() + info = json.loads(content) + return info + +def get_more_related_videos_info(video_url, related_videos_token, id_token): + results = [] + info = get_bloated_more_related_videos(video_url, related_videos_token, id_token) + bloated_results = info[1]['response']['continuationContents']['watchNextSecondaryResultsContinuation']['results'] + for bloated_result in bloated_results: + bloated_result = bloated_result['compactVideoRenderer'] + results.append({ + "title": bloated_result['title']['simpleText'], + "video_id": bloated_result['videoId'], + "views_text": bloated_result['viewCountText']['simpleText'], + "length_text": default_multi_get(bloated_result, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "length_text": bloated_result['lengthText']['simpleText'], + "uploader_name": bloated_result['longBylineText']['runs'][0]['text'], + "uploader_url": bloated_result['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + }) + return results + +def more_related_videos_html(video_info): + related_videos = get_related_videos(url, 1, video_info['related_videos_token'], video_info['id_token']) + + related_videos_html = "" + for video in related_videos: + related_videos_html += Template(video_related_template).substitute( + video_title=html.escape(video["title"]), + views=video["views_text"], + uploader=html.escape(video["uploader_name"]), + uploader_channel_url=video["uploader_url"], + length=video["length_text"], + video_url = "/youtube.com/watch?v=" + video["video_id"], + thumbnail_url= get_thumbnail_url(video['video_id']), + ) + return related_videos_html + + + +def get_related_items_html(info): + result = "" + for item in info['related_vids']: + if 'list' in item: # playlist: + result += common.small_playlist_item_html(watch_page_related_playlist_info(item)) + else: + result += common.small_video_item_html(watch_page_related_video_info(item)) + return result + + +# json of related items retrieved directly from the watch page has different names for everything +# converts these to standard names +def watch_page_related_video_info(item): + result = {key: item[key] for key in ('id', 'title', 'author')} + result['duration'] = common.seconds_to_timestamp(item['length_seconds']) + try: + result['views'] = item['short_view_count_text'] + except KeyError: + result['views'] = '' + return result + +def watch_page_related_playlist_info(item): + return { + 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+", + 'title': item['playlist_title'], + 'id': item['list'], + 'first_video_id': item['video_id'], + } + + +def sort_formats(info): + info['formats'].sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0)) + for index, format in enumerate(info['formats']): + if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360: + break + info['formats'] = info['formats'][index:] + info['formats'][0:index] + info['formats'] = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none'] + +def formats_html(info): + result = '' + for format in info['formats']: + result += source_tag_template.substitute( + src=format['url'], + type='audio/' + format['ext'] if format['vcodec'] == "none" else 'video/' + format['ext'], + ) + return result + +def choose_format(info): + suitable_formats = [] + with open('teste.txt', 'w', encoding='utf-8') as f: + f.write(json.dumps(info['formats'])) + for format in info['formats']: + if (format["ext"] in ("mp4", "webm") + and format["acodec"] != "none" + and format["vcodec"] != "none" + and format.get("height","none") in video_height_priority): + suitable_formats.append(format) + + current_best = (suitable_formats[0],video_height_priority.index(suitable_formats[0]["height"])) + for format in suitable_formats: + video_priority_index = video_height_priority.index(format["height"]) + if video_priority_index < current_best[1]: + current_best = (format, video_priority_index) + return current_best[0] + +more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''') +def get_watch_page(query_string): + id = urllib.parse.parse_qs(query_string)['v'][0] + tasks = ( + gevent.spawn(comments.video_comments, id ), + gevent.spawn(YoutubeDL(params={'youtube_include_dash_manifest':False}).extract_info, "https://www.youtube.com/watch?v=" + id, download=False) + ) + gevent.joinall(tasks) + comments_info, info = tasks[0].value, tasks[1].value + comments_html, ctoken = comments_info + + if ctoken == '': + more_comments_button = '' + else: + more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken) + #comments_html = comments.comments_html(video_id(url)) + #info = YoutubeDL().extract_info(url, download=False) + + #chosen_format = choose_format(info) + sort_formats(info) + + + + upload_year = info["upload_date"][0:4] + upload_month = info["upload_date"][4:6] + upload_day = info["upload_date"][6:8] + upload_date = upload_month + "/" + upload_day + "/" + upload_year + + related_videos_html = get_related_items_html(info) + + page = yt_watch_template.substitute( + video_title=html.escape(info["title"]), + page_title=html.escape(info["title"]), + uploader=html.escape(info["uploader"]), + uploader_channel_url='/' + info["uploader_url"], + #upload_date=datetime.datetime.fromtimestamp(info["timestamp"]).strftime("%d %b %Y %H:%M:%S"), + upload_date = upload_date, + views='{:,}'.format(info["view_count"]), + likes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["like_count"]), + dislikes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["dislike_count"]), + description=html.escape(info["description"]), + video_sources=formats_html(info), + related = related_videos_html, + comments=comments_html, + more_comments_button = more_comments_button, + ) return page
\ No newline at end of file diff --git a/youtube/watch_later.py b/youtube/watch_later.py index 126fb6e..4bb421c 100644 --- a/youtube/watch_later.py +++ b/youtube/watch_later.py @@ -1,11 +1,11 @@ -import os.path
-import json
-watch_later_file = os.path.normpath("youtube/watch_later.txt")
-def add_to_watch_later(video_info_list):
- with open(watch_later_file, "a", encoding='utf-8') as file:
- for info in video_info_list:
- file.write(info + "\n")
-
-
-def get_watch_later_page():
+import os.path +import json +watch_later_file = os.path.normpath("youtube/watch_later.txt") +def add_to_watch_later(video_info_list): + with open(watch_later_file, "a", encoding='utf-8') as file: + for info in video_info_list: + file.write(info + "\n") + + +def get_watch_later_page(): pass
\ No newline at end of file diff --git a/youtube/youtube.py b/youtube/youtube.py index 7ec75c0..a7cc204 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -1,60 +1,60 @@ -import mimetypes
-import urllib.parse
-from youtube import watch_later, watch, search, playlist, channel, comments
-YOUTUBE_FILES = (
- "/shared.css",
- "/opensearch.xml",
- '/comments.css',
-)
-
-def youtube(env, start_response):
- path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING']
- if method == "GET":
- if path in YOUTUBE_FILES:
- with open("youtube" + path, 'rb') as f:
- mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream'
- start_response('200 OK', (('Content-type',mime_type),) )
- return f.read()
-
- elif path == "/comments":
- start_response('200 OK', (('Content-type','text/html'),) )
- return comments.get_comments_page(query_string).encode()
-
- elif path == "/watch":
- start_response('200 OK', (('Content-type','text/html'),) )
- return watch.get_watch_page(query_string).encode()
-
- elif path == "/search":
- start_response('200 OK', (('Content-type','text/html'),) )
- return search.get_search_page(query_string).encode()
-
- elif path == "/playlist":
- start_response('200 OK', (('Content-type','text/html'),) )
- return playlist.get_playlist_page(query_string).encode()
-
- elif path.startswith("/channel/"):
- start_response('200 OK', (('Content-type','text/html'),) )
- return channel.get_channel_page(path[9:], query_string=query_string).encode()
-
- elif path.startswith("/user/"):
- start_response('200 OK', (('Content-type','text/html'),) )
- return channel.get_user_page(path[6:], query_string=query_string).encode()
-
- else:
- start_response('404 Not Found', () )
- return b'404 Not Found'
-
- elif method == "POST":
- if path == "/edit_playlist":
- fields = urllib.parse.parse_qs(env['wsgi.input'].read().decode())
- if fields['action'][0] == 'add' and fields['playlist_name'][0] == 'watch_later':
- watch_later.add_to_watch_later(fields['video_info_list'])
-
- start_response('204 No Content', ())
- else:
- start_response('404 Not Found', ())
- return b'404 Not Found'
-
- else:
- start_response('501 Not Implemented', ())
+import mimetypes +import urllib.parse +from youtube import watch_later, watch, search, playlist, channel, comments +YOUTUBE_FILES = ( + "/shared.css", + "/opensearch.xml", + '/comments.css', +) + +def youtube(env, start_response): + path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING'] + if method == "GET": + if path in YOUTUBE_FILES: + with open("youtube" + path, 'rb') as f: + mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream' + start_response('200 OK', (('Content-type',mime_type),) ) + return f.read() + + elif path == "/comments": + start_response('200 OK', (('Content-type','text/html'),) ) + return comments.get_comments_page(query_string).encode() + + elif path == "/watch": + start_response('200 OK', (('Content-type','text/html'),) ) + return watch.get_watch_page(query_string).encode() + + elif path == "/search": + start_response('200 OK', (('Content-type','text/html'),) ) + return search.get_search_page(query_string).encode() + + elif path == "/playlist": + start_response('200 OK', (('Content-type','text/html'),) ) + return playlist.get_playlist_page(query_string).encode() + + elif path.startswith("/channel/"): + start_response('200 OK', (('Content-type','text/html'),) ) + return channel.get_channel_page(path[9:], query_string=query_string).encode() + + elif path.startswith("/user/"): + start_response('200 OK', (('Content-type','text/html'),) ) + return channel.get_user_page(path[6:], query_string=query_string).encode() + + else: + start_response('404 Not Found', () ) + return b'404 Not Found' + + elif method == "POST": + if path == "/edit_playlist": + fields = urllib.parse.parse_qs(env['wsgi.input'].read().decode()) + if fields['action'][0] == 'add' and fields['playlist_name'][0] == 'watch_later': + watch_later.add_to_watch_later(fields['video_info_list']) + + start_response('204 No Content', ()) + else: + start_response('404 Not Found', ()) + return b'404 Not Found' + + else: + start_response('501 Not Implemented', ()) return b'501 Not Implemented'
\ No newline at end of file |