aboutsummaryrefslogtreecommitdiffstats
path: root/youtube
diff options
context:
space:
mode:
Diffstat (limited to 'youtube')
-rw-r--r--youtube/channel.py503
-rw-r--r--youtube/comments.css116
-rw-r--r--youtube/comments.py332
-rw-r--r--youtube/common.py1278
-rw-r--r--youtube/opensearch.xml20
-rw-r--r--youtube/playlist.py484
-rw-r--r--youtube/proto.py128
-rw-r--r--youtube/search.py460
-rw-r--r--youtube/shared.css540
-rw-r--r--youtube/subscriptions.py36
-rw-r--r--youtube/template.py262
-rw-r--r--youtube/watch.py586
-rw-r--r--youtube/watch_later.py20
-rw-r--r--youtube/youtube.py118
14 files changed, 2442 insertions, 2441 deletions
diff --git a/youtube/channel.py b/youtube/channel.py
index d993d3b..b7a4462 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -1,252 +1,253 @@
-import base64
-import youtube.common as common
-from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id
-import urllib
-import json
-from string import Template
-import youtube.proto as proto
-import html
-import math
-import gevent
-import re
-import functools
-
-with open("yt_channel_items_template.html", "r") as file:
- yt_channel_items_template = Template(file.read())
-
-with open("yt_channel_about_template.html", "r") as file:
- yt_channel_about_template = Template(file.read())
-
-'''continuation = Proto(
- Field('optional', 'continuation', 80226972, Proto(
- Field('optional', 'browse_id', 2, String),
- Field('optional', 'params', 3, Base64(Proto(
- Field('optional', 'channel_tab', 2, String),
- Field('optional', 'sort', 3, ENUM
- Field('optional', 'page', 15, String),
- )))
- ))
-)'''
-
-
-'''channel_continuation = Proto(
- Field('optional', 'pointless_nest', 80226972, Proto(
- Field('optional', 'channel_id', 2, String),
- Field('optional', 'continuation_info', 3, Base64(Proto(
- Field('optional', 'channel_tab', 2, String),
- Field('optional', 'sort', 3, ENUM
- Field('optional', 'page', 15, String),
- )))
- ))
-)'''
-
-headers_1 = (
- ('Accept', '*/*'),
- ('Accept-Language', 'en-US,en;q=0.5'),
- ('X-YouTube-Client-Name', '1'),
- ('X-YouTube-Client-Version', '2.20180614'),
-)
-# https://www.youtube.com/browse_ajax?action_continuation=1&direct_render=1&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D
-# https://www.youtube.com/browse_ajax?ctoken=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&itct=CDsQybcCIhMIhZi1krTc2wIVjMicCh2HXQnhKJsc
-
-# grid view: 4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA
-# list view: 4qmFsgJCEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJkVnWjJhV1JsYjNNWUF5QUFNQUk0QVdBQmFnQjZBVEs0QVFBJTNE
-# SORT:
-# Popular - 1
-# Oldest - 2
-# Newest - 3
-
-# view:
-# grid: 0 or 1
-# list: 2
-def channel_ctoken(channel_id, page, sort, tab, view=1):
-
- tab = proto.string(2, tab )
- sort = proto.uint(3, int(sort))
- page = proto.string(15, str(page) )
- view = proto.uint(6, int(view))
- continuation_info = proto.string( 3, proto.percent_b64encode(tab + view + sort + page) )
-
- channel_id = proto.string(2, channel_id )
- pointless_nest = proto.string(80226972, channel_id + continuation_info)
-
- return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
-
-def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
- ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
- url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
-
- print("Sending channel tab ajax request")
- content = common.fetch_url(url, headers_1)
- print("Finished recieving channel tab response")
-
- info = json.loads(content)
- return info
-
-
-grid_video_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <span class="views">$views</span>
- <time datetime="$datetime">Uploaded $published</time>
-
- </div>
- <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
- </div>
-''')
-
-def grid_video_item_info(grid_video_renderer, author):
- renderer = grid_video_renderer
- return {
- "title": renderer['title']['simpleText'],
- "id": renderer['videoId'],
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "author": author,
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''),
- }
-
-def grid_video_item_html(item):
- video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
- return grid_video_item_template.substitute(
- title = html.escape(item["title"]),
- views = item["views"],
- duration = item["duration"],
- url = URL_ORIGIN + "/watch?v=" + item["id"],
- thumbnail = get_thumbnail_url(item['id']),
- video_info = html.escape(json.dumps(video_info)),
- published = item["published"],
- datetime = '', # TODO
- )
-
-def get_number_of_videos(channel_id):
- # Uploads playlist
- playlist_id = 'UU' + channel_id[2:]
- url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
- print("Getting number of videos")
- response = common.fetch_url(url, common.mobile_ua + headers_1)
- with open('playlist_debug_metadata', 'wb') as f:
- f.write(response)
- response = response.decode('utf-8')
- print("Got response for number of videos")
- return int(re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response).group(1).replace(',',''))
-
-@functools.lru_cache(maxsize=128)
-def get_channel_id(username):
- # method that gives the smallest possible response at ~10 kb
- # needs to be as fast as possible
- url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
- response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8')
- return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)
-
-
-def channel_videos_html(polymer_json, current_page=1, number_of_videos = 1000, current_query_string=''):
- microformat = polymer_json[1]['response']['microformat']['microformatDataRenderer']
- channel_url = microformat['urlCanonical'].rstrip('/')
- channel_id = channel_url[channel_url.rfind('/')+1:]
- try:
- items = polymer_json[1]['response']['continuationContents']['gridContinuation']['items']
- except KeyError:
- items = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer']['items']
- items_html = ''
- for video in items:
- items_html += grid_video_item_html(grid_video_item_info(video['gridVideoRenderer'], microformat['title']))
-
- return yt_channel_items_template.substitute(
- channel_title = microformat['title'],
- channel_about_url = URL_ORIGIN + "/channel/" + channel_id + "/about",
- avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
- page_title = microformat['title'] + ' - Channel',
- items = items_html,
- page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string)
- )
-
-channel_link_template = Template('''
-<a href="$url">$text</a>''')
-stat_template = Template('''
-<li>$stat_value</li>''')
-def channel_about_page(polymer_json):
- avatar = '/' + polymer_json[1]['response']['microformat']['microformatDataRenderer']['thumbnail']['thumbnails'][0]['url']
- # my goodness...
- channel_metadata = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][5]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']
- channel_links = ''
- for link_json in channel_metadata['primaryLinks']:
- channel_links += channel_link_template.substitute(
- url = html.escape(link_json['navigationEndpoint']['urlEndpoint']['url']),
- text = common.get_plain_text(link_json['title']),
- )
-
- stats = ''
- for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'):
- try:
- stat_value = common.get_plain_text(channel_metadata[stat_name])
- except KeyError:
- continue
- else:
- stats += stat_template.substitute(stat_value=stat_value)
- try:
- description = common.format_text_runs(common.get_formatted_text(channel_metadata['description']))
- except KeyError:
- description = ''
- return yt_channel_about_template.substitute(
- page_title = common.get_plain_text(channel_metadata['title']) + ' - About',
- channel_title = common.get_plain_text(channel_metadata['title']),
- avatar = html.escape(avatar),
- description = description,
- links = channel_links,
- stats = stats,
- channel_videos_url = common.URL_ORIGIN + '/channel/' + channel_metadata['channelId'] + '/videos',
- )
-
-def get_channel_page(url, query_string=''):
- path_components = url.rstrip('/').lstrip('/').split('/')
- channel_id = path_components[0]
- try:
- tab = path_components[1]
- except IndexError:
- tab = 'videos'
-
- parameters = urllib.parse.parse_qs(query_string)
- page_number = int(common.default_multi_get(parameters, 'page', 0, default='1'))
- sort = common.default_multi_get(parameters, 'sort', 0, default='3')
- view = common.default_multi_get(parameters, 'view', 0, default='1')
-
- if tab == 'videos':
- tasks = (
- gevent.spawn(get_number_of_videos, channel_id ),
- gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
- )
- gevent.joinall(tasks)
- number_of_videos, polymer_json = tasks[0].value, tasks[1].value
-
- return channel_videos_html(polymer_json, page_number, number_of_videos, query_string)
- elif tab == 'about':
- polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', headers_1)
- polymer_json = json.loads(polymer_json)
- return channel_about_page(polymer_json)
- else:
- raise ValueError('Unknown channel tab: ' + tab)
-
-def get_user_page(url, query_string=''):
- path_components = url.rstrip('/').lstrip('/').split('/')
- username = path_components[0]
- try:
- page = path_components[1]
- except IndexError:
- page = 'videos'
- if page == 'videos':
- polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/videos?pbj=1', headers_1)
- polymer_json = json.loads(polymer_json)
- return channel_videos_html(polymer_json)
- elif page == 'about':
- polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/about?pbj=1', headers_1)
- polymer_json = json.loads(polymer_json)
- return channel_about_page(polymer_json)
- else:
+import base64
+import youtube.common as common
+from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id
+import urllib
+import json
+from string import Template
+import youtube.proto as proto
+import html
+import math
+import gevent
+import re
+import functools
+
+with open("yt_channel_items_template.html", "r") as file:
+ yt_channel_items_template = Template(file.read())
+
+with open("yt_channel_about_template.html", "r") as file:
+ yt_channel_about_template = Template(file.read())
+
+'''continuation = Proto(
+ Field('optional', 'continuation', 80226972, Proto(
+ Field('optional', 'browse_id', 2, String),
+ Field('optional', 'params', 3, Base64(Proto(
+ Field('optional', 'channel_tab', 2, String),
+ Field('optional', 'sort', 3, ENUM
+ Field('optional', 'page', 15, String),
+ )))
+ ))
+)'''
+
+
+'''channel_continuation = Proto(
+ Field('optional', 'pointless_nest', 80226972, Proto(
+ Field('optional', 'channel_id', 2, String),
+ Field('optional', 'continuation_info', 3, Base64(Proto(
+ Field('optional', 'channel_tab', 2, String),
+ Field('optional', 'sort', 3, ENUM
+ Field('optional', 'page', 15, String),
+ )))
+ ))
+)'''
+
+headers_1 = (
+ ('Accept', '*/*'),
+ ('Accept-Language', 'en-US,en;q=0.5'),
+ ('X-YouTube-Client-Name', '1'),
+ ('X-YouTube-Client-Version', '2.20180614'),
+)
+# https://www.youtube.com/browse_ajax?action_continuation=1&direct_render=1&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D
+# https://www.youtube.com/browse_ajax?ctoken=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&itct=CDsQybcCIhMIhZi1krTc2wIVjMicCh2HXQnhKJsc
+
+# grid view: 4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA
+# list view: 4qmFsgJCEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJkVnWjJhV1JsYjNNWUF5QUFNQUk0QVdBQmFnQjZBVEs0QVFBJTNE
+# SORT:
+# Popular - 1
+# Oldest - 2
+# Newest - 3
+
+# view:
+# grid: 0 or 1
+# list: 2
+def channel_ctoken(channel_id, page, sort, tab, view=1):
+
+ tab = proto.string(2, tab )
+ sort = proto.uint(3, int(sort))
+ page = proto.string(15, str(page) )
+ view = proto.uint(6, int(view))
+ continuation_info = proto.string( 3, proto.percent_b64encode(tab + view + sort + page) )
+
+ channel_id = proto.string(2, channel_id )
+ pointless_nest = proto.string(80226972, channel_id + continuation_info)
+
+ return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
+
+def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
+ ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
+ url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken
+
+ print("Sending channel tab ajax request")
+ content = common.fetch_url(url, headers_1)
+ print("Finished recieving channel tab response")
+
+ info = json.loads(content)
+ return info
+
+
+grid_video_item_template = Template('''
+ <div class="small-item-box">
+ <div class="small-item">
+ <a class="video-thumbnail-box" href="$url" title="$title">
+ <img class="video-thumbnail-img" src="$thumbnail">
+ <span class="video-duration">$duration</span>
+ </a>
+ <a class="title" href="$url" title="$title">$title</a>
+
+ <span class="views">$views</span>
+ <time datetime="$datetime">Uploaded $published</time>
+
+ </div>
+ <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
+ </div>
+''')
+
+def grid_video_item_info(grid_video_renderer, author):
+ renderer = grid_video_renderer
+ return {
+ "title": renderer['title']['simpleText'],
+ "id": renderer['videoId'],
+ "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
+ "author": author,
+ "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
+ "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''),
+ }
+
+def grid_video_item_html(item):
+ video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
+ return grid_video_item_template.substitute(
+ title = html.escape(item["title"]),
+ views = item["views"],
+ duration = item["duration"],
+ url = URL_ORIGIN + "/watch?v=" + item["id"],
+ thumbnail = get_thumbnail_url(item['id']),
+ video_info = html.escape(json.dumps(video_info)),
+ published = item["published"],
+ datetime = '', # TODO
+ )
+
+def get_number_of_videos(channel_id):
+ # Uploads playlist
+ playlist_id = 'UU' + channel_id[2:]
+ url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
+ print("Getting number of videos")
+ response = common.fetch_url(url, common.mobile_ua + headers_1)
+ with open('playlist_debug_metadata', 'wb') as f:
+ f.write(response)
+ response = response.decode('utf-8')
+ print("Got response for number of videos")
+ return int(re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response).group(1).replace(',',''))
+
+@functools.lru_cache(maxsize=128)
+def get_channel_id(username):
+ # method that gives the smallest possible response at ~10 kb
+ # needs to be as fast as possible
+ url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
+ response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8')
+ return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)
+
+
+def channel_videos_html(polymer_json, current_page=1, number_of_videos = 1000, current_query_string=''):
+ microformat = polymer_json[1]['response']['microformat']['microformatDataRenderer']
+ channel_url = microformat['urlCanonical'].rstrip('/')
+ channel_id = channel_url[channel_url.rfind('/')+1:]
+ try:
+ items = polymer_json[1]['response']['continuationContents']['gridContinuation']['items']
+ except KeyError:
+ items = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer']['items']
+ items_html = ''
+ for video in items:
+ items_html += grid_video_item_html(grid_video_item_info(video['gridVideoRenderer'], microformat['title']))
+
+ return yt_channel_items_template.substitute(
+ channel_title = microformat['title'],
+ channel_about_url = URL_ORIGIN + "/channel/" + channel_id + "/about",
+ avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
+ page_title = microformat['title'] + ' - Channel',
+ items = items_html,
+ page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string),
+ number_of_results = '{:,}'.format(number_of_videos) + " videos",
+ )
+
+channel_link_template = Template('''
+<a href="$url">$text</a>''')
+stat_template = Template('''
+<li>$stat_value</li>''')
+def channel_about_page(polymer_json):
+ avatar = '/' + polymer_json[1]['response']['microformat']['microformatDataRenderer']['thumbnail']['thumbnails'][0]['url']
+ # my goodness...
+ channel_metadata = polymer_json[1]['response']['contents']['twoColumnBrowseResultsRenderer']['tabs'][5]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']
+ channel_links = ''
+ for link_json in channel_metadata['primaryLinks']:
+ channel_links += channel_link_template.substitute(
+ url = html.escape(link_json['navigationEndpoint']['urlEndpoint']['url']),
+ text = common.get_plain_text(link_json['title']),
+ )
+
+ stats = ''
+ for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'):
+ try:
+ stat_value = common.get_plain_text(channel_metadata[stat_name])
+ except KeyError:
+ continue
+ else:
+ stats += stat_template.substitute(stat_value=stat_value)
+ try:
+ description = common.format_text_runs(common.get_formatted_text(channel_metadata['description']))
+ except KeyError:
+ description = ''
+ return yt_channel_about_template.substitute(
+ page_title = common.get_plain_text(channel_metadata['title']) + ' - About',
+ channel_title = common.get_plain_text(channel_metadata['title']),
+ avatar = html.escape(avatar),
+ description = description,
+ links = channel_links,
+ stats = stats,
+ channel_videos_url = common.URL_ORIGIN + '/channel/' + channel_metadata['channelId'] + '/videos',
+ )
+
+def get_channel_page(url, query_string=''):
+ path_components = url.rstrip('/').lstrip('/').split('/')
+ channel_id = path_components[0]
+ try:
+ tab = path_components[1]
+ except IndexError:
+ tab = 'videos'
+
+ parameters = urllib.parse.parse_qs(query_string)
+ page_number = int(common.default_multi_get(parameters, 'page', 0, default='1'))
+ sort = common.default_multi_get(parameters, 'sort', 0, default='3')
+ view = common.default_multi_get(parameters, 'view', 0, default='1')
+
+ if tab == 'videos':
+ tasks = (
+ gevent.spawn(get_number_of_videos, channel_id ),
+ gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
+ )
+ gevent.joinall(tasks)
+ number_of_videos, polymer_json = tasks[0].value, tasks[1].value
+
+ return channel_videos_html(polymer_json, page_number, number_of_videos, query_string)
+ elif tab == 'about':
+ polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', headers_1)
+ polymer_json = json.loads(polymer_json)
+ return channel_about_page(polymer_json)
+ else:
+ raise ValueError('Unknown channel tab: ' + tab)
+
+def get_user_page(url, query_string=''):
+ path_components = url.rstrip('/').lstrip('/').split('/')
+ username = path_components[0]
+ try:
+ page = path_components[1]
+ except IndexError:
+ page = 'videos'
+ if page == 'videos':
+ polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/videos?pbj=1', headers_1)
+ polymer_json = json.loads(polymer_json)
+ return channel_videos_html(polymer_json)
+ elif page == 'about':
+ polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/about?pbj=1', headers_1)
+ polymer_json = json.loads(polymer_json)
+ return channel_about_page(polymer_json)
+ else:
raise ValueError('Unknown channel page: ' + page) \ No newline at end of file
diff --git a/youtube/comments.css b/youtube/comments.css
index 93a6495..325a433 100644
--- a/youtube/comments.css
+++ b/youtube/comments.css
@@ -1,59 +1,59 @@
-.comments{
- grid-row-gap: 10px;
- display: grid;
- align-content:start;
-}
-
-.comment{
- display:grid;
- grid-template-columns: 0fr 0fr 1fr;
- grid-template-rows: 0fr 0fr 0fr 0fr;
- background-color: #dadada;
-}
-
-.comment .author-avatar{
- grid-column: 1;
- grid-row: 1 / span 3;
- align-self: start;
- margin-right: 5px;
-}
-
-.comment address{
- grid-column: 2;
- grid-row: 1;
- margin-right:15px;
- white-space: nowrap;
-}
-
-.comment .text{
- grid-column: 2 / span 2;
- grid-row: 2;
- white-space: pre-line;
- min-width: 0;
-}
-
-.comment time{
- grid-column: 3;
- grid-row: 1;
- white-space: nowrap;
-
-}
-
-
-.comment .likes{
- grid-column:2;
- grid-row:3;
- font-weight:bold;
- white-space: nowrap;
-}
-
-.comment .replies{
- grid-column:2 / span 2;
- grid-row:4;
- justify-self:start;
-}
-
-.more-comments{
- justify-self:center;
-
+.comments{
+ grid-row-gap: 10px;
+ display: grid;
+ align-content:start;
+}
+
+.comment{
+ display:grid;
+ grid-template-columns: 0fr 0fr 1fr;
+ grid-template-rows: 0fr 0fr 0fr 0fr;
+ background-color: #dadada;
+}
+
+.comment .author-avatar{
+ grid-column: 1;
+ grid-row: 1 / span 3;
+ align-self: start;
+ margin-right: 5px;
+}
+
+.comment address{
+ grid-column: 2;
+ grid-row: 1;
+ margin-right:15px;
+ white-space: nowrap;
+}
+
+.comment .text{
+ grid-column: 2 / span 2;
+ grid-row: 2;
+ white-space: pre-line;
+ min-width: 0;
+}
+
+.comment time{
+ grid-column: 3;
+ grid-row: 1;
+ white-space: nowrap;
+
+}
+
+
+.comment .likes{
+ grid-column:2;
+ grid-row:3;
+ font-weight:bold;
+ white-space: nowrap;
+}
+
+.comment .replies{
+ grid-column:2 / span 2;
+ grid-row:4;
+ justify-self:start;
+}
+
+.more-comments{
+ justify-self:center;
+
} \ No newline at end of file
diff --git a/youtube/comments.py b/youtube/comments.py
index 4b30a48..3f44758 100644
--- a/youtube/comments.py
+++ b/youtube/comments.py
@@ -1,166 +1,166 @@
-import json
-import youtube.proto as proto
-import base64
-from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url
-from string import Template
-import urllib.request
-import urllib
-import html
-comment_template = Template('''
- <div class="comment-container">
- <div class="comment">
- <a class="author-avatar" href="$author_url" title="$author">
- <img class="author-avatar-img" src="$author_avatar">
- </a>
- <address>
- <a class="author" href="$author_url" title="$author">$author</a>
- </address>
- <span class="text">$text</span>
- <time datetime="$datetime">$published</time>
- <span class="likes">$likes</span>
-$replies
- </div>
-
- </div>
-''')
-reply_link_template = Template('''
- <a href="$url" class="replies">View replies</a>
-''')
-with open("yt_comments_template.html", "r") as file:
- yt_comments_template = Template(file.read())
-
-
-# <a class="replies-link" href="$replies_url">$replies_link_text</a>
-
-
-# Here's what I know about the secret key (starting with ASJN_i)
-# *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers):
-# -Video id
-# -Offset
-# -Sort
-# *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely.
-# *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3")
-# *The encoded data is not valid protobuf
-# *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests
-# *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two.
-# *Old ASJN's continue to work, and start at the same comment even if new comments have been posted since
-# *The ASJN has no relation with any of the data in the response it came from
-
-def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''):
- video_id = proto.as_bytes(video_id)
- secret_key = proto.as_bytes(secret_key)
-
-
- page_info = proto.string(4,video_id) + proto.uint(6, sort)
- offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
- if secret_key:
- offset_information = proto.string(1, secret_key) + offset_information
-
- result = proto.nested(2, proto.string(2, video_id)) + proto.uint(3,6) + proto.nested(6, offset_information)
- return base64.urlsafe_b64encode(result).decode('ascii')
-
-mobile_headers = {
- 'Host': 'm.youtube.com',
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '2',
- 'X-YouTube-Client-Version': '1.20180613',
-}
-def request_comments(ctoken, replies=False):
- if replies: # let's make it use different urls for no reason despite all the data being encoded
- base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
- else:
- base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
- url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"
- print("Sending comments ajax request")
- for i in range(0,8): # don't retry more than 8 times
- content = fetch_url(url, headers=mobile_headers)
- if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason
- content = content[4:]
- elif content[0:10] == b'\n<!DOCTYPE': # occasionally returns html instead of json for no reason
- content = b''
- print("got <!DOCTYPE>, retrying")
- continue
- break
- '''with open('comments_debug', 'wb') as f:
- f.write(content)'''
- return content
-
-def parse_comments(content, replies=False):
- try:
- content = json.loads(uppercase_escape(content.decode('utf-8')))
- #print(content)
- comments_raw = content['content']['continuation_contents']['contents']
- ctoken = default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')
-
- comments = []
- for comment_raw in comments_raw:
- replies_url = ''
- if not replies:
- if comment_raw['replies'] is not None:
- ctoken = comment_raw['replies']['continuations'][0]['continuation']
- replies_url = URL_ORIGIN + '/comments?ctoken=' + ctoken + "&replies=1"
- comment_raw = comment_raw['comment']
- comment = {
- 'author': comment_raw['author']['runs'][0]['text'],
- 'author_url': comment_raw['author_endpoint']['url'],
- 'author_avatar': comment_raw['author_thumbnail']['url'],
- 'likes': comment_raw['like_count'],
- 'published': comment_raw['published_time']['runs'][0]['text'],
- 'text': comment_raw['content']['runs'],
- 'reply_count': '',
- 'replies_url': replies_url,
- }
- comments.append(comment)
- except Exception as e:
- print('Error parsing comments: ' + str(e))
- comments = ()
- ctoken = ''
- else:
- print("Finished getting and parsing comments")
- return {'ctoken': ctoken, 'comments': comments}
-
-def get_comments_html(result):
- html_result = ''
- for comment in result['comments']:
- replies = ''
- if comment['replies_url']:
- replies = reply_link_template.substitute(url=comment['replies_url'])
- html_result += comment_template.substitute(
- author=html.escape(comment['author']),
- author_url = URL_ORIGIN + comment['author_url'],
- author_avatar = '/' + comment['author_avatar'],
- likes = str(comment['likes']) + ' likes' if str(comment['likes']) != '0' else '',
- published = comment['published'],
- text = format_text_runs(comment['text']),
- datetime = '', #TODO
- replies=replies,
- #replies='',
- )
- return html_result, result['ctoken']
-
-def video_comments(video_id, sort=0, offset=0, secret_key=''):
- result = parse_comments(request_comments(make_comment_ctoken(video_id, sort, offset, secret_key)))
- return get_comments_html(result)
-
-more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''')
-
-def get_comments_page(query_string):
- parameters = urllib.parse.parse_qs(query_string)
- ctoken = parameters['ctoken'][0]
- replies = default_multi_get(parameters, 'replies', 0, default="0") == "1"
-
- result = parse_comments(request_comments(ctoken, replies), replies)
- comments_html, ctoken = get_comments_html(result)
- if ctoken == '':
- more_comments_button = ''
- else:
- more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
-
- return yt_comments_template.substitute(
- comments = comments_html,
- page_title = 'Comments',
- more_comments_button=more_comments_button,
- )
-
+import json
+import youtube.proto as proto
+import base64
+from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url
+from string import Template
+import urllib.request
+import urllib
+import html
+comment_template = Template('''
+ <div class="comment-container">
+ <div class="comment">
+ <a class="author-avatar" href="$author_url" title="$author">
+ <img class="author-avatar-img" src="$author_avatar">
+ </a>
+ <address>
+ <a class="author" href="$author_url" title="$author">$author</a>
+ </address>
+ <span class="text">$text</span>
+ <time datetime="$datetime">$published</time>
+ <span class="likes">$likes</span>
+$replies
+ </div>
+
+ </div>
+''')
+reply_link_template = Template('''
+ <a href="$url" class="replies">View replies</a>
+''')
+with open("yt_comments_template.html", "r") as file:
+ yt_comments_template = Template(file.read())
+
+
+# <a class="replies-link" href="$replies_url">$replies_link_text</a>
+
+
+# Here's what I know about the secret key (starting with ASJN_i)
+# *The secret key definitely contains the following information (or perhaps the information is stored at youtube's servers):
+# -Video id
+# -Offset
+# -Sort
+# *If the video id or sort in the ctoken contradicts the ASJN, the response is an error. The offset encoded outside the ASJN is ignored entirely.
+# *The ASJN is base64 encoded data, indicated by the fact that the character after "ASJN_i" is one of ("0", "1", "2", "3")
+# *The encoded data is not valid protobuf
+# *The encoded data (after the 5 or so bytes that are always the same) is indistinguishable from random data according to a battery of randomness tests
+# *The ASJN in the ctoken provided by a response changes in regular intervals of about a second or two.
+# *Old ASJN's continue to work, and start at the same comment even if new comments have been posted since
+# *The ASJN has no relation with any of the data in the response it came from
+
+def make_comment_ctoken(video_id, sort=0, offset=0, secret_key=''):
+    """Build the base64url-encoded ``ctoken`` for a comments request.
+
+    ``video_id`` and ``secret_key`` are converted with ``proto.as_bytes``;
+    ``sort`` and ``offset`` are encoded with ``proto.uint``. The secret key
+    field is only emitted when the key is non-empty.
+
+    Returns the token as an ASCII ``str``.
+    """
+    vid = proto.as_bytes(video_id)
+    key = proto.as_bytes(secret_key)
+
+    # Inner message: video id (field 4) and sort (field 6), wrapped in
+    # field 4 and followed by the offset (field 5).
+    page_info = proto.string(4, vid) + proto.uint(6, sort)
+    offset_information = proto.nested(4, page_info) + proto.uint(5, offset)
+    if key:
+        # When present, the secret key (field 1) goes in front.
+        offset_information = proto.string(1, key) + offset_information
+
+    video_part = proto.nested(2, proto.string(2, vid))
+    message = video_part + proto.uint(3, 6) + proto.nested(6, offset_information)
+    return base64.urlsafe_b64encode(message).decode('ascii')
+
+# Headers sent with every comments request made by request_comments: an
+# iPhone Safari User-Agent plus the X-YouTube-Client-* fields, addressed to
+# m.youtube.com.
+mobile_headers = {
+ 'Host': 'm.youtube.com',
+ 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
+ 'Accept': '*/*',
+ 'Accept-Language': 'en-US,en;q=0.5',
+ 'X-YouTube-Client-Name': '2',
+ 'X-YouTube-Client-Version': '1.20180613',
+}
+def request_comments(ctoken, replies=False):
+    """Fetch the raw comments response for ``ctoken`` from m.youtube.com.
+
+    ``replies`` selects the reply-thread endpoint instead of the top-level
+    comments endpoint. A response that starts with an HTML doctype is
+    discarded and the request is retried, at most 8 times in total.
+
+    Returns the response bytes with the leading ``)]}'`` marker removed, or
+    ``b''`` when every attempt returned HTML.
+    """
+    # The two endpoints differ only in the action query parameter.
+    if replies:
+        action = "action_get_comment_replies"
+    else:
+        action = "action_get_comments"
+    url = "https://m.youtube.com/watch_comment?" + action + "=1&ctoken=" + ctoken.replace("=", "%3D") + "&pbj=1"
+    print("Sending comments ajax request")
+    for _attempt in range(8):   # don't retry more than 8 times
+        content = fetch_url(url, headers=mobile_headers)
+        if content.startswith(b'\n<!DOCTYPE'):   # occasionally html comes back instead of json
+            content = b''
+            print("got <!DOCTYPE>, retrying")
+            continue
+        if content.startswith(b")]}'"):   # closing characters prepended to the json payload
+            content = content[4:]
+        break
+    # Debugging aid: dump `content` to a 'comments_debug' file here if needed.
+    return content
+
+def parse_comments(content, replies=False):
+    """Parse a raw comments response into a continuation token and comments.
+
+    ``content`` is the response bytes (UTF-8 JSON) as returned by
+    request_comments. ``replies`` must be true when the response is a reply
+    thread; reply entries are used as-is, whereas top-level entries are
+    unwrapped from their ``'comment'`` key and may carry a ``'replies'``
+    continuation.
+
+    Returns ``{'ctoken': str, 'comments': sequence of dict}``. ``ctoken`` is
+    the continuation of *this* listing ('' when there is none). Parsing is
+    best-effort: on any error the problem is printed and an empty result is
+    returned.
+    """
+    try:
+        content = json.loads(uppercase_escape(content.decode('utf-8')))
+        comments_raw = content['content']['continuation_contents']['contents']
+        ctoken = default_multi_get(content, 'content', 'continuation_contents', 'continuations', 0, 'continuation', default='')
+
+        comments = []
+        for comment_raw in comments_raw:
+            replies_url = ''
+            if not replies:
+                if comment_raw['replies'] is not None:
+                    # Use a separate name: assigning to `ctoken` here would
+                    # replace the page continuation with the continuation of
+                    # the last comment's reply thread.
+                    reply_ctoken = comment_raw['replies']['continuations'][0]['continuation']
+                    replies_url = URL_ORIGIN + '/comments?ctoken=' + reply_ctoken + "&replies=1"
+                comment_raw = comment_raw['comment']
+            comments.append({
+                'author': comment_raw['author']['runs'][0]['text'],
+                'author_url': comment_raw['author_endpoint']['url'],
+                'author_avatar': comment_raw['author_thumbnail']['url'],
+                'likes': comment_raw['like_count'],
+                'published': comment_raw['published_time']['runs'][0]['text'],
+                'text': comment_raw['content']['runs'],
+                'reply_count': '',
+                'replies_url': replies_url,
+            })
+    except Exception as e:
+        # Deliberately broad: a malformed response should give an empty
+        # comment section rather than break the page.
+        print('Error parsing comments: ' + str(e))
+        comments = ()
+        ctoken = ''
+    else:
+        print("Finished getting and parsing comments")
+    return {'ctoken': ctoken, 'comments': comments}
+
+def get_comments_html(result):
+    """Render the parsed comments in ``result`` as HTML.
+
+    ``result`` is the dict produced by parse_comments. Every string taken
+    from the response is HTML-escaped before substitution (the comment text
+    is escaped by format_text_runs), so markup in remote data cannot break
+    out of the template.
+
+    Returns ``(html, ctoken)`` where ``ctoken`` is ``result['ctoken']``.
+    """
+    rendered = []
+    for comment in result['comments']:
+        replies = ''
+        if comment['replies_url']:
+            replies = reply_link_template.substitute(url=html.escape(comment['replies_url']))
+        like_count = str(comment['likes'])
+        rendered.append(comment_template.substitute(
+            author=html.escape(comment['author']),
+            author_url=html.escape(URL_ORIGIN + comment['author_url']),
+            author_avatar=html.escape('/' + comment['author_avatar']),
+            # a zero like count is not shown at all
+            likes=html.escape(like_count) + ' likes' if like_count != '0' else '',
+            published=html.escape(comment['published']),
+            text=format_text_runs(comment['text']),
+            datetime='',  # TODO
+            replies=replies,
+        ))
+    # join once instead of growing a string inside the loop
+    return ''.join(rendered), result['ctoken']
+
+def video_comments(video_id, sort=0, offset=0, secret_key=''):
+    """Fetch and render the top-level comments of a video.
+
+    Builds the ctoken from the arguments, requests the comments, parses them
+    and returns the ``(html, ctoken)`` pair produced by get_comments_html.
+    """
+    ctoken = make_comment_ctoken(video_id, sort, offset, secret_key)
+    raw_response = request_comments(ctoken)
+    parsed = parse_comments(raw_response)
+    return get_comments_html(parsed)
+
+# Button linking to the next page of the current comment listing.
+more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''')
+
+def get_comments_page(query_string):
+    """Render the standalone comments page for a ``/comments`` request.
+
+    ``query_string`` must contain ``ctoken``; ``replies=1`` marks the token
+    as belonging to a reply thread.
+
+    Returns the filled-in page template as a string.
+    Raises KeyError when ``ctoken`` is missing from the query string.
+    """
+    parameters = urllib.parse.parse_qs(query_string)
+    ctoken = parameters['ctoken'][0]
+    replies = default_multi_get(parameters, 'replies', 0, default="0") == "1"
+
+    result = parse_comments(request_comments(ctoken, replies), replies)
+    comments_html, ctoken = get_comments_html(result)
+    if ctoken == '':
+        more_comments_button = ''
+    else:
+        next_url = URL_ORIGIN + '/comments?ctoken=' + ctoken
+        if replies:
+            # Keep the flag on the next-page link; without it the following
+            # page of a reply thread would be requested and parsed as
+            # top-level comments.
+            next_url += '&replies=1'
+        more_comments_button = more_comments_template.substitute(url=next_url)
+
+    return yt_comments_template.substitute(
+        comments=comments_html,
+        page_title='Comments',
+        more_comments_button=more_comments_button,
+    )
+
diff --git a/youtube/common.py b/youtube/common.py
index 67bd81f..3133fed 100644
--- a/youtube/common.py
+++ b/youtube/common.py
@@ -1,639 +1,639 @@
-from youtube.template import Template
-import html
-import json
-import re
-import urllib.parse
-import gzip
-import brotli
-import time
-
-
-URL_ORIGIN = "/https://www.youtube.com"
-
-
-# videos (all of type str):
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# published
-# duration
-# likes
-# dislikes
-# views
-# playlist_index
-
-# playlists:
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# updated
-# size
-# first_video_id
-
-
-
-
-
-
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="current-page-button">$page</a>''')
-
-medium_playlist_item_template = Template('''
- <div class="medium-item">
- <a class="playlist-thumbnail-box" href="$url" title="$title">
- <img class="playlist-thumbnail-img" src="$thumbnail">
- <div class="playlist-thumbnail-info">
- <span>$size</span>
- </div>
- </a>
-
- <a class="title" href="$url" title=$title>$title</a>
-
- <address><a href="$author_url">$author</a></address>
- </div>
-''')
-medium_video_item_template = Template('''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
-
- <a class="title" href="$url">$title</a>
-
- <div class="stats">$stats</div>
- <!--
- <address><a href="$author_url">$author</a></address>
- <span class="views">$views</span>
- <time datetime="$datetime">Uploaded $published</time>-->
-
- <span class="description">$description</span>
- <span class="badges">$badges</span>
- </div>
-''')
-
-small_video_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <address>$author</address>
- <span class="views">$views</span>
-
- </div>
- <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
- </div>
-''')
-
-small_playlist_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="playlist-thumbnail-box" href="$url" title="$title">
- <img class="playlist-thumbnail-img" src="$thumbnail">
- <div class="playlist-thumbnail-info">
- <span>$size</span>
- </div>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <address>$author</address>
- </div>
- </div>
-''')
-
-medium_channel_item_template = Template('''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
-
- <a class="title" href="$url">$title</a>
-
- <span>$subscriber_count</span>
- <span>$size</span>
-
- <span class="description">$description</span>
- </div>
-''')
-
-
-def fetch_url(url, headers=(), timeout=5, report_text=None):
- if isinstance(headers, list):
- headers += [('Accept-Encoding', 'gzip, br')]
- headers = dict(headers)
- elif isinstance(headers, tuple):
- headers += (('Accept-Encoding', 'gzip, br'),)
- headers = dict(headers)
- else:
- headers = headers.copy()
- headers['Accept-Encoding'] = 'gzip, br'
-
- start_time = time.time()
-
- req = urllib.request.Request(url, headers=headers)
- response = urllib.request.urlopen(req, timeout=timeout)
- response_time = time.time()
-
- content = response.read()
- read_finish = time.time()
- if report_text:
- print(report_text, 'Latency:', response_time - start_time, ' Read time:', read_finish - response_time)
- encodings = response.getheader('Content-Encoding', default='identity').replace(' ', '').split(',')
- for encoding in reversed(encodings):
- if encoding == 'identity':
- continue
- if encoding == 'br':
- content = brotli.decompress(content)
- elif encoding == 'gzip':
- content = gzip.decompress(content)
- return content
-
-mobile_ua = (('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'),)
-
-def dict_add(*dicts):
- for dictionary in dicts[1:]:
- dicts[0].update(dictionary)
- return dicts[0]
-
-def video_id(url):
- url_parts = urllib.parse.urlparse(url)
- return urllib.parse.parse_qs(url_parts.query)['v'][0]
-
-def uppercase_escape(s):
- return re.sub(
- r'\\U([0-9a-fA-F]{8})',
- lambda m: chr(int(m.group(1), base=16)), s)
-
-def default_multi_get(object, *keys, default):
- ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
- try:
- for key in keys:
- object = object[key]
- return object
- except (IndexError, KeyError):
- return default
-
-def get_plain_text(node):
- try:
- return html.escape(node['simpleText'])
- except KeyError:
- return unformmated_text_runs(node['runs'])
-
-def unformmated_text_runs(runs):
- result = ''
- for text_run in runs:
- result += html.escape(text_run["text"])
- return result
-
-def format_text_runs(runs):
- if isinstance(runs, str):
- return runs
- result = ''
- for text_run in runs:
- if text_run.get("bold", False):
- result += "<b>" + html.escape(text_run["text"]) + "</b>"
- elif text_run.get('italics', False):
- result += "<i>" + html.escape(text_run["text"]) + "</i>"
- else:
- result += html.escape(text_run["text"])
- return result
-
-# default, sddefault, mqdefault, hqdefault, hq720
-def get_thumbnail_url(video_id):
- return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
-
-def seconds_to_timestamp(seconds):
- seconds = int(seconds)
- hours, seconds = divmod(seconds,3600)
- minutes, seconds = divmod(seconds,60)
- if hours != 0:
- timestamp = str(hours) + ":"
- timestamp += str(minutes).zfill(2) # zfill pads with zeros
- else:
- timestamp = str(minutes)
-
- timestamp += ":" + str(seconds).zfill(2)
- return timestamp
-
-# playlists:
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# updated
-# size
-# first_video_id
-def medium_playlist_item_info(playlist_renderer):
- renderer = playlist_renderer
- try:
- author_url = URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- except KeyError: # radioRenderer
- author_url = ''
- try:
- thumbnail = renderer['thumbnails'][0]['thumbnails'][0]['url']
- except KeyError:
- thumbnail = renderer['thumbnail']['thumbnails'][0]['url']
- return {
- "title": renderer["title"]["simpleText"],
- 'id': renderer["playlistId"],
- 'size': renderer.get('videoCount', '50+'),
- "author": default_multi_get(renderer,'longBylineText','runs',0,'text', default='Youtube'),
- "author_url": author_url,
- 'thumbnail': thumbnail,
- }
-
-def medium_video_item_info(video_renderer):
- renderer = video_renderer
- try:
- return {
- "title": renderer["title"]["simpleText"],
- "id": renderer["videoId"],
- "description": renderer.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text
- "thumbnail": get_thumbnail_url(renderer["videoId"]),
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "author": renderer['longBylineText']['runs'][0]['text'],
- "author_url": URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''),
- }
- except KeyError:
- print(renderer)
- raise
-
-def small_video_item_info(compact_video_renderer):
- renderer = compact_video_renderer
- return {
- "title": renderer['title']['simpleText'],
- "id": renderer['videoId'],
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "author": renderer['longBylineText']['runs'][0]['text'],
- "author_url": renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- }
-
-
-# -----
-# HTML
-# -----
-
-def small_video_item_html(item):
- video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
- return small_video_item_template.substitute(
- title = html.escape(item["title"]),
- views = item["views"],
- author = html.escape(item["author"]),
- duration = item["duration"],
- url = URL_ORIGIN + "/watch?v=" + item["id"],
- thumbnail = get_thumbnail_url(item['id']),
- video_info = html.escape(json.dumps(video_info)),
- )
-
-def small_playlist_item_html(item):
- return small_playlist_item_template.substitute(
- title=html.escape(item["title"]),
- size = item['size'],
- author="",
- url = URL_ORIGIN + "/playlist?list=" + item["id"],
- thumbnail= get_thumbnail_url(item['first_video_id']),
- )
-
-def medium_playlist_item_html(item):
- return medium_playlist_item_template.substitute(
- title=html.escape(item["title"]),
- size = item['size'],
- author=item['author'],
- author_url= URL_ORIGIN + item['author_url'],
- url = URL_ORIGIN + "/playlist?list=" + item["id"],
- thumbnail= item['thumbnail'],
- )
-
-def medium_video_item_html(medium_video_info):
- info = medium_video_info
-
- return medium_video_item_template.substitute(
- title=html.escape(info["title"]),
- views=info["views"],
- published = info["published"],
- description = format_text_runs(info["description"]),
- author=html.escape(info["author"]),
- author_url=info["author_url"],
- duration=info["duration"],
- url = URL_ORIGIN + "/watch?v=" + info["id"],
- thumbnail=info['thumbnail'],
- datetime='', # TODO
- )
-
-html_functions = {
- 'compactVideoRenderer': lambda x: small_video_item_html(small_video_item_info(x)),
- 'videoRenderer': lambda x: medium_video_item_html(medium_video_item_info(x)),
- 'compactPlaylistRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
- 'playlistRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
- 'channelRenderer': lambda x: '',
- 'radioRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
- 'compactRadioRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
- 'didYouMeanRenderer': lambda x: '',
-}
-
-
-
-
-
-
-
-def get_url(node):
- try:
- return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- except KeyError:
- return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
-
-
-def get_text(node):
- try:
- return node['simpleText']
- except KeyError:
- return node['runs'][0]['text']
-
-def get_formatted_text(node):
- try:
- return node['runs']
- except KeyError:
- return node['simpleText']
-
-def get_badges(node):
- badges = []
- for badge_node in node:
- badge = badge_node['metadataBadgeRenderer']['label']
- if badge.lower() != 'new':
- badges.append(badge)
- return badges
-
-def get_thumbnail(node):
- try:
- return node['thumbnails'][0]['url'] # polymer format
- except KeyError:
- return node['url'] # ajax format
-
-dispatch = {
-
-# polymer format
- 'title': ('title', get_text),
- 'publishedTimeText': ('published', get_text),
- 'videoId': ('id', lambda node: node),
- 'descriptionSnippet': ('description', get_formatted_text),
- 'lengthText': ('duration', get_text),
- 'thumbnail': ('thumbnail', get_thumbnail),
- 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
-
- 'videoCountText': ('size', get_text),
- 'playlistId': ('id', lambda node: node),
-
- 'subscriberCountText': ('subscriber_count', get_text),
- 'channelId': ('id', lambda node: node),
- 'badges': ('badges', get_badges),
-
-# ajax format
- 'view_count_text': ('views', get_text),
- 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
- 'owner_text': ('author', get_text),
- 'owner_endpoint': ('author_url', lambda node: node['url']),
- 'description': ('description', get_formatted_text),
- 'index': ('playlist_index', get_text),
- 'short_byline': ('author', get_text),
- 'length': ('duration', get_text),
- 'video_id': ('id', lambda node: node),
-
-}
-
-def renderer_info(renderer):
- try:
- info = {}
- if 'viewCountText' in renderer: # prefer this one as it contains all the digits
- info['views'] = get_text(renderer['viewCountText'])
- elif 'shortViewCountText' in renderer:
- info['views'] = get_text(renderer['shortViewCountText'])
-
- for key, node in renderer.items():
- if key in ('longBylineText', 'shortBylineText'):
- info['author'] = get_text(node)
- try:
- info['author_url'] = get_url(node)
- except KeyError:
- pass
-
- continue
-
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(renderer)
- raise
-
-def ajax_info(item_json):
- try:
- info = {}
- for key, node in item_json.items():
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(item_json)
- raise
-
-def badges_html(badges):
- return ' | '.join(map(html.escape, badges))
-
-
-
-
-
-html_transform_dispatch = {
- 'title': html.escape,
- 'published': html.escape,
- 'id': html.escape,
- 'description': format_text_runs,
- 'duration': html.escape,
- 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
- 'size': html.escape,
- 'author': html.escape,
- 'author_url': lambda url: html.escape(URL_ORIGIN + url),
- 'views': html.escape,
- 'subscriber_count': html.escape,
- 'badges': badges_html,
- 'playlist_index': html.escape,
-}
-
-def get_html_ready(item):
- html_ready = {}
- for key, value in item.items():
- try:
- function = html_transform_dispatch[key]
- except KeyError:
- continue
- html_ready[key] = function(value)
- return html_ready
-
-
-author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''')
-author_template = Template('''<address>By $author</address>''')
-stat_templates = (
- Template('''<span class="views">$views</span>'''),
- Template('''<time datetime="$datetime">$published</time>'''),
-)
-def get_video_stats(html_ready):
- stats = []
- if 'author' in html_ready:
- if 'author_url' in html_ready:
- stats.append(author_template_url.substitute(html_ready))
- else:
- stats.append(author_template.substitute(html_ready))
- for stat in stat_templates:
- try:
- stats.append(stat.strict_substitute(html_ready))
- except KeyError:
- pass
- return ' | '.join(stats)
-
-def video_item_html(item, template):
- html_ready = get_html_ready(item)
- video_info = {}
- for key in ('id', 'title', 'author'):
- try:
- video_info[key] = html_ready[key]
- except KeyError:
- video_info[key] = ''
- try:
- video_info['duration'] = html_ready['duration']
- except KeyError:
- video_info['duration'] = 'Live' # livestreams don't have a duration
-
- html_ready['video_info'] = html.escape(json.dumps(video_info) )
- html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id']
- html_ready['datetime'] = '' #TODO
-
- html_ready['stats'] = get_video_stats(html_ready)
-
- return template.substitute(html_ready)
-
-
-def playlist_item_html(item, template):
- html_ready = get_html_ready(item)
-
- html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id']
- html_ready['datetime'] = '' #TODO
- return template.substitute(html_ready)
-
-
-
-
-
-
-def make_query_string(query_string):
- return '&'.join(key + '=' + ','.join(values) for key,values in query_string.items())
-
-def update_query_string(query_string, items):
- parameters = urllib.parse.parse_qs(query_string)
- parameters.update(items)
- return make_query_string(parameters)
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="page-button">$page</div>''')
-
-def page_buttons_html(current_page, estimated_pages, url, current_query_string):
- if current_page <= 5:
- page_start = 1
- page_end = min(9, estimated_pages)
- else:
- page_start = current_page - 4
- page_end = min(current_page + 4, estimated_pages)
-
- result = ""
- for page in range(page_start, page_end+1):
- if page == current_page:
- template = current_page_button_template
- else:
- template = page_button_template
- result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) )
- return result
-
-
-
-
-
-
-
-showing_results_for = Template('''
- <div class="showing-results-for">
- <div>Showing results for <a>$corrected_query</a></div>
- <div>Search instead for <a href="$original_query_url">$original_query</a></div>
- </div>
-''')
-
-did_you_mean = Template('''
- <div class="did-you-mean">
- <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
- </div>
-''')
-
-def renderer_html(renderer, additional_info={}, current_query_string=''):
- type = list(renderer.keys())[0]
- renderer = renderer[type]
- if type in ('videoRenderer', 'playlistRenderer', 'radioRenderer', 'compactVideoRenderer', 'compactPlaylistRenderer', 'compactRadioRenderer', 'gridVideoRenderer', 'gridPlaylistRenderer', 'gridRadioRenderer'):
- info = renderer_info(renderer)
- info.update(additional_info)
- if type == 'compactVideoRenderer':
- return video_item_html(info, small_video_item_template)
- if type in ('compactPlaylistRenderer', 'compactRadioRenderer'):
- return playlist_item_html(info, small_playlist_item_template)
- if type in ('videoRenderer', 'gridVideoRenderer'):
- return video_item_html(info, medium_video_item_template)
- if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer'):
- return playlist_item_html(info, medium_playlist_item_template)
-
- if type == 'channelRenderer':
- info = renderer_info(renderer)
- html_ready = get_html_ready(info)
- html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id']
- return medium_channel_item_template.substitute(html_ready)
-
- if type == 'movieRenderer':
- return ''
- print(renderer)
- raise NotImplementedError('Unknown renderer type: ' + type)
-
-
-'videoRenderer'
-'playlistRenderer'
-'channelRenderer'
-'radioRenderer'
-'gridVideoRenderer'
-'gridPlaylistRenderer'
-
-'didYouMeanRenderer'
-'showingResultsForRenderer'
+from youtube.template import Template
+import html
+import json
+import re
+import urllib.parse
+import gzip
+import brotli
+import time
+
+
+URL_ORIGIN = "/https://www.youtube.com"
+
+
+# videos (all of type str):
+
+# id
+# title
+# url
+# author
+# author_url
+# thumbnail
+# description
+# published
+# duration
+# likes
+# dislikes
+# views
+# playlist_index
+
+# playlists:
+
+# id
+# title
+# url
+# author
+# author_url
+# thumbnail
+# description
+# updated
+# size
+# first_video_id
+
+
+
+
+
+
+
+# Pagination buttons: a link for every other page, a plain (non-link) box for
+# the page currently shown. The <div> is closed with </div>; it was previously
+# closed with a stray </a>.
+page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
+current_page_button_template = Template('''<div class="current-page-button">$page</div>''')
+
+# Markup for a playlist entry in a medium-sized listing. Every title attribute
+# is quoted: an unquoted attribute value would end at the first space of the
+# title.
+medium_playlist_item_template = Template('''
+ <div class="medium-item">
+ <a class="playlist-thumbnail-box" href="$url" title="$title">
+ <img class="playlist-thumbnail-img" src="$thumbnail">
+ <div class="playlist-thumbnail-info">
+ <span>$size</span>
+ </div>
+ </a>
+
+ <a class="title" href="$url" title="$title">$title</a>
+
+ <address><a href="$author_url">$author</a></address>
+ </div>
+''')
+# Markup for a video entry in a medium-sized listing (substituted by
+# medium_video_item_html). NOTE(review): the block inside <!-- --> still
+# contains $placeholders; whether Template requires values for them depends
+# on youtube.template.Template - confirm.
+medium_video_item_template = Template('''
+ <div class="medium-item">
+ <a class="video-thumbnail-box" href="$url" title="$title">
+ <img class="video-thumbnail-img" src="$thumbnail">
+ <span class="video-duration">$duration</span>
+ </a>
+
+ <a class="title" href="$url">$title</a>
+
+ <div class="stats">$stats</div>
+ <!--
+ <address><a href="$author_url">$author</a></address>
+ <span class="views">$views</span>
+ <time datetime="$datetime">Uploaded $published</time>-->
+
+ <span class="description">$description</span>
+ <span class="badges">$badges</span>
+ </div>
+''')
+
+# Markup for a video entry in a compact listing (substituted by
+# small_video_item_html). The checkbox carries $video_info as its value and
+# belongs to the form with id "playlist-add".
+small_video_item_template = Template('''
+ <div class="small-item-box">
+ <div class="small-item">
+ <a class="video-thumbnail-box" href="$url" title="$title">
+ <img class="video-thumbnail-img" src="$thumbnail">
+ <span class="video-duration">$duration</span>
+ </a>
+ <a class="title" href="$url" title="$title">$title</a>
+
+ <address>$author</address>
+ <span class="views">$views</span>
+
+ </div>
+ <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
+ </div>
+''')
+
+# Markup for a playlist entry in a compact listing (substituted by
+# small_playlist_item_html).
+small_playlist_item_template = Template('''
+ <div class="small-item-box">
+ <div class="small-item">
+ <a class="playlist-thumbnail-box" href="$url" title="$title">
+ <img class="playlist-thumbnail-img" src="$thumbnail">
+ <div class="playlist-thumbnail-info">
+ <span>$size</span>
+ </div>
+ </a>
+ <a class="title" href="$url" title="$title">$title</a>
+
+ <address>$author</address>
+ </div>
+ </div>
+''')
+
+# Markup for a channel entry in a medium-sized listing. It reuses the
+# video-thumbnail CSS classes and, besides the channel fields, expects
+# $duration and $size placeholders.
+medium_channel_item_template = Template('''
+ <div class="medium-item">
+ <a class="video-thumbnail-box" href="$url" title="$title">
+ <img class="video-thumbnail-img" src="$thumbnail">
+ <span class="video-duration">$duration</span>
+ </a>
+
+ <a class="title" href="$url">$title</a>
+
+ <span>$subscriber_count</span>
+ <span>$size</span>
+
+ <span class="description">$description</span>
+ </div>
+''')
+
+
+def fetch_url(url, headers=(), timeout=5, report_text=None):
+    """Fetch ``url`` and return the response body as decompressed bytes.
+
+    headers: a dict, or a list/tuple of ``(name, value)`` pairs. The argument
+        is copied, never modified; ``Accept-Encoding: gzip, br`` is always
+        set on the copy.
+    timeout: timeout in seconds handed to ``urlopen``.
+    report_text: when truthy, printed together with the latency and the
+        time spent reading the body.
+
+    Bodies with a ``gzip`` or ``br`` Content-Encoding are decompressed;
+    any other encoding name is left untouched.
+    """
+    # This module only imports urllib.parse at the top, which does not make
+    # urllib.request available; import it here so the function does not rely
+    # on another module having loaded it first.
+    import urllib.request
+
+    # dict() accepts both mappings and sequences of pairs and always builds a
+    # new object, so a list passed by the caller is not extended in place.
+    headers = dict(headers)
+    headers['Accept-Encoding'] = 'gzip, br'
+
+    start_time = time.time()
+
+    req = urllib.request.Request(url, headers=headers)
+    # The with-block closes the connection even if reading fails.
+    with urllib.request.urlopen(req, timeout=timeout) as response:
+        response_time = time.time()
+        content = response.read()
+        read_finish = time.time()
+        content_encoding = response.getheader('Content-Encoding', default='identity')
+
+    if report_text:
+        print(report_text, 'Latency:', response_time - start_time, '    Read time:', read_finish - response_time)
+
+    # Encodings are listed in the order they were applied, so undo them in
+    # reverse.
+    for encoding in reversed(content_encoding.replace(' ', '').split(',')):
+        if encoding == 'identity':
+            continue
+        if encoding == 'br':
+            content = brotli.decompress(content)
+        elif encoding == 'gzip':
+            content = gzip.decompress(content)
+    return content
+
+# iPhone Safari User-Agent as a tuple of (name, value) pairs, one of the
+# header forms fetch_url accepts.
+mobile_ua = (('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'),)
+
+def dict_add(*dicts):
+    """Merge all later dicts into the first one, in place, and return it.
+
+    Later dicts win on duplicate keys. Raises IndexError when called with
+    no arguments.
+    """
+    target = dicts[0]
+    for extra in dicts[1:]:
+        target.update(extra)
+    return target
+
+def video_id(url):
+    """Return the first ``v`` query parameter of ``url``.
+
+    Raises KeyError when the URL has no ``v`` parameter.
+    """
+    query = urllib.parse.urlparse(url).query
+    return urllib.parse.parse_qs(query)['v'][0]
+
+def uppercase_escape(s):
+    """Replace every literal ``\\UXXXXXXXX`` sequence (8 hex digits) in ``s``
+    with the character it names."""
+    def _decode(match):
+        return chr(int(match.group(1), 16))
+
+    return re.sub(r'\\U([0-9a-fA-F]{8})', _decode, s)
+
+def default_multi_get(object, *keys, default):
+    """Look up ``object[k1][k2]...`` for the given keys or indices.
+
+    Works like ``dict.get`` for nested dictionaries and sequences:
+    ``default`` (keyword-only, required) is returned as soon as one step
+    raises IndexError or KeyError. With no keys, ``object`` itself is
+    returned.
+    """
+    node = object
+    for step in keys:
+        try:
+            node = node[step]
+        except (IndexError, KeyError):
+            return default
+    return node
+
+def get_plain_text(node):
+    """Return the HTML-escaped plain text of a text node.
+
+    Uses ``node['simpleText']`` when present, otherwise the concatenated
+    text of ``node['runs']``.
+    """
+    try:
+        simple_text = node['simpleText']
+    except KeyError:
+        return unformmated_text_runs(node['runs'])
+    return html.escape(simple_text)
+
+def unformmated_text_runs(runs):
+    """Concatenate the HTML-escaped ``"text"`` of every run, ignoring any
+    formatting flags."""
+    return ''.join(html.escape(run["text"]) for run in runs)
+
+def format_text_runs(runs):
+    """Render a list of text runs as HTML.
+
+    A ``str`` argument is returned unchanged (it is not escaped). Otherwise
+    each run's text is escaped and wrapped in ``<b>`` when its ``bold`` flag
+    is set, else in ``<i>`` when its ``italics`` flag is set.
+    """
+    if isinstance(runs, str):
+        return runs
+    pieces = []
+    for run in runs:
+        # check the flags before reading the text, as bold wins over italics
+        if run.get("bold", False):
+            opening, closing = "<b>", "</b>"
+        elif run.get('italics', False):
+            opening, closing = "<i>", "</i>"
+        else:
+            opening = closing = ""
+        pieces.append(opening + html.escape(run["text"]) + closing)
+    return ''.join(pieces)
+
+# Available thumbnail names: default, sddefault, mqdefault, hqdefault, hq720
+def get_thumbnail_url(video_id):
+    """Return the site-relative path of the ``mqdefault`` thumbnail of
+    ``video_id``."""
+    prefix = "/i.ytimg.com/vi/"
+    suffix = "/mqdefault.jpg"
+    return prefix + video_id + suffix
+
+def seconds_to_timestamp(seconds):
+    """Format a duration in seconds as ``M:SS`` or, from one hour on,
+    ``H:MM:SS``.
+
+    ``seconds`` is converted with ``int()``, so numeric strings and floats
+    are accepted.
+    """
+    total = int(seconds)
+    hours, remainder = divmod(total, 3600)
+    minutes, secs = divmod(remainder, 60)
+    if hours != 0:
+        # minutes are zero-padded only when an hour field precedes them
+        leading = str(hours) + ":" + str(minutes).zfill(2)
+    else:
+        leading = str(minutes)
+    return leading + ":" + str(secs).zfill(2)
+
+# playlists:
+
+# id
+# title
+# url
+# author
+# author_url
+# thumbnail
+# description
+# updated
+# size
+# first_video_id
+def medium_playlist_item_info(playlist_renderer):
+    """Extract the display fields of a playlist renderer into a flat dict.
+
+    Returns the keys ``title``, ``id``, ``size``, ``author``, ``author_url``
+    and ``thumbnail``. ``author_url`` already includes URL_ORIGIN, or is ''
+    when the renderer has no byline link (the radioRenderer case).
+    """
+    data = playlist_renderer
+
+    try:
+        byline_run = data['longBylineText']['runs'][0]
+        author_url = URL_ORIGIN + byline_run['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+    except KeyError:  # radioRenderer
+        author_url = ''
+
+    # Two layouts are handled: a list under 'thumbnails', or a single
+    # 'thumbnail' object.
+    try:
+        thumbnail = data['thumbnails'][0]['thumbnails'][0]['url']
+    except KeyError:
+        thumbnail = data['thumbnail']['thumbnails'][0]['url']
+
+    info = {}
+    info["title"] = data["title"]["simpleText"]
+    info['id'] = data["playlistId"]
+    info['size'] = data.get('videoCount', '50+')
+    info["author"] = default_multi_get(data, 'longBylineText', 'runs', 0, 'text', default='Youtube')
+    info["author_url"] = author_url
+    info['thumbnail'] = thumbnail
+    return info
+
+def medium_video_item_info(video_renderer):
+    """Extract the display fields of a video renderer into a flat dict.
+
+    ``description`` is a list of text runs (formatted), not plain text.
+    ``duration`` and ``published`` default to '' when absent (livestreams
+    have no length). On a missing required key the renderer is printed and
+    the KeyError is re-raised.
+    """
+    data = video_renderer
+    try:
+        info = {}
+        info["title"] = data["title"]["simpleText"]
+        info["id"] = data["videoId"]
+        info["description"] = data.get("descriptionSnippet", dict()).get('runs', [])
+        info["thumbnail"] = get_thumbnail_url(data["videoId"])
+        # the view count is either a simpleText or the first text run
+        info["views"] = data['viewCountText'].get('simpleText', None) or data['viewCountText']['runs'][0]['text']
+        info["duration"] = default_multi_get(data, 'lengthText', 'simpleText', default='')
+        info["author"] = data['longBylineText']['runs'][0]['text']
+        info["author_url"] = URL_ORIGIN + data['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+        info["published"] = default_multi_get(data, 'publishedTimeText', 'simpleText', default='')
+        return info
+    except KeyError:
+        print(data)
+        raise
+
+def small_video_item_info(compact_video_renderer):
+    """Extract the display fields of a compact video renderer.
+
+    Returns the keys ``title``, ``id``, ``views``, ``duration``, ``author``
+    and ``author_url``. ``duration`` is '' when the renderer has no length
+    (livestreams); ``author_url`` is returned without URL_ORIGIN.
+    """
+    data = compact_video_renderer
+    info = {}
+    info["title"] = data['title']['simpleText']
+    info["id"] = data['videoId']
+    info["views"] = data['viewCountText'].get('simpleText', None) or data['viewCountText']['runs'][0]['text']
+    info["duration"] = default_multi_get(data, 'lengthText', 'simpleText', default='')
+    info["author"] = data['longBylineText']['runs'][0]['text']
+    info["author_url"] = data['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+    return info
+
+
+# -----
+# HTML
+# -----
+
+def small_video_item_html(item):
+    """Render a compact video entry from an info dict.
+
+    ``item`` must provide ``id``, ``title``, ``author``, ``duration`` and
+    ``views``. The checkbox value ``video_info`` is the JSON encoding of the
+    id/title/author/duration subset, HTML-escaped for use in an attribute.
+    """
+    # Serialise exactly once: encoding the already-encoded string again would
+    # make a consumer decode to a str instead of a dict.
+    video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
+    return small_video_item_template.substitute(
+        title=html.escape(item["title"]),
+        views=item["views"],
+        author=html.escape(item["author"]),
+        duration=item["duration"],
+        url=URL_ORIGIN + "/watch?v=" + item["id"],
+        thumbnail=get_thumbnail_url(item['id']),
+        video_info=html.escape(video_info),
+    )
+
+def small_playlist_item_html(item):
+ return small_playlist_item_template.substitute(
+ title=html.escape(item["title"]),
+ size = item['size'],
+ author="",
+ url = URL_ORIGIN + "/playlist?list=" + item["id"],
+ thumbnail= get_thumbnail_url(item['first_video_id']),
+ )
+
+def medium_playlist_item_html(item):
+ return medium_playlist_item_template.substitute(
+ title=html.escape(item["title"]),
+ size = item['size'],
+ author=item['author'],
+ author_url= URL_ORIGIN + item['author_url'],
+ url = URL_ORIGIN + "/playlist?list=" + item["id"],
+ thumbnail= item['thumbnail'],
+ )
+
+def medium_video_item_html(medium_video_info):
+ info = medium_video_info
+
+ return medium_video_item_template.substitute(
+ title=html.escape(info["title"]),
+ views=info["views"],
+ published = info["published"],
+ description = format_text_runs(info["description"]),
+ author=html.escape(info["author"]),
+ author_url=info["author_url"],
+ duration=info["duration"],
+ url = URL_ORIGIN + "/watch?v=" + info["id"],
+ thumbnail=info['thumbnail'],
+ datetime='', # TODO
+ )
+
+html_functions = {
+ 'compactVideoRenderer': lambda x: small_video_item_html(small_video_item_info(x)),
+ 'videoRenderer': lambda x: medium_video_item_html(medium_video_item_info(x)),
+ 'compactPlaylistRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
+ 'playlistRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
+ 'channelRenderer': lambda x: '',
+ 'radioRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
+ 'compactRadioRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
+ 'didYouMeanRenderer': lambda x: '',
+}
+
+
+
+
+
+
+
+def get_url(node):
+ try:
+ return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+ except KeyError:
+ return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
+
+
+def get_text(node):
+ try:
+ return node['simpleText']
+ except KeyError:
+ return node['runs'][0]['text']
+
+def get_formatted_text(node):
+ try:
+ return node['runs']
+ except KeyError:
+ return node['simpleText']
+
+def get_badges(node):
+ badges = []
+ for badge_node in node:
+ badge = badge_node['metadataBadgeRenderer']['label']
+ if badge.lower() != 'new':
+ badges.append(badge)
+ return badges
+
+def get_thumbnail(node):
+ try:
+ return node['thumbnails'][0]['url'] # polymer format
+ except KeyError:
+ return node['url'] # ajax format
+
+dispatch = {
+
+# polymer format
+ 'title': ('title', get_text),
+ 'publishedTimeText': ('published', get_text),
+ 'videoId': ('id', lambda node: node),
+ 'descriptionSnippet': ('description', get_formatted_text),
+ 'lengthText': ('duration', get_text),
+ 'thumbnail': ('thumbnail', get_thumbnail),
+ 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
+
+ 'videoCountText': ('size', get_text),
+ 'playlistId': ('id', lambda node: node),
+
+ 'subscriberCountText': ('subscriber_count', get_text),
+ 'channelId': ('id', lambda node: node),
+ 'badges': ('badges', get_badges),
+
+# ajax format
+ 'view_count_text': ('views', get_text),
+ 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
+ 'owner_text': ('author', get_text),
+ 'owner_endpoint': ('author_url', lambda node: node['url']),
+ 'description': ('description', get_formatted_text),
+ 'index': ('playlist_index', get_text),
+ 'short_byline': ('author', get_text),
+ 'length': ('duration', get_text),
+ 'video_id': ('id', lambda node: node),
+
+}
+
+def renderer_info(renderer):
+ try:
+ info = {}
+ if 'viewCountText' in renderer: # prefer this one as it contains all the digits
+ info['views'] = get_text(renderer['viewCountText'])
+ elif 'shortViewCountText' in renderer:
+ info['views'] = get_text(renderer['shortViewCountText'])
+
+ for key, node in renderer.items():
+ if key in ('longBylineText', 'shortBylineText'):
+ info['author'] = get_text(node)
+ try:
+ info['author_url'] = get_url(node)
+ except KeyError:
+ pass
+
+ continue
+
+ try:
+ simple_key, function = dispatch[key]
+ except KeyError:
+ continue
+ info[simple_key] = function(node)
+ return info
+ except KeyError:
+ print(renderer)
+ raise
+
+def ajax_info(item_json):
+ try:
+ info = {}
+ for key, node in item_json.items():
+ try:
+ simple_key, function = dispatch[key]
+ except KeyError:
+ continue
+ info[simple_key] = function(node)
+ return info
+ except KeyError:
+ print(item_json)
+ raise
+
+def badges_html(badges):
+ return ' | '.join(map(html.escape, badges))
+
+
+
+
+
+html_transform_dispatch = {
+ 'title': html.escape,
+ 'published': html.escape,
+ 'id': html.escape,
+ 'description': format_text_runs,
+ 'duration': html.escape,
+ 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
+ 'size': html.escape,
+ 'author': html.escape,
+ 'author_url': lambda url: html.escape(URL_ORIGIN + url),
+ 'views': html.escape,
+ 'subscriber_count': html.escape,
+ 'badges': badges_html,
+ 'playlist_index': html.escape,
+}
+
+def get_html_ready(item):
+ html_ready = {}
+ for key, value in item.items():
+ try:
+ function = html_transform_dispatch[key]
+ except KeyError:
+ continue
+ html_ready[key] = function(value)
+ return html_ready
+
+
+author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''')
+author_template = Template('''<address>By $author</address>''')
+stat_templates = (
+ Template('''<span class="views">$views</span>'''),
+ Template('''<time datetime="$datetime">$published</time>'''),
+)
+def get_video_stats(html_ready):
+ stats = []
+ if 'author' in html_ready:
+ if 'author_url' in html_ready:
+ stats.append(author_template_url.substitute(html_ready))
+ else:
+ stats.append(author_template.substitute(html_ready))
+ for stat in stat_templates:
+ try:
+ stats.append(stat.strict_substitute(html_ready))
+ except KeyError:
+ pass
+ return ' | '.join(stats)
+
+def video_item_html(item, template):
+ html_ready = get_html_ready(item)
+ video_info = {}
+ for key in ('id', 'title', 'author'):
+ try:
+ video_info[key] = html_ready[key]
+ except KeyError:
+ video_info[key] = ''
+ try:
+ video_info['duration'] = html_ready['duration']
+ except KeyError:
+ video_info['duration'] = 'Live' # livestreams don't have a duration
+
+ html_ready['video_info'] = html.escape(json.dumps(video_info) )
+ html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id']
+ html_ready['datetime'] = '' #TODO
+
+ html_ready['stats'] = get_video_stats(html_ready)
+
+ return template.substitute(html_ready)
+
+
+def playlist_item_html(item, template):
+ html_ready = get_html_ready(item)
+
+ html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id']
+ html_ready['datetime'] = '' #TODO
+ return template.substitute(html_ready)
+
+
+
+
+
+
+def make_query_string(query_string):
+ return '&'.join(key + '=' + ','.join(values) for key,values in query_string.items())
+
+def update_query_string(query_string, items):
+ parameters = urllib.parse.parse_qs(query_string)
+ parameters.update(items)
+ return make_query_string(parameters)
+
+page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
+current_page_button_template = Template('''<div class="page-button">$page</div>''')
+
+def page_buttons_html(current_page, estimated_pages, url, current_query_string):
+ if current_page <= 5:
+ page_start = 1
+ page_end = min(9, estimated_pages)
+ else:
+ page_start = current_page - 4
+ page_end = min(current_page + 4, estimated_pages)
+
+ result = ""
+ for page in range(page_start, page_end+1):
+ if page == current_page:
+ template = current_page_button_template
+ else:
+ template = page_button_template
+ result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) )
+ return result
+
+
+
+
+
+
+
+showing_results_for = Template('''
+ <div class="showing-results-for">
+ <div>Showing results for <a>$corrected_query</a></div>
+ <div>Search instead for <a href="$original_query_url">$original_query</a></div>
+ </div>
+''')
+
+did_you_mean = Template('''
+ <div class="did-you-mean">
+ <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
+ </div>
+''')
+
+def renderer_html(renderer, additional_info={}, current_query_string=''):
+ type = list(renderer.keys())[0]
+ renderer = renderer[type]
+ if type in ('videoRenderer', 'playlistRenderer', 'radioRenderer', 'compactVideoRenderer', 'compactPlaylistRenderer', 'compactRadioRenderer', 'gridVideoRenderer', 'gridPlaylistRenderer', 'gridRadioRenderer'):
+ info = renderer_info(renderer)
+ info.update(additional_info)
+ if type == 'compactVideoRenderer':
+ return video_item_html(info, small_video_item_template)
+ if type in ('compactPlaylistRenderer', 'compactRadioRenderer'):
+ return playlist_item_html(info, small_playlist_item_template)
+ if type in ('videoRenderer', 'gridVideoRenderer'):
+ return video_item_html(info, medium_video_item_template)
+ if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer'):
+ return playlist_item_html(info, medium_playlist_item_template)
+
+ if type == 'channelRenderer':
+ info = renderer_info(renderer)
+ html_ready = get_html_ready(info)
+ html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id']
+ return medium_channel_item_template.substitute(html_ready)
+
+ if type == 'movieRenderer':
+ return ''
+ print(renderer)
+ raise NotImplementedError('Unknown renderer type: ' + type)
+
+
+'videoRenderer'
+'playlistRenderer'
+'channelRenderer'
+'radioRenderer'
+'gridVideoRenderer'
+'gridPlaylistRenderer'
+
+'didYouMeanRenderer'
+'showingResultsForRenderer'
diff --git a/youtube/opensearch.xml b/youtube/opensearch.xml
index 1764138..c9de40c 100644
--- a/youtube/opensearch.xml
+++ b/youtube/opensearch.xml
@@ -1,11 +1,11 @@
-<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/">
-<ShortName>Youtube local</ShortName>
-<Description>no CIA shit in the background</Description>
-<InputEncoding>UTF-8</InputEncoding>
-<Image width="16" height="16"></Image>
-
-<Url type="text/html" method="GET" template="http://localhost/youtube.com/search">
- <Param name="query" value="{searchTerms}"/>
-</Url>
-<SearchForm>http://localhost/youtube.com/search</SearchForm>
+<SearchPlugin xmlns="http://www.mozilla.org/2006/browser/search/">
+<ShortName>Youtube local</ShortName>
+<Description>no CIA shit in the background</Description>
+<InputEncoding>UTF-8</InputEncoding>
+<Image width="16" height="16"></Image>
+
+<Url type="text/html" method="GET" template="http://localhost/youtube.com/search">
+ <Param name="query" value="{searchTerms}"/>
+</Url>
+<SearchForm>http://localhost/youtube.com/search</SearchForm>
</SearchPlugin> \ No newline at end of file
diff --git a/youtube/playlist.py b/youtube/playlist.py
index fc09191..592d1b4 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -1,243 +1,243 @@
-import base64
-import youtube.common as common
-import urllib
-import json
-from string import Template
-import youtube.proto as proto
-import gevent
-import math
-
-with open("yt_playlist_template.html", "r") as file:
- yt_playlist_template = Template(file.read())
-
-
-
-
-
-
-def youtube_obfuscated_endian(offset):
- if offset < 128:
- return bytes((offset,))
- first_byte = 255 & offset
- second_byte = 255 & (offset >> 7)
- second_byte = second_byte | 1
-
- # The next 2 bytes encode the offset in little endian order,
- # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part
- # of the offset. Instead, to get the number which the two bytes encode, that LSB
- # of the second byte is combined with the most significant bit (MSB) of the first byte
- # in a logical AND. Replace the two bits with the result of the AND to get the two little endian
- # bytes that represent the offset.
-
- return bytes((first_byte, second_byte))
-
-
-
-# just some garbage that's required, don't know what it means, if it means anything.
-ctoken_header = b'\xe2\xa9\x85\xb2\x02' # e2 a9 85 b2 02
-
-def byte(x):
- return bytes((x,))
-
-# TL;DR: the offset is hidden inside 3 nested base 64 encodes with random junk data added on the side periodically
-def create_ctoken(playlist_id, offset):
- obfuscated_offset = b'\x08' + youtube_obfuscated_endian(offset) # 0x08 slapped on for no apparent reason
- obfuscated_offset = b'PT:' + base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'')
- obfuscated_offset = b'z' + byte(len(obfuscated_offset)) + obfuscated_offset
- obfuscated_offset = base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'%3D')
-
- playlist_bytes = b'VL' + bytes(playlist_id, 'ascii')
- main_info = b'\x12' + byte(len(playlist_bytes)) + playlist_bytes + b'\x1a' + byte(len(obfuscated_offset)) + obfuscated_offset
-
- ctoken = base64.urlsafe_b64encode(ctoken_header + byte(len(main_info)) + main_info)
-
- return ctoken.decode('ascii')
-
-def playlist_ctoken(playlist_id, offset):
-
- offset = proto.uint(1, offset)
- # this is just obfuscation as far as I can tell. It doesn't even follow protobuf
- offset = b'PT:' + proto.unpadded_b64encode(offset)
- offset = proto.string(15, offset)
-
- continuation_info = proto.string( 3, proto.percent_b64encode(offset) )
-
- playlist_id = proto.string(2, 'VL' + playlist_id )
- pointless_nest = proto.string(80226972, playlist_id + continuation_info)
-
- return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
-
-# initial request types:
-# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
-# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418
-
-
-# continuation request types:
-# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
-# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]
-
-
-headers_1 = (
- ('Accept', '*/*'),
- ('Accept-Language', 'en-US,en;q=0.5'),
- ('X-YouTube-Client-Name', '1'),
- ('X-YouTube-Client-Version', '2.20180614'),
-)
-
-def playlist_first_page(playlist_id):
- url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
- content = common.fetch_url(url, common.mobile_ua + headers_1)
- if content[0:4] == b")]}'":
- content = content[4:]
- content = json.loads(common.uppercase_escape(content.decode('utf-8')))
- return content
-
-ajax_info_dispatch = {
- 'view_count_text': ('views', common.get_text),
- 'num_videos_text': ('size', lambda node: common.get_text(node).split(' ')[0]),
- 'thumbnail': ('thumbnail', lambda node: node.url),
- 'title': ('title', common.get_text),
- 'owner_text': ('author', common.get_text),
- 'owner_endpoint': ('author_url', lambda node: node.url),
- 'description': ('description', common.get_formatted_text),
-
-}
-def metadata_info(ajax_json):
- info = {}
- try:
- for key, node in ajax_json.items():
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except (KeyError,IndexError):
- print(ajax_json)
- raise
-
-
-
-
-#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
-def get_videos_ajax(playlist_id, page):
-
- url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20)
- headers = {
- 'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '2',
- 'X-YouTube-Client-Version': '1.20180508',
- }
- print("Sending playlist ajax request")
- content = common.fetch_url(url, headers)
- with open('playlist_debug', 'wb') as f:
- f.write(content)
- content = content[4:]
- print("Finished recieving playlist response")
-
- info = json.loads(common.uppercase_escape(content.decode('utf-8')))
- return info
-
-def get_playlist_videos(ajax_json):
- videos = []
- #info = get_bloated_playlist_videos(playlist_id, page)
- #print(info)
- video_list = ajax_json['content']['continuation_contents']['contents']
-
-
- for video_json_crap in video_list:
- try:
- videos.append({
- "title": video_json_crap["title"]['runs'][0]['text'],
- "id": video_json_crap["video_id"],
- "views": "",
- "duration": common.default_multi_get(video_json_crap, 'length', 'runs', 0, 'text', default=''), # livestreams dont have a length
- "author": video_json_crap['short_byline']['runs'][0]['text'],
- "author_url": '',
- "published": '',
- 'playlist_index': '',
-
- })
- except (KeyError, IndexError):
- print(video_json_crap)
- raise
- return videos
-
-def get_playlist_videos_format2(playlist_id, page):
- videos = []
- info = get_bloated_playlist_videos(playlist_id, page)
- video_list = info['response']['continuationContents']['playlistVideoListContinuation']['contents']
-
- for video_json_crap in video_list:
-
- video_json_crap = video_json_crap['videoRenderer']
-
- try:
- videos.append({
- "title": video_json_crap["title"]['runs'][0]['text'],
- "video_id": video_json_crap["videoId"],
- "views": "",
- "duration": common.default_multi_get(video_json_crap, 'lengthText', 'runs', 0, 'text', default=''), # livestreams dont have a length
- "uploader": video_json_crap['shortBylineText']['runs'][0]['text'],
- "uploader_url": common.ORIGIN_URL + video_json_crap['shortBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published": common.default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
- 'playlist_index': video_json_crap['index']['runs'][0]['text'],
-
- })
- except (KeyError, IndexError):
- print(video_json_crap)
- raise
- return videos
-
-
-def playlist_videos_html(ajax_json):
- result = ''
- for info in get_playlist_videos(ajax_json):
- result += common.small_video_item_html(info)
- return result
-
-playlist_stat_template = Template('''
-<div>$stat</div>''')
-def get_playlist_page(query_string):
- parameters = urllib.parse.parse_qs(query_string)
- playlist_id = parameters['list'][0]
- page = parameters.get("page", "1")[0]
- if page == "1":
- first_page_json = playlist_first_page(playlist_id)
- this_page_json = first_page_json
- else:
- tasks = (
- gevent.spawn(playlist_first_page, playlist_id ),
- gevent.spawn(get_videos_ajax, playlist_id, page)
- )
- gevent.joinall(tasks)
- first_page_json, this_page_json = tasks[0].value, tasks[1].value
-
- try:
- video_list = this_page_json['content']['section_list']['contents'][0]['contents'][0]['contents']
- except KeyError:
- video_list = this_page_json['content']['continuation_contents']['contents']
- videos_html = ''
- for video_json in video_list:
- info = common.ajax_info(video_json)
- videos_html += common.video_item_html(info, common.small_video_item_template)
-
-
- metadata = common.ajax_info(first_page_json['content']['playlist_header'])
- video_count = int(metadata['size'].replace(',', ''))
- page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", query_string)
-
- html_ready = common.get_html_ready(metadata)
- html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page)
-
- stats = ''
- stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos')
- stats += playlist_stat_template.substitute(stat=html_ready['views'])
- return yt_playlist_template.substitute(
- videos = videos_html,
- page_buttons = page_buttons,
- stats = stats,
- **html_ready
+import base64
+import youtube.common as common
+import urllib
+import json
+from string import Template
+import youtube.proto as proto
+import gevent
+import math
+
+with open("yt_playlist_template.html", "r") as file:
+ yt_playlist_template = Template(file.read())
+
+
+
+
+
+
+def youtube_obfuscated_endian(offset):
+ if offset < 128:
+ return bytes((offset,))
+ first_byte = 255 & offset
+ second_byte = 255 & (offset >> 7)
+ second_byte = second_byte | 1
+
+ # The next 2 bytes encode the offset in little endian order,
+ # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part
+ # of the offset. Instead, to get the number which the two bytes encode, that LSB
+ # of the second byte is combined with the most significant bit (MSB) of the first byte
+ # in a logical AND. Replace the two bits with the result of the AND to get the two little endian
+ # bytes that represent the offset.
+
+ return bytes((first_byte, second_byte))
+
+
+
+# just some garbage that's required, don't know what it means, if it means anything.
+ctoken_header = b'\xe2\xa9\x85\xb2\x02' # e2 a9 85 b2 02
+
+def byte(x):
+ return bytes((x,))
+
+# TL;DR: the offset is hidden inside 3 nested base 64 encodes with random junk data added on the side periodically
+def create_ctoken(playlist_id, offset):
+ obfuscated_offset = b'\x08' + youtube_obfuscated_endian(offset) # 0x08 slapped on for no apparent reason
+ obfuscated_offset = b'PT:' + base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'')
+ obfuscated_offset = b'z' + byte(len(obfuscated_offset)) + obfuscated_offset
+ obfuscated_offset = base64.urlsafe_b64encode(obfuscated_offset).replace(b'=', b'%3D')
+
+ playlist_bytes = b'VL' + bytes(playlist_id, 'ascii')
+ main_info = b'\x12' + byte(len(playlist_bytes)) + playlist_bytes + b'\x1a' + byte(len(obfuscated_offset)) + obfuscated_offset
+
+ ctoken = base64.urlsafe_b64encode(ctoken_header + byte(len(main_info)) + main_info)
+
+ return ctoken.decode('ascii')
+
+def playlist_ctoken(playlist_id, offset):
+
+ offset = proto.uint(1, offset)
+ # this is just obfuscation as far as I can tell. It doesn't even follow protobuf
+ offset = b'PT:' + proto.unpadded_b64encode(offset)
+ offset = proto.string(15, offset)
+
+ continuation_info = proto.string( 3, proto.percent_b64encode(offset) )
+
+ playlist_id = proto.string(2, 'VL' + playlist_id )
+ pointless_nest = proto.string(80226972, playlist_id + continuation_info)
+
+ return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
+
+# initial request types:
+# polymer_json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0
+# ajax json: https://m.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ&pbj=1&lact=0 with header X-YouTube-Client-Version: 1.20180418
+
+
+# continuation request types:
+# polymer_json: https://m.youtube.com/playlist?&ctoken=[...]&pbj=1
+# ajax json: https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=[...]
+
+
+headers_1 = (
+ ('Accept', '*/*'),
+ ('Accept-Language', 'en-US,en;q=0.5'),
+ ('X-YouTube-Client-Name', '1'),
+ ('X-YouTube-Client-Version', '2.20180614'),
+)
+
+def playlist_first_page(playlist_id):
+ url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
+ content = common.fetch_url(url, common.mobile_ua + headers_1)
+ if content[0:4] == b")]}'":
+ content = content[4:]
+ content = json.loads(common.uppercase_escape(content.decode('utf-8')))
+ return content
+
+ajax_info_dispatch = {
+ 'view_count_text': ('views', common.get_text),
+ 'num_videos_text': ('size', lambda node: common.get_text(node).split(' ')[0]),
+ 'thumbnail': ('thumbnail', lambda node: node.url),
+ 'title': ('title', common.get_text),
+ 'owner_text': ('author', common.get_text),
+ 'owner_endpoint': ('author_url', lambda node: node.url),
+ 'description': ('description', common.get_formatted_text),
+
+}
+def metadata_info(ajax_json):
+ info = {}
+ try:
+ for key, node in ajax_json.items():
+ try:
+ simple_key, function = dispatch[key]
+ except KeyError:
+ continue
+ info[simple_key] = function(node)
+ return info
+ except (KeyError,IndexError):
+ print(ajax_json)
+ raise
+
+
+
+
+#https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1
+def get_videos_ajax(playlist_id, page):
+
+ url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20)
+ headers = {
+ 'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
+ 'Accept': '*/*',
+ 'Accept-Language': 'en-US,en;q=0.5',
+ 'X-YouTube-Client-Name': '2',
+ 'X-YouTube-Client-Version': '1.20180508',
+ }
+ print("Sending playlist ajax request")
+ content = common.fetch_url(url, headers)
+ with open('playlist_debug', 'wb') as f:
+ f.write(content)
+ content = content[4:]
+ print("Finished recieving playlist response")
+
+ info = json.loads(common.uppercase_escape(content.decode('utf-8')))
+ return info
+
+def get_playlist_videos(ajax_json):
+ videos = []
+ #info = get_bloated_playlist_videos(playlist_id, page)
+ #print(info)
+ video_list = ajax_json['content']['continuation_contents']['contents']
+
+
+ for video_json_crap in video_list:
+ try:
+ videos.append({
+ "title": video_json_crap["title"]['runs'][0]['text'],
+ "id": video_json_crap["video_id"],
+ "views": "",
+ "duration": common.default_multi_get(video_json_crap, 'length', 'runs', 0, 'text', default=''), # livestreams dont have a length
+ "author": video_json_crap['short_byline']['runs'][0]['text'],
+ "author_url": '',
+ "published": '',
+ 'playlist_index': '',
+
+ })
+ except (KeyError, IndexError):
+ print(video_json_crap)
+ raise
+ return videos
+
+def get_playlist_videos_format2(playlist_id, page):
+ videos = []
+ info = get_bloated_playlist_videos(playlist_id, page)
+ video_list = info['response']['continuationContents']['playlistVideoListContinuation']['contents']
+
+ for video_json_crap in video_list:
+
+ video_json_crap = video_json_crap['videoRenderer']
+
+ try:
+ videos.append({
+ "title": video_json_crap["title"]['runs'][0]['text'],
+ "video_id": video_json_crap["videoId"],
+ "views": "",
+ "duration": common.default_multi_get(video_json_crap, 'lengthText', 'runs', 0, 'text', default=''), # livestreams dont have a length
+ "uploader": video_json_crap['shortBylineText']['runs'][0]['text'],
+ "uploader_url": common.ORIGIN_URL + video_json_crap['shortBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
+ "published": common.default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
+ 'playlist_index': video_json_crap['index']['runs'][0]['text'],
+
+ })
+ except (KeyError, IndexError):
+ print(video_json_crap)
+ raise
+ return videos
+
+
+def playlist_videos_html(ajax_json):
+ result = ''
+ for info in get_playlist_videos(ajax_json):
+ result += common.small_video_item_html(info)
+ return result
+
+playlist_stat_template = Template('''
+<div>$stat</div>''')
+def get_playlist_page(query_string):
+ parameters = urllib.parse.parse_qs(query_string)
+ playlist_id = parameters['list'][0]
+ page = parameters.get("page", "1")[0]
+ if page == "1":
+ first_page_json = playlist_first_page(playlist_id)
+ this_page_json = first_page_json
+ else:
+ tasks = (
+ gevent.spawn(playlist_first_page, playlist_id ),
+ gevent.spawn(get_videos_ajax, playlist_id, page)
+ )
+ gevent.joinall(tasks)
+ first_page_json, this_page_json = tasks[0].value, tasks[1].value
+
+ try:
+ video_list = this_page_json['content']['section_list']['contents'][0]['contents'][0]['contents']
+ except KeyError:
+ video_list = this_page_json['content']['continuation_contents']['contents']
+ videos_html = ''
+ for video_json in video_list:
+ info = common.ajax_info(video_json)
+ videos_html += common.video_item_html(info, common.small_video_item_template)
+
+
+ metadata = common.ajax_info(first_page_json['content']['playlist_header'])
+ video_count = int(metadata['size'].replace(',', ''))
+ page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", query_string)
+
+ html_ready = common.get_html_ready(metadata)
+ html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page)
+
+ stats = ''
+ stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos')
+ stats += playlist_stat_template.substitute(stat=html_ready['views'])
+ return yt_playlist_template.substitute(
+ videos = videos_html,
+ page_buttons = page_buttons,
+ stats = stats,
+ **html_ready
) \ No newline at end of file
diff --git a/youtube/proto.py b/youtube/proto.py
index 9f9dbcc..6230e51 100644
--- a/youtube/proto.py
+++ b/youtube/proto.py
@@ -1,65 +1,65 @@
-from math import ceil
-import base64
-
-def byte(n):
- return bytes((n,))
-
-
-def varint_encode(offset):
- '''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one.
- The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
- aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
- 1ccccccc 1bbbbbbb 0aaaaaaa
-
- This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
- See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
- needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
- encoded_bytes = bytearray(needed_bytes)
- for i in range(0, needed_bytes - 1):
- encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
- offset = offset >> 7
- encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte
-
- return bytes(encoded_bytes)
-
-
-def varint_decode(encoded):
- decoded = 0
- for i, byte in enumerate(encoded):
- decoded |= (byte & 127) << 7*i
-
- if not (byte & 128):
- break
- return decoded
-
-
-def string(field_number, data):
- data = as_bytes(data)
- return _proto_field(2, field_number, varint_encode(len(data)) + data)
-nested = string
-
-def uint(field_number, value):
- return _proto_field(0, field_number, varint_encode(value))
-
-
-
-
-def _proto_field(wire_type, field_number, data):
- ''' See https://developers.google.com/protocol-buffers/docs/encoding#structure '''
- return varint_encode( (field_number << 3) | wire_type) + data
-
-
-
-def percent_b64encode(data):
- return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')
-
-
-def unpadded_b64encode(data):
- return base64.urlsafe_b64encode(data).replace(b'=', b'')
-
-def as_bytes(value):
- if isinstance(value, str):
- return value.encode('ascii')
- return value
-
+from math import ceil
+import base64
+
+def byte(n):
+ return bytes((n,))
+
+
+def varint_encode(offset):
+ '''In this encoding system, for each 8-bit byte, the first bit is 1 if there are more bytes, and 0 is this is the last one.
+ The next 7 bits are data. These 7-bit sections represent the data in Little endian order. For example, suppose the data is
+ aaaaaaabbbbbbbccccccc (each of these sections is 7 bits). It will be encoded as:
+ 1ccccccc 1bbbbbbb 0aaaaaaa
+
+ This encoding is used in youtube parameters to encode offsets and to encode the length for length-prefixed data.
+ See https://developers.google.com/protocol-buffers/docs/encoding#varints for more info.'''
+ needed_bytes = ceil(offset.bit_length()/7) or 1 # (0).bit_length() returns 0, but we need 1 in that case.
+ encoded_bytes = bytearray(needed_bytes)
+ for i in range(0, needed_bytes - 1):
+ encoded_bytes[i] = (offset & 127) | 128 # 7 least significant bits
+ offset = offset >> 7
+ encoded_bytes[-1] = offset & 127 # leave first bit as zero for last byte
+
+ return bytes(encoded_bytes)
+
+
+def varint_decode(encoded):
+ decoded = 0
+ for i, byte in enumerate(encoded):
+ decoded |= (byte & 127) << 7*i
+
+ if not (byte & 128):
+ break
+ return decoded
+
+
+def string(field_number, data):
+ data = as_bytes(data)
+ return _proto_field(2, field_number, varint_encode(len(data)) + data)
+nested = string
+
+def uint(field_number, value):
+ return _proto_field(0, field_number, varint_encode(value))
+
+
+
+
+def _proto_field(wire_type, field_number, data):
+ ''' See https://developers.google.com/protocol-buffers/docs/encoding#structure '''
+ return varint_encode( (field_number << 3) | wire_type) + data
+
+
+
+def percent_b64encode(data):
+ return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')
+
+
+def unpadded_b64encode(data):
+ return base64.urlsafe_b64encode(data).replace(b'=', b'')
+
+def as_bytes(value):
+ if isinstance(value, str):
+ return value.encode('ascii')
+ return value
+
\ No newline at end of file
diff --git a/youtube/search.py b/youtube/search.py
index 5268dbe..5982d9b 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -1,231 +1,231 @@
-import json
-import urllib
-import html
-from string import Template
-import base64
-from math import ceil
-from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
-import youtube.common as common
-
-with open("yt_search_results_template.html", "r") as file:
- yt_search_results_template = file.read()
-
-with open("yt_search_template.html", "r") as file:
- yt_search_template = file.read()
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="page-button">$page</div>''')
-video_result_template = '''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$video_url" title="$video_title">
- <img class="video-thumbnail-img" src="$thumbnail_url">
- <span class="video-duration">$length</span>
- </a>
-
- <a class="title" href="$video_url">$video_title</a>
-
- <address>Uploaded by <a href="$uploader_channel_url">$uploader</a></address>
- <span class="views">$views</span>
-
-
- <time datetime="$datetime">Uploaded $upload_date</time>
-
- <span class="description">$description</span>
- </div>
-'''
-
-
-
-# Sort: 1
- # Upload date: 2
- # View count: 3
- # Rating: 1
-# Offset: 9
-# Filters: 2
- # Upload date: 1
- # Type: 2
- # Duration: 3
-
-
-features = {
- '4k': 14,
- 'hd': 4,
- 'hdr': 25,
- 'subtitles': 5,
- 'creative_commons': 6,
- '3d': 7,
- 'live': 8,
- 'purchased': 9,
- '360': 15,
- 'location': 23,
-}
-
-def page_number_to_sp_parameter(page):
- offset = (int(page) - 1)*20 # 20 results per page
- first_byte = 255 & offset
- second_byte = 255 & (offset >> 7)
- second_byte = second_byte | 1
-
- # 0b01001000 is required, and is always the same.
- # The next 2 bytes encode the offset in little endian order,
- # BUT, it's done in a strange way. The least significant bit (LSB) of the second byte is not part
- # of the offset. Instead, to get the number which the two bytes encode, that LSB
- # of the second byte is combined with the most significant bit (MSB) of the first byte
- # in a logical AND. Replace the two bits with the result of the AND to get the two little endian
- # bytes that represent the offset.
- # I figured this out by trial and error on the sp parameter. I don't know why it's done like this;
- # perhaps it's just obfuscation.
- param_bytes = bytes((0b01001000, first_byte, second_byte))
- param_encoded = urllib.parse.quote(base64.urlsafe_b64encode(param_bytes))
- return param_encoded
-
-def get_search_json(query, page):
- url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
- headers = {
- 'Host': 'www.youtube.com',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'X-YouTube-Client-Name': '1',
- 'X-YouTube-Client-Version': '2.20180418',
- }
- url += "&pbj=1&sp=" + page_number_to_sp_parameter(page)
- content = common.fetch_url(url, headers=headers)
- info = json.loads(content)
- return info
-
-"""def get_search_info(query, page):
- result_info = dict()
- info = get_bloated_search_info(query, page)
-
- estimated_results = int(info[1]['response']['estimatedResults'])
- estimated_pages = ceil(estimated_results/20)
- result_info['estimated_results'] = estimated_results
- result_info['estimated_pages'] = estimated_pages
-
- result_info['results'] = []
- # this is what you get when you hire H-1B's
- video_list = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
-
-
- for video_json_crap in video_list:
- # they have a dictionary whose only content is another dictionary...
- try:
- type = list(video_json_crap.keys())[0]
- except KeyError:
- continue #channelRenderer or playlistRenderer
- '''description = ""
- for text_run in video_json_crap["descriptionSnippet"]["runs"]:
- if text_run.get("bold", False):
- description += "<b>" + html.escape'''
- try:
- result_info['results'].append({
- "title": video_json_crap["title"]["simpleText"],
- "video_id": video_json_crap["videoId"],
- "description": video_json_crap.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text
- "thumbnail": get_thumbnail_url(video_json_crap["videoId"]),
- "views_text": video_json_crap['viewCountText'].get('simpleText', None) or video_json_crap['viewCountText']['runs'][0]['text'],
- "length_text": default_multi_get(video_json_crap, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "uploader": video_json_crap['longBylineText']['runs'][0]['text'],
- "uploader_url": URL_ORIGIN + video_json_crap['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published_time_text": default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
-
- })
- except KeyError:
- print(video_json_crap)
- raise
- return result_info"""
-
-
-def page_buttons_html(page_start, page_end, current_page, query):
- result = ""
- for page in range(page_start, page_end+1):
- if page == current_page:
- template = current_page_button_template
- else:
- template = page_button_template
- result += template.substitute(page=page, href=URL_ORIGIN + "/search?query=" + urllib.parse.quote_plus(query) + "&page=" + str(page))
- return result
-
-showing_results_for = Template('''
- <div>Showing results for <a>$corrected_query</a></div>
- <div>Search instead for <a href="$original_query_url">$original_query</a></div>
-''')
-did_you_mean = Template('''
- <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
-''')
-def get_search_page(query_string, parameters=()):
- qs_query = urllib.parse.parse_qs(query_string)
- if len(qs_query) == 0:
- return yt_search_template
- query = qs_query["query"][0]
- page = qs_query.get("page", "1")[0]
-
- info = get_search_json(query, page)
-
- estimated_results = int(info[1]['response']['estimatedResults'])
- estimated_pages = ceil(estimated_results/20)
- results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
-
- corrections = ''
- result_list_html = ""
- for renderer in results:
- type = list(renderer.keys())[0]
- if type == 'shelfRenderer':
- continue
- if type == 'didYouMeanRenderer':
- renderer = renderer[type]
- corrected_query_string = urllib.parse.parse_qs(query_string)
- corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
- corrected_query_url = URL_ORIGIN + '/search?' + common.make_query_string(corrected_query_string)
- corrections = did_you_mean.substitute(
- corrected_query_url = corrected_query_url,
- corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
- )
- continue
- if type == 'showingResultsForRenderer':
- renderer = renderer[type]
- no_autocorrect_query_string = urllib.parse.parse_qs(query_string)
- no_autocorrect_query_string['autocorrect'] = ['0']
- no_autocorrect_query_url = URL_ORIGIN + '/search?' + common.make_query_string(no_autocorrect_query_string)
- corrections = showing_results_for.substitute(
- corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']),
- original_query_url = no_autocorrect_query_url,
- original_query = html.escape(renderer['originalQuery']['simpleText']),
- )
- continue
- result_list_html += common.renderer_html(renderer, current_query_string=query_string)
- '''type = list(result.keys())[0]
- result = result[type]
- if type == "showingResultsForRenderer":
- url = URL_ORIGIN + "/search"
- if len(parameters) > 0:
- url += ';' + ';'.join(parameters)
- url += '?' + '&'.join(key + '=' + ','.join(values) for key,values in qs_query.items())
-
- result_list_html += showing_results_for_template.substitute(
- corrected_query=common.format_text_runs(result['correctedQuery']['runs']),
-
- )
- else:
- result_list_html += common.html_functions[type](result)'''
-
- page = int(page)
- if page <= 5:
- page_start = 1
- page_end = min(9, estimated_pages)
- else:
- page_start = page - 4
- page_end = min(page + 4, estimated_pages)
-
-
- result = Template(yt_search_results_template).substitute(
- results = result_list_html,
- page_title = query + " - Search",
- search_box_value = html.escape(query),
- number_of_results = '{:,}'.format(estimated_results),
- number_of_pages = '{:,}'.format(estimated_pages),
- page_buttons = page_buttons_html(page_start, page_end, page, query),
- corrections = corrections
- )
+import json
+import urllib
+import html
+from string import Template
+import base64
+from math import ceil
+from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN
+import youtube.common as common
+
+# Read the HTML templates once, at import time. The encoding is given
+# explicitly so that decoding does not depend on the platform default.
+# NOTE(review): assumes the template files are stored as UTF-8 - confirm.
+with open("yt_search_results_template.html", "r", encoding="utf-8") as file:
+    yt_search_results_template = file.read()
+
+with open("yt_search_template.html", "r", encoding="utf-8") as file:
+    yt_search_template = file.read()
+
+# Button linking to a results page ($href) labelled with its number ($page).
+page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
+# Same button without a link; page_buttons_html uses it for the current page.
+current_page_button_template = Template('''<div class="page-button">$page</div>''')
+# Markup for a single video result, kept as a plain string.
+# NOTE(review): not referenced in the visible part of this module - confirm
+# whether it is still needed.
+video_result_template = '''
+ <div class="medium-item">
+ <a class="video-thumbnail-box" href="$video_url" title="$video_title">
+ <img class="video-thumbnail-img" src="$thumbnail_url">
+ <span class="video-duration">$length</span>
+ </a>
+
+ <a class="title" href="$video_url">$video_title</a>
+
+ <address>Uploaded by <a href="$uploader_channel_url">$uploader</a></address>
+ <span class="views">$views</span>
+
+
+ <time datetime="$datetime">Uploaded $upload_date</time>
+
+ <span class="description">$description</span>
+ </div>
+'''
+
+
+
+# Notes on the layout of the "sp" search parameter, as "name: number" pairs.
+# The Offset entry (9) matches the key byte 0b01001000 == (9 << 3) | 0 that
+# page_number_to_sp_parameter emits.
+# NOTE(review): the numbers are presumably protobuf field numbers and the
+# nested entries the values/sub-fields of the entry above them - confirm.
+# Sort: 1
+ # Upload date: 2
+ # View count: 3
+ # Rating: 1
+# Offset: 9
+# Filters: 2
+ # Upload date: 1
+ # Type: 2
+ # Duration: 3
+
+
+# Feature filter name -> number.
+# NOTE(review): presumably field numbers inside the Filters entry above; the
+# table is not used in the visible part of this module - confirm.
+features = {
+ '4k': 14,
+ 'hd': 4,
+ 'hdr': 25,
+ 'subtitles': 5,
+ 'creative_commons': 6,
+ '3d': 7,
+ 'live': 8,
+ 'purchased': 9,
+ '360': 15,
+ 'location': 23,
+}
+
+def page_number_to_sp_parameter(page):
+    '''Return the URL-quoted, base64-encoded "sp" parameter for a results page.
+
+    page is the 1-based page number (an int or a numeric string); values
+    below 1 are treated as the first page. Raises ValueError if page is not
+    numeric.
+
+    The parameter consists of the key byte 0b01001000 (field 9, wire type 0)
+    followed by the result offset encoded as a varint, see
+    https://developers.google.com/protocol-buffers/docs/encoding#varints
+
+    The earlier hand-rolled two-byte encoding was only a valid varint for
+    some offsets: page 14 (offset 260), for example, decoded as offset 4.'''
+    offset = max(int(page) - 1, 0)*20   # 20 results per page
+
+    param_bytes = bytearray((0b01001000,))
+    while True:
+        low_bits = offset & 127     # next 7 bits, least significant first
+        offset >>= 7
+        if offset:
+            param_bytes.append(low_bits | 128)   # high bit set: more bytes follow
+        else:
+            param_bytes.append(low_bits)         # high bit clear: last byte
+            break
+    return urllib.parse.quote(base64.urlsafe_b64encode(bytes(param_bytes)))
+
+def get_search_json(query, page):
+    '''Fetch one page of search results for query and return the parsed JSON.
+
+    The request is made through common.fetch_url against the /results
+    endpoint with pbj=1 and the page encoded in the sp parameter.'''
+    request_headers = {
+        'Host': 'www.youtube.com',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
+        'Accept': '*/*',
+        'Accept-Language': 'en-US,en;q=0.5',
+        'X-YouTube-Client-Name': '1',
+        'X-YouTube-Client-Version': '2.20180418',
+    }
+    url = ''.join((
+        "https://www.youtube.com/results?search_query=",
+        urllib.parse.quote_plus(query),
+        "&pbj=1&sp=",
+        page_number_to_sp_parameter(page),
+    ))
+    return json.loads(common.fetch_url(url, headers=request_headers))
+
+"""def get_search_info(query, page):
+ result_info = dict()
+ info = get_bloated_search_info(query, page)
+
+ estimated_results = int(info[1]['response']['estimatedResults'])
+ estimated_pages = ceil(estimated_results/20)
+ result_info['estimated_results'] = estimated_results
+ result_info['estimated_pages'] = estimated_pages
+
+ result_info['results'] = []
+ # this is what you get when you hire H-1B's
+ video_list = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
+
+
+ for video_json_crap in video_list:
+ # they have a dictionary whose only content is another dictionary...
+ try:
+ type = list(video_json_crap.keys())[0]
+ except KeyError:
+ continue #channelRenderer or playlistRenderer
+ '''description = ""
+ for text_run in video_json_crap["descriptionSnippet"]["runs"]:
+ if text_run.get("bold", False):
+ description += "<b>" + html.escape'''
+ try:
+ result_info['results'].append({
+ "title": video_json_crap["title"]["simpleText"],
+ "video_id": video_json_crap["videoId"],
+ "description": video_json_crap.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text
+ "thumbnail": get_thumbnail_url(video_json_crap["videoId"]),
+ "views_text": video_json_crap['viewCountText'].get('simpleText', None) or video_json_crap['viewCountText']['runs'][0]['text'],
+ "length_text": default_multi_get(video_json_crap, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
+ "uploader": video_json_crap['longBylineText']['runs'][0]['text'],
+ "uploader_url": URL_ORIGIN + video_json_crap['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
+ "published_time_text": default_multi_get(video_json_crap, 'publishedTimeText', 'simpleText', default=''),
+
+ })
+ except KeyError:
+ print(video_json_crap)
+ raise
+ return result_info"""
+
+
+def page_buttons_html(page_start, page_end, current_page, query):
+    '''Return the HTML of the page buttons page_start..page_end (inclusive).
+
+    Every button links to the search for query on that page, except the one
+    for current_page, which is rendered without a link.'''
+    href_prefix = URL_ORIGIN + "/search?query=" + urllib.parse.quote_plus(query) + "&page="
+    buttons = []
+    for number in range(page_start, page_end + 1):
+        chosen = current_page_button_template if number == current_page else page_button_template
+        buttons.append(chosen.substitute(page=number, href=href_prefix + str(number)))
+    return "".join(buttons)
+
+# Notice that the results are for a corrected query, with a link
+# ($original_query_url) to search for the text as originally typed.
+showing_results_for = Template('''
+ <div>Showing results for <a>$corrected_query</a></div>
+ <div>Search instead for <a href="$original_query_url">$original_query</a></div>
+''')
+# Suggestion of a corrected query, linking to a search for it.
+did_you_mean = Template('''
+ <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
+''')
+def get_search_page(query_string, parameters=()):
+    '''Build the HTML of the search page for a URL query string.
+
+    query_string is the raw query part of the request URL. Its "query" field
+    is the search text and its optional "page" field the 1-based page number
+    (default 1). When there is no search text, the bare search form template
+    is returned. parameters is accepted but not used.
+
+    Raises ValueError if "page" is not numeric; KeyError/IndexError propagate
+    if the fetched JSON does not have the expected structure.'''
+    qs_query = urllib.parse.parse_qs(query_string)
+    if "query" not in qs_query:
+        # Covers an empty query string as well as one that only carries other
+        # fields (e.g. "page=2"), which used to raise KeyError.
+        return yt_search_template
+    query = qs_query["query"][0]
+    # The default must be a list, like the values parse_qs produces.
+    page = qs_query.get("page", ["1"])[0]
+
+    info = get_search_json(query, page)
+    response = info[1]['response']
+
+    estimated_results = int(response['estimatedResults'])
+    estimated_pages = ceil(estimated_results/20)    # 20 results per page
+    results = response['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents']
+
+    corrections = ''
+    result_html_parts = []
+    for renderer in results:
+        # Each entry is a dict with a single key naming the renderer type.
+        renderer_type = list(renderer.keys())[0]
+        if renderer_type == 'shelfRenderer':
+            continue
+        if renderer_type == 'didYouMeanRenderer':
+            details = renderer[renderer_type]
+            corrected_query_string = urllib.parse.parse_qs(query_string)
+            corrected_query_string['query'] = [details['correctedQueryEndpoint']['searchEndpoint']['query']]
+            corrected_query_url = URL_ORIGIN + '/search?' + common.make_query_string(corrected_query_string)
+            corrections = did_you_mean.substitute(
+                corrected_query_url=corrected_query_url,
+                corrected_query=common.format_text_runs(details['correctedQuery']['runs']),
+            )
+            continue
+        if renderer_type == 'showingResultsForRenderer':
+            details = renderer[renderer_type]
+            no_autocorrect_query_string = urllib.parse.parse_qs(query_string)
+            no_autocorrect_query_string['autocorrect'] = ['0']
+            no_autocorrect_query_url = URL_ORIGIN + '/search?' + common.make_query_string(no_autocorrect_query_string)
+            corrections = showing_results_for.substitute(
+                corrected_query=common.format_text_runs(details['correctedQuery']['runs']),
+                original_query_url=no_autocorrect_query_url,
+                original_query=html.escape(details['originalQuery']['simpleText']),
+            )
+            continue
+        result_html_parts.append(common.renderer_html(renderer, current_query_string=query_string))
+
+    # Show up to 9 buttons: the current page with 4 neighbours on each side,
+    # or pages 1-9 while the current page is among the first five.
+    page = int(page)
+    if page <= 5:
+        page_start = 1
+        page_end = min(9, estimated_pages)
+    else:
+        page_start = page - 4
+        page_end = min(page + 4, estimated_pages)
+
+    result = Template(yt_search_results_template).substitute(
+        results=''.join(result_html_parts),
+        # The query is user input: escape it before it goes into the page.
+        page_title=html.escape(query + " - Search"),
+        search_box_value=html.escape(query),
+        number_of_results='{:,}'.format(estimated_results),
+        number_of_pages='{:,}'.format(estimated_pages),
+        page_buttons=page_buttons_html(page_start, page_end, page, query),
+        corrections=corrections,
+    )
     return result
\ No newline at end of file
diff --git a/youtube/shared.css b/youtube/shared.css
index 39e76f4..2ea511a 100644
--- a/youtube/shared.css
+++ b/youtube/shared.css
@@ -1,271 +1,271 @@
-h1, h2, h3, h4, h5, h6, div{
- margin:0;
- padding:0;
-
-}
-
-
-body{
- margin:0;
- padding: 0;
- color:#222;
-
-
- background-color:#cccccc;
-
- min-height:100vh;
-
- display:grid;
- grid-template-rows: 50px 1fr;
-}
-
- header{
- background-color:#333333;
-
- grid-row: 1;
- }
-
- main{
- grid-row: 2;
- }
-
-button{
- padding:0; /* Fuck browser-specific styling. Fix your shit mozilla */
-}
-address{
- font-style:normal;
-}
-#site-search{
- display: grid;
- grid-template-columns: 1fr 0fr;
-
-}
-
- #site-search .search-box{
- align-self:center;
- height:25px;
- border:0;
-
- grid-column: 1;
- }
- #site-search .search-button{
- grid-column: 2;
- align-self:center;
- height:25px;
-
- border-style:solid;
- border-width:1px;
- }
-
-
-.full-item{
- display: grid;
- grid-template-rows: 0fr 0fr 0fr 0fr 0fr;
- grid-template-columns: 1fr 1fr;
-
-}
- .full-item video{
- grid-column: 1 / span 2;
- grid-row: 1;
- }
- .full-item .title{
- grid-column: 1 / span 2;
- grid-row:2;
- min-width: 0;
- }
- .full-item address{
- grid-column: 1;
- grid-row: 3;
- justify-self: start;
- }
- .full-item .views{
- grid-column: 2;
- grid-row: 3;
- justify-self:end;
- }
- .full-item time{
- grid-column: 1;
- grid-row: 4;
- justify-self:start;
- }
- .full-item .likes-dislikes{
- grid-column: 2;
- grid-row: 4;
- justify-self:end;
- }
- .full-item .description{
- background-color:#d0d0d0;
- margin-top:8px;
- white-space: pre-line;
- min-width: 0;
-
- grid-column: 1 / span 2;
- grid-row: 5;
- }
-
-.medium-item{
- background-color:#bcbcbc;
- display: grid;
- align-content: start;
- grid-template-columns: 246px 1fr 0fr;
- grid-template-rows: 0fr 0fr 0fr 0fr 0fr 1fr;
-}
- .medium-item .title{
- grid-column:2 / span 2;
- grid-row:1;
- min-width: 0;
- }
- .medium-item address{
- display:inline;
- }
- /*.medium-item .views{
- grid-column: 3;
- grid-row: 2;
- justify-self:end;
- }
- .medium-item time{
- grid-column: 2;
- grid-row: 3;
- justify-self:start;
- }*/
- .medium-item .stats{
- grid-column: 2 / span 2;
- grid-row: 2;
- }
-
- .medium-item .description{
- grid-column: 2 / span 2;
- grid-row: 4;
- }
- .medium-item .badges{
- grid-column: 2 / span 2;
- grid-row: 5;
- }
- /* thumbnail size */
- .medium-item img{
- /*height:138px;
- width:246px;*/
- height:100%;
- justify-self:center;
- }
-
-.small-item-box{
- color: #767676;
- font-size: 12px;
-
- display:grid;
- grid-template-columns: 1fr 0fr;
- grid-template-rows: 94px;
-}
-
-.small-item{
- background-color:#bcbcbc;
- align-content: start;
- text-decoration:none;
-
- display: grid;
- grid-template-columns: 168px 1fr;
- grid-column-gap: 5px;
- grid-template-rows: 0fr 0fr 0fr 1fr;
-}
- .small-item .title{
- grid-column:2;
- grid-row:1;
- margin:0;
-
- color: #333;
- font-size: 16px;
- font-weight: 500;
- text-decoration:initial;
- min-width: 0;
- }
- .small-item address{
- grid-column: 2;
- grid-row: 2;
- justify-self: start;
- }
-
- .small-item .views{
- grid-column: 2;
- grid-row: 3;
- justify-self:start;
- }
- /* thumbnail size */
- .small-item img{
- /*height:94px;
- width:168px;*/
- height:100%;
- justify-self:center;
- }
-
-.item-checkbox{
- justify-self:start;
- align-self:center;
- height:30px;
- width:30px;
-
- grid-column: 2;
-}
-
-/* ---Thumbnails for videos---- */
-.video-thumbnail-box{
- grid-column:1;
- grid-row:1 / span 6;
-
- display:grid;
- grid-template-columns: 1fr 0fr;
-}
- .video-thumbnail-img{
- grid-column:1 / span 2;
- grid-row:1;
- }
- .video-duration{
- grid-column: 2;
- grid-row: 1;
- align-self: end;
- opacity: .8;
- color: #ffffff;
- font-size: 12px;
- background-color: #000000;
- }
-
-/* ---Thumbnails for playlists---- */
-.playlist-thumbnail-box{
- grid-column:1;
- grid-row:1 / span 5;
-
- display:grid;
- grid-template-columns: 3fr 2fr;
-}
- .playlist-thumbnail-img{
- grid-column:1 / span 2;
- grid-row:1;
- }
- .playlist-thumbnail-info{
- grid-column:2;
- grid-row:1;
-
- display: grid;
- align-items:center;
-
- text-align:center;
- white-space: pre-line;
- opacity: .8;
- color: #cfcfcf;
- background-color: #000000;
- }
-
-.page-button-row{
- justify-self:center;
- display: grid;
- grid-auto-columns: 40px;
- grid-auto-flow: column;
- height: 40px;
-}
- .page-button{
- background-color: #e9e9e9;
- border-style: outset;
- border-width: 2px;
- font-weight: bold;
- text-align: center;
+h1, h2, h3, h4, h5, h6, div{
+ margin:0;
+ padding:0;
+
+}
+
+
+body{
+ margin:0;
+ padding: 0;
+ color:#222;
+
+
+ background-color:#cccccc;
+
+ min-height:100vh;
+
+ display:grid;
+ grid-template-rows: 50px 1fr;
+}
+
+ header{
+ background-color:#333333;
+
+ grid-row: 1;
+ }
+
+ main{
+ grid-row: 2;
+ }
+
+button{
+ padding:0; /* Override the browser-specific default button padding (seen in Firefox) */
+}
+address{
+ font-style:normal;
+}
+#site-search{
+ display: grid;
+ grid-template-columns: 1fr 0fr;
+
+}
+
+ #site-search .search-box{
+ align-self:center;
+ height:25px;
+ border:0;
+
+ grid-column: 1;
+ }
+ #site-search .search-button{
+ grid-column: 2;
+ align-self:center;
+ height:25px;
+
+ border-style:solid;
+ border-width:1px;
+ }
+
+
+.full-item{
+ display: grid;
+ grid-template-rows: 0fr 0fr 0fr 0fr 0fr;
+ grid-template-columns: 1fr 1fr;
+
+}
+ .full-item video{
+ grid-column: 1 / span 2;
+ grid-row: 1;
+ }
+ .full-item .title{
+ grid-column: 1 / span 2;
+ grid-row:2;
+ min-width: 0;
+ }
+ .full-item address{
+ grid-column: 1;
+ grid-row: 3;
+ justify-self: start;
+ }
+ .full-item .views{
+ grid-column: 2;
+ grid-row: 3;
+ justify-self:end;
+ }
+ .full-item time{
+ grid-column: 1;
+ grid-row: 4;
+ justify-self:start;
+ }
+ .full-item .likes-dislikes{
+ grid-column: 2;
+ grid-row: 4;
+ justify-self:end;
+ }
+ .full-item .description{
+ background-color:#d0d0d0;
+ margin-top:8px;
+ white-space: pre-line;
+ min-width: 0;
+
+ grid-column: 1 / span 2;
+ grid-row: 5;
+ }
+
+.medium-item{
+ background-color:#bcbcbc;
+ display: grid;
+ align-content: start;
+ grid-template-columns: 246px 1fr 0fr;
+ grid-template-rows: 0fr 0fr 0fr 0fr 0fr 1fr;
+}
+ .medium-item .title{
+ grid-column:2 / span 2;
+ grid-row:1;
+ min-width: 0;
+ }
+ .medium-item address{
+ display:inline;
+ }
+ /*.medium-item .views{
+ grid-column: 3;
+ grid-row: 2;
+ justify-self:end;
+ }
+ .medium-item time{
+ grid-column: 2;
+ grid-row: 3;
+ justify-self:start;
+ }*/
+ .medium-item .stats{
+ grid-column: 2 / span 2;
+ grid-row: 2;
+ }
+
+ .medium-item .description{
+ grid-column: 2 / span 2;
+ grid-row: 4;
+ }
+ .medium-item .badges{
+ grid-column: 2 / span 2;
+ grid-row: 5;
+ }
+ /* thumbnail size */
+ .medium-item img{
+ /*height:138px;
+ width:246px;*/
+ height:100%;
+ justify-self:center;
+ }
+
+.small-item-box{
+ color: #767676;
+ font-size: 12px;
+
+ display:grid;
+ grid-template-columns: 1fr 0fr;
+ grid-template-rows: 94px;
+}
+
+.small-item{
+ background-color:#bcbcbc;
+ align-content: start;
+ text-decoration:none;
+
+ display: grid;
+ grid-template-columns: 168px 1fr;
+ grid-column-gap: 5px;
+ grid-template-rows: 0fr 0fr 0fr 1fr;
+}
+ .small-item .title{
+ grid-column:2;
+ grid-row:1;
+ margin:0;
+
+ color: #333;
+ font-size: 16px;
+ font-weight: 500;
+ text-decoration:initial;
+ min-width: 0;
+ }
+ .small-item address{
+ grid-column: 2;
+ grid-row: 2;
+ justify-self: start;
+ }
+
+ .small-item .views{
+ grid-column: 2;
+ grid-row: 3;
+ justify-self:start;
+ }
+ /* thumbnail size */
+ .small-item img{
+ /*height:94px;
+ width:168px;*/
+ height:100%;
+ justify-self:center;
+ }
+
+.item-checkbox{
+ justify-self:start;
+ align-self:center;
+ height:30px;
+ width:30px;
+
+ grid-column: 2;
+}
+
+/* ---Thumbnails for videos---- */
+.video-thumbnail-box{
+ grid-column:1;
+ grid-row:1 / span 6;
+
+ display:grid;
+ grid-template-columns: 1fr 0fr;
+}
+ .video-thumbnail-img{
+ grid-column:1 / span 2;
+ grid-row:1;
+ }
+ .video-duration{
+ grid-column: 2;
+ grid-row: 1;
+ align-self: end;
+ opacity: .8;
+ color: #ffffff;
+ font-size: 12px;
+ background-color: #000000;
+ }
+
+/* ---Thumbnails for playlists---- */
+.playlist-thumbnail-box{
+ grid-column:1;
+ grid-row:1 / span 5;
+
+ display:grid;
+ grid-template-columns: 3fr 2fr;
+}
+ .playlist-thumbnail-img{
+ grid-column:1 / span 2;
+ grid-row:1;
+ }
+ .playlist-thumbnail-info{
+ grid-column:2;
+ grid-row:1;
+
+ display: grid;
+ align-items:center;
+
+ text-align:center;
+ white-space: pre-line;
+ opacity: .8;
+ color: #cfcfcf;
+ background-color: #000000;
+ }
+
+.page-button-row{
+ justify-self:center;
+ display: grid;
+ grid-auto-columns: 40px;
+ grid-auto-flow: column;
+ height: 40px;
+}
+ .page-button{
+ background-color: #e9e9e9;
+ border-style: outset;
+ border-width: 2px;
+ font-weight: bold;
+ text-align: center;
} \ No newline at end of file
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 5edf6fc..47f1ea3 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -1,18 +1,18 @@
-import urllib
-
-with open("subscriptions.txt", 'r', encoding='utf-8') as file:
- subscriptions = file.read()
-
-# Line format: "channel_id channel_name"
-# Example:
-# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown
-
-subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines())
-
-def get_new_videos():
- for channel_id, channel_name in subscriptions:
-
-
-
-
-def get_subscriptions_page():
+import urllib
+
+with open("subscriptions.txt", 'r', encoding='utf-8') as file:
+    subscriptions = file.read()
+
+# Line format: "channel_id channel_name"
+# Example:
+# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown
+
+# (channel_id, channel_name) pairs: the id is the first 24 characters and the
+# name starts after the separating character. Blank lines are skipped. Stored
+# as a tuple rather than a generator so it can be iterated more than once.
+subscriptions = tuple(
+    (line[0:24], line[25: ])
+    for line in subscriptions.splitlines()
+    if line.strip()
+)
+
+def get_new_videos():
+    # TODO: not implemented yet. The original loop over the subscriptions had
+    # no body, which made the whole module a syntax error.
+    raise NotImplementedError("get_new_videos is not implemented yet")
+
+
+def get_subscriptions_page():
+    # TODO: not implemented yet. The original definition had no body.
+    raise NotImplementedError("get_subscriptions_page is not implemented yet")
diff --git a/youtube/template.py b/youtube/template.py
index 7f13415..b6df1ef 100644
--- a/youtube/template.py
+++ b/youtube/template.py
@@ -1,132 +1,132 @@
-
-import re as _re
-from collections import ChainMap as _ChainMap
-
-class _TemplateMetaclass(type):
- pattern = r"""
- %(delim)s(?:
- (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
- (?P<named>%(id)s) | # delimiter and a Python identifier
- {(?P<braced>%(id)s)} | # delimiter and a braced identifier
- (?P<invalid>) # Other ill-formed delimiter exprs
- )
- """
-
- def __init__(cls, name, bases, dct):
- super(_TemplateMetaclass, cls).__init__(name, bases, dct)
- if 'pattern' in dct:
- pattern = cls.pattern
- else:
- pattern = _TemplateMetaclass.pattern % {
- 'delim' : _re.escape(cls.delimiter),
- 'id' : cls.idpattern,
- }
- cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
-
-
-class Template(metaclass=_TemplateMetaclass):
- """A string class for supporting $-substitutions."""
-
- delimiter = '$'
- idpattern = r'[_a-z][_a-z0-9]*'
- flags = _re.IGNORECASE
-
- def __init__(self, template):
- self.template = template
-
- # Search for $$, $identifier, ${identifier}, and any bare $'s
-
- def _invalid(self, mo):
- i = mo.start('invalid')
- lines = self.template[:i].splitlines(keepends=True)
- if not lines:
- colno = 1
- lineno = 1
- else:
- colno = i - len(''.join(lines[:-1]))
- lineno = len(lines)
- raise ValueError('Invalid placeholder in string: line %d, col %d' %
- (lineno, colno))
-
- def substitute(*args, **kws):
- if not args:
- raise TypeError("descriptor 'substitute' of 'Template' object "
- "needs an argument")
- self, *args = args # allow the "self" keyword be passed
- if len(args) > 1:
- raise TypeError('Too many positional arguments')
- if not args:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, args[0])
- else:
- mapping = args[0]
- # Helper function for .sub()
- def convert(mo):
- # Check the most common path first.
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- return str(mapping.get(named,''))
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- self._invalid(mo)
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
- return self.pattern.sub(convert, self.template)
-
- def strict_substitute(*args, **kws):
- if not args:
- raise TypeError("descriptor 'substitute' of 'Template' object "
- "needs an argument")
- self, *args = args # allow the "self" keyword be passed
- if len(args) > 1:
- raise TypeError('Too many positional arguments')
- if not args:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, args[0])
- else:
- mapping = args[0]
- # Helper function for .sub()
- def convert(mo):
- # Check the most common path first.
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- return str(mapping[named])
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- self._invalid(mo)
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
- return self.pattern.sub(convert, self.template)
-
- def safe_substitute(*args, **kws):
- if not args:
- raise TypeError("descriptor 'safe_substitute' of 'Template' object "
- "needs an argument")
- self, *args = args # allow the "self" keyword be passed
- if len(args) > 1:
- raise TypeError('Too many positional arguments')
- if not args:
- mapping = kws
- elif kws:
- mapping = _ChainMap(kws, args[0])
- else:
- mapping = args[0]
- # Helper function for .sub()
- def convert(mo):
- named = mo.group('named') or mo.group('braced')
- if named is not None:
- try:
- return str(mapping[named])
- except KeyError:
- return mo.group()
- if mo.group('escaped') is not None:
- return self.delimiter
- if mo.group('invalid') is not None:
- return mo.group()
- raise ValueError('Unrecognized named group in pattern',
- self.pattern)
+
+import re as _re
+from collections import ChainMap as _ChainMap
+
+class _TemplateMetaclass(type):
+    '''Metaclass that compiles the placeholder regex of every Template class.
+
+    A class that defines its own `pattern` has that pattern compiled as is;
+    otherwise the default pattern below is filled in with the class's
+    `delimiter` and `idpattern`. Either way `cls.pattern` ends up as a
+    compiled regex using `cls.flags` plus re.VERBOSE.'''
+    pattern = r"""
+    %(delim)s(?:
+      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
+      (?P<named>%(id)s)      |   # delimiter and a Python identifier
+      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
+      (?P<invalid>)              # Other ill-formed delimiter exprs
+    )
+    """
+
+    def __init__(cls, name, bases, dct):
+        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
+        if 'pattern' in dct:
+            pattern = cls.pattern
+        else:
+            pattern = _TemplateMetaclass.pattern % {
+                'delim' : _re.escape(cls.delimiter),
+                'id'    : cls.idpattern,
+            }
+        cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE)
+
+
+def _split_call_args(method_name, args, kws):
+    '''Return (template, mapping) from the raw arguments of a substitute method.
+
+    args holds the template instance followed by at most one mapping. Keyword
+    arguments take precedence over entries of that mapping. Raises TypeError
+    when the instance is missing or more than one mapping is given.'''
+    if not args:
+        raise TypeError("descriptor '%s' of 'Template' object "
+                        "needs an argument" % method_name)
+    self, *args = args  # allow the "self" keyword be passed
+    if len(args) > 1:
+        raise TypeError('Too many positional arguments')
+    if not args:
+        mapping = kws
+    elif kws:
+        mapping = _ChainMap(kws, args[0])
+    else:
+        mapping = args[0]
+    return self, mapping
+
+
+class Template(metaclass=_TemplateMetaclass):
+    """A string class for supporting $-substitutions.
+
+    Placeholders are written $identifier or ${identifier}; $$ produces a
+    literal delimiter. The three substitute methods accept one optional
+    mapping and/or keyword arguments and differ only in how they treat
+    unknown names and malformed placeholders:
+
+    substitute         unknown names become '', malformed -> ValueError
+    strict_substitute  unknown names -> KeyError, malformed -> ValueError
+    safe_substitute    unknown names and malformed placeholders are left
+                       in the output unchanged
+    """
+
+    delimiter = '$'
+    idpattern = r'[_a-z][_a-z0-9]*'
+    flags = _re.IGNORECASE
+
+    def __init__(self, template):
+        self.template = template
+
+    # Search for $$, $identifier, ${identifier}, and any bare $'s
+
+    def _invalid(self, mo):
+        '''Raise ValueError giving the line and column of a bad placeholder.'''
+        i = mo.start('invalid')
+        lines = self.template[:i].splitlines(keepends=True)
+        if not lines:
+            colno = 1
+            lineno = 1
+        else:
+            colno = i - len(''.join(lines[:-1]))
+            lineno = len(lines)
+        raise ValueError('Invalid placeholder in string: line %d, col %d' %
+                         (lineno, colno))
+
+    def substitute(*args, **kws):
+        '''Substitute placeholders; names missing from the mapping become ''.'''
+        self, mapping = _split_call_args('substitute', args, kws)
+        return self._render(lambda name, mo: str(mapping.get(name, '')))
+
+    def strict_substitute(*args, **kws):
+        '''Substitute placeholders; a name missing from the mapping raises KeyError.'''
+        self, mapping = _split_call_args('strict_substitute', args, kws)
+        return self._render(lambda name, mo: str(mapping[name]))
+
+    def safe_substitute(*args, **kws):
+        '''Substitute placeholders, leaving unknown or malformed ones untouched.'''
+        self, mapping = _split_call_args('safe_substitute', args, kws)
+
+        def resolve(name, mo):
+            try:
+                return str(mapping[name])
+            except KeyError:
+                return mo.group()
+        return self._render(resolve, keep_invalid=True)
+
+    def _render(self, resolve, keep_invalid=False):
+        '''Return the template with every placeholder replaced.
+
+        resolve(name, match) supplies the replacement text of a named or
+        braced placeholder. A malformed placeholder is copied to the output
+        when keep_invalid is true and raises ValueError otherwise.'''
+        # Helper function for .sub()
+        def convert(mo):
+            # Check the most common path first.
+            named = mo.group('named') or mo.group('braced')
+            if named is not None:
+                return resolve(named, mo)
+            if mo.group('escaped') is not None:
+                return self.delimiter
+            if mo.group('invalid') is not None:
+                if keep_invalid:
+                    return mo.group()
+                self._invalid(mo)
+            raise ValueError('Unrecognized named group in pattern',
+                             self.pattern)
         return self.pattern.sub(convert, self.template)
\ No newline at end of file
diff --git a/youtube/watch.py b/youtube/watch.py
index b8aa17d..6e1efbc 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -1,294 +1,294 @@
-from youtube_dl.YoutubeDL import YoutubeDL
-import json
-import urllib
-from string import Template
-import html
-import youtube.common as common
-from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN
-import youtube.comments as comments
-import gevent
-
-video_height_priority = (360, 480, 240, 720, 1080)
-
-
-_formats = {
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-}
-
-
-
-
-source_tag_template = Template('''
-<source src="$src" type="$type">''')
-
-with open("yt_watch_template.html", "r") as file:
- yt_watch_template = Template(file.read())
-
-
-
-# example:
-#https://www.youtube.com/related_ajax?ctoken=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&continuation=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&itct=CCkQybcCIhMIg8PShInX2gIVgdvBCh15WA0ZKPgd
-def get_bloated_more_related_videos(video_url, related_videos_token, id_token):
- related_videos_token = urllib.parse.quote(related_videos_token)
- url = "https://www.youtube.com/related_ajax?ctoken=" + related_videos_token + "&continuation=" + related_videos_token
- headers = {
- 'Host': 'www.youtube.com',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
- 'Accept': '*/*',
- 'Accept-Language': 'en-US,en;q=0.5',
- 'Referer': video_url,
- 'X-YouTube-Client-Name': '1',
- 'X-YouTube-Client-Version': '2.20180418',
- 'X-Youtube-Identity-Token': id_token,
-
- }
- #print(url)
- req = urllib.request.Request(url, headers=headers)
- response = urllib.request.urlopen(req, timeout = 5)
- content = response.read()
- info = json.loads(content)
- return info
-
-def get_more_related_videos_info(video_url, related_videos_token, id_token):
- results = []
- info = get_bloated_more_related_videos(video_url, related_videos_token, id_token)
- bloated_results = info[1]['response']['continuationContents']['watchNextSecondaryResultsContinuation']['results']
- for bloated_result in bloated_results:
- bloated_result = bloated_result['compactVideoRenderer']
- results.append({
- "title": bloated_result['title']['simpleText'],
- "video_id": bloated_result['videoId'],
- "views_text": bloated_result['viewCountText']['simpleText'],
- "length_text": default_multi_get(bloated_result, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "length_text": bloated_result['lengthText']['simpleText'],
- "uploader_name": bloated_result['longBylineText']['runs'][0]['text'],
- "uploader_url": bloated_result['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- })
- return results
-
-def more_related_videos_html(video_info):
- related_videos = get_related_videos(url, 1, video_info['related_videos_token'], video_info['id_token'])
-
- related_videos_html = ""
- for video in related_videos:
- related_videos_html += Template(video_related_template).substitute(
- video_title=html.escape(video["title"]),
- views=video["views_text"],
- uploader=html.escape(video["uploader_name"]),
- uploader_channel_url=video["uploader_url"],
- length=video["length_text"],
- video_url = "/youtube.com/watch?v=" + video["video_id"],
- thumbnail_url= get_thumbnail_url(video['video_id']),
- )
- return related_videos_html
-
-
-
-def get_related_items_html(info):
- result = ""
- for item in info['related_vids']:
- if 'list' in item: # playlist:
- result += common.small_playlist_item_html(watch_page_related_playlist_info(item))
- else:
- result += common.small_video_item_html(watch_page_related_video_info(item))
- return result
-
-
-# json of related items retrieved directly from the watch page has different names for everything
-# converts these to standard names
-def watch_page_related_video_info(item):
- result = {key: item[key] for key in ('id', 'title', 'author')}
- result['duration'] = common.seconds_to_timestamp(item['length_seconds'])
- try:
- result['views'] = item['short_view_count_text']
- except KeyError:
- result['views'] = ''
- return result
-
-def watch_page_related_playlist_info(item):
- return {
- 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+",
- 'title': item['playlist_title'],
- 'id': item['list'],
- 'first_video_id': item['video_id'],
- }
-
-
-def sort_formats(info):
- info['formats'].sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0))
- for index, format in enumerate(info['formats']):
- if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
- break
- info['formats'] = info['formats'][index:] + info['formats'][0:index]
- info['formats'] = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none']
-
-def formats_html(info):
- result = ''
- for format in info['formats']:
- result += source_tag_template.substitute(
- src=format['url'],
- type='audio/' + format['ext'] if format['vcodec'] == "none" else 'video/' + format['ext'],
- )
- return result
-
-def choose_format(info):
- suitable_formats = []
- with open('teste.txt', 'w', encoding='utf-8') as f:
- f.write(json.dumps(info['formats']))
- for format in info['formats']:
- if (format["ext"] in ("mp4", "webm")
- and format["acodec"] != "none"
- and format["vcodec"] != "none"
- and format.get("height","none") in video_height_priority):
- suitable_formats.append(format)
-
- current_best = (suitable_formats[0],video_height_priority.index(suitable_formats[0]["height"]))
- for format in suitable_formats:
- video_priority_index = video_height_priority.index(format["height"])
- if video_priority_index < current_best[1]:
- current_best = (format, video_priority_index)
- return current_best[0]
-
-more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''')
-def get_watch_page(query_string):
- id = urllib.parse.parse_qs(query_string)['v'][0]
- tasks = (
- gevent.spawn(comments.video_comments, id ),
- gevent.spawn(YoutubeDL(params={'youtube_include_dash_manifest':False}).extract_info, "https://www.youtube.com/watch?v=" + id, download=False)
- )
- gevent.joinall(tasks)
- comments_info, info = tasks[0].value, tasks[1].value
- comments_html, ctoken = comments_info
-
- if ctoken == '':
- more_comments_button = ''
- else:
- more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
- #comments_html = comments.comments_html(video_id(url))
- #info = YoutubeDL().extract_info(url, download=False)
-
- #chosen_format = choose_format(info)
- sort_formats(info)
-
-
-
- upload_year = info["upload_date"][0:4]
- upload_month = info["upload_date"][4:6]
- upload_day = info["upload_date"][6:8]
- upload_date = upload_month + "/" + upload_day + "/" + upload_year
-
- related_videos_html = get_related_items_html(info)
-
- page = yt_watch_template.substitute(
- video_title=html.escape(info["title"]),
- page_title=html.escape(info["title"]),
- uploader=html.escape(info["uploader"]),
- uploader_channel_url='/' + info["uploader_url"],
- #upload_date=datetime.datetime.fromtimestamp(info["timestamp"]).strftime("%d %b %Y %H:%M:%S"),
- upload_date = upload_date,
- views='{:,}'.format(info["view_count"]),
- likes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["like_count"]),
- dislikes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["dislike_count"]),
- description=html.escape(info["description"]),
- video_sources=formats_html(info),
- related = related_videos_html,
- comments=comments_html,
- more_comments_button = more_comments_button,
- )
+from youtube_dl.YoutubeDL import YoutubeDL
+import json
+import urllib
+from string import Template
+import html
+import youtube.common as common
+from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN
+import youtube.comments as comments
+import gevent
+
+video_height_priority = (360, 480, 240, 720, 1080)
+
+
+_formats = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
+
+ # Dash webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+
+ # Dash webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
+
+ # Dash webm audio with opus inside
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
+
+ # RTMP (unnamed)
+ '_rtmp': {'protocol': 'rtmp'},
+}
+
+
+
+
+source_tag_template = Template('''
+<source src="$src" type="$type">''')
+
+with open("yt_watch_template.html", "r") as file:
+ yt_watch_template = Template(file.read())
+
+
+
+# example:
+#https://www.youtube.com/related_ajax?ctoken=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&continuation=CBQSJhILVGNxV29rOEF1YkXAAQDIAQDgAQGiAg0o____________AUAAGAAq0gEInJOqsOyB1tAaCNeMgaD4spLIKQioxdHSu8SF9JgBCLr27tnaioDpXwj1-L_R3s7r2wcIv8TnueeUo908CMXSganIrvHDJgiVuMirrqbgqYABCJDsu8PBzdGW8wEI_-WI2t-c-IlQCOK_m_KB_rP5wAEIl7S4serqnq5YCNSs55mMt8qLyQEImvutmp-x9LaCAQiVg96VpY_pqJMBCOPsgdTflsGRsQEI7ZfYleKIub0tCIrcsb7a_uu95gEIi9Gz6_bC76zEAQjo1c_W8JzlkhI%3D&itct=CCkQybcCIhMIg8PShInX2gIVgdvBCh15WA0ZKPgd
+def get_bloated_more_related_videos(video_url, related_videos_token, id_token):
+ related_videos_token = urllib.parse.quote(related_videos_token)
+ url = "https://www.youtube.com/related_ajax?ctoken=" + related_videos_token + "&continuation=" + related_videos_token
+ headers = {
+ 'Host': 'www.youtube.com',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
+ 'Accept': '*/*',
+ 'Accept-Language': 'en-US,en;q=0.5',
+ 'Referer': video_url,
+ 'X-YouTube-Client-Name': '1',
+ 'X-YouTube-Client-Version': '2.20180418',
+ 'X-Youtube-Identity-Token': id_token,
+
+ }
+ #print(url)
+ req = urllib.request.Request(url, headers=headers)
+ response = urllib.request.urlopen(req, timeout = 5)
+ content = response.read()
+ info = json.loads(content)
+ return info
+
+def get_more_related_videos_info(video_url, related_videos_token, id_token):
+ results = []
+ info = get_bloated_more_related_videos(video_url, related_videos_token, id_token)
+ bloated_results = info[1]['response']['continuationContents']['watchNextSecondaryResultsContinuation']['results']
+ for bloated_result in bloated_results:
+ bloated_result = bloated_result['compactVideoRenderer']
+ results.append({
+ "title": bloated_result['title']['simpleText'],
+ "video_id": bloated_result['videoId'],
+ "views_text": bloated_result['viewCountText']['simpleText'],
+ "length_text": default_multi_get(bloated_result, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
+ "length_text": bloated_result['lengthText']['simpleText'],
+ "uploader_name": bloated_result['longBylineText']['runs'][0]['text'],
+ "uploader_url": bloated_result['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
+ })
+ return results
+
+def more_related_videos_html(video_info):
+ related_videos = get_related_videos(url, 1, video_info['related_videos_token'], video_info['id_token'])
+
+ related_videos_html = ""
+ for video in related_videos:
+ related_videos_html += Template(video_related_template).substitute(
+ video_title=html.escape(video["title"]),
+ views=video["views_text"],
+ uploader=html.escape(video["uploader_name"]),
+ uploader_channel_url=video["uploader_url"],
+ length=video["length_text"],
+ video_url = "/youtube.com/watch?v=" + video["video_id"],
+ thumbnail_url= get_thumbnail_url(video['video_id']),
+ )
+ return related_videos_html
+
+
+
+def get_related_items_html(info):
+ result = ""
+ for item in info['related_vids']:
+ if 'list' in item: # playlist:
+ result += common.small_playlist_item_html(watch_page_related_playlist_info(item))
+ else:
+ result += common.small_video_item_html(watch_page_related_video_info(item))
+ return result
+
+
+# json of related items retrieved directly from the watch page has different names for everything
+# converts these to standard names
+def watch_page_related_video_info(item):
+ result = {key: item[key] for key in ('id', 'title', 'author')}
+ result['duration'] = common.seconds_to_timestamp(item['length_seconds'])
+ try:
+ result['views'] = item['short_view_count_text']
+ except KeyError:
+ result['views'] = ''
+ return result
+
+def watch_page_related_playlist_info(item):
+ return {
+ 'size': item['playlist_length'] if item['playlist_length'] != "0" else "50+",
+ 'title': item['playlist_title'],
+ 'id': item['list'],
+ 'first_video_id': item['video_id'],
+ }
+
+
+def sort_formats(info):
+ info['formats'].sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0))
+ for index, format in enumerate(info['formats']):
+ if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360:
+ break
+ info['formats'] = info['formats'][index:] + info['formats'][0:index]
+ info['formats'] = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none']
+
+def formats_html(info):
+ result = ''
+ for format in info['formats']:
+ result += source_tag_template.substitute(
+ src=format['url'],
+ type='audio/' + format['ext'] if format['vcodec'] == "none" else 'video/' + format['ext'],
+ )
+ return result
+
+def choose_format(info):
+ suitable_formats = []
+ with open('teste.txt', 'w', encoding='utf-8') as f:
+ f.write(json.dumps(info['formats']))
+ for format in info['formats']:
+ if (format["ext"] in ("mp4", "webm")
+ and format["acodec"] != "none"
+ and format["vcodec"] != "none"
+ and format.get("height","none") in video_height_priority):
+ suitable_formats.append(format)
+
+ current_best = (suitable_formats[0],video_height_priority.index(suitable_formats[0]["height"]))
+ for format in suitable_formats:
+ video_priority_index = video_height_priority.index(format["height"])
+ if video_priority_index < current_best[1]:
+ current_best = (format, video_priority_index)
+ return current_best[0]
+
+more_comments_template = Template('''<a class="page-button more-comments" href="$url">More comments</a>''')
+def get_watch_page(query_string):
+ id = urllib.parse.parse_qs(query_string)['v'][0]
+ tasks = (
+ gevent.spawn(comments.video_comments, id ),
+ gevent.spawn(YoutubeDL(params={'youtube_include_dash_manifest':False}).extract_info, "https://www.youtube.com/watch?v=" + id, download=False)
+ )
+ gevent.joinall(tasks)
+ comments_info, info = tasks[0].value, tasks[1].value
+ comments_html, ctoken = comments_info
+
+ if ctoken == '':
+ more_comments_button = ''
+ else:
+ more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken)
+ #comments_html = comments.comments_html(video_id(url))
+ #info = YoutubeDL().extract_info(url, download=False)
+
+ #chosen_format = choose_format(info)
+ sort_formats(info)
+
+
+
+ upload_year = info["upload_date"][0:4]
+ upload_month = info["upload_date"][4:6]
+ upload_day = info["upload_date"][6:8]
+ upload_date = upload_month + "/" + upload_day + "/" + upload_year
+
+ related_videos_html = get_related_items_html(info)
+
+ page = yt_watch_template.substitute(
+ video_title=html.escape(info["title"]),
+ page_title=html.escape(info["title"]),
+ uploader=html.escape(info["uploader"]),
+ uploader_channel_url='/' + info["uploader_url"],
+ #upload_date=datetime.datetime.fromtimestamp(info["timestamp"]).strftime("%d %b %Y %H:%M:%S"),
+ upload_date = upload_date,
+ views='{:,}'.format(info["view_count"]),
+ likes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["like_count"]),
+ dislikes=(lambda x: '{:,}'.format(x) if x is not None else "")(info["dislike_count"]),
+ description=html.escape(info["description"]),
+ video_sources=formats_html(info),
+ related = related_videos_html,
+ comments=comments_html,
+ more_comments_button = more_comments_button,
+ )
return page \ No newline at end of file
diff --git a/youtube/watch_later.py b/youtube/watch_later.py
index 126fb6e..4bb421c 100644
--- a/youtube/watch_later.py
+++ b/youtube/watch_later.py
@@ -1,11 +1,11 @@
-import os.path
-import json
-watch_later_file = os.path.normpath("youtube/watch_later.txt")
-def add_to_watch_later(video_info_list):
- with open(watch_later_file, "a", encoding='utf-8') as file:
- for info in video_info_list:
- file.write(info + "\n")
-
-
-def get_watch_later_page():
+import os.path
+import json
+watch_later_file = os.path.normpath("youtube/watch_later.txt")
+def add_to_watch_later(video_info_list):
+ with open(watch_later_file, "a", encoding='utf-8') as file:
+ for info in video_info_list:
+ file.write(info + "\n")
+
+
+def get_watch_later_page():
pass \ No newline at end of file
diff --git a/youtube/youtube.py b/youtube/youtube.py
index 7ec75c0..a7cc204 100644
--- a/youtube/youtube.py
+++ b/youtube/youtube.py
@@ -1,60 +1,60 @@
-import mimetypes
-import urllib.parse
-from youtube import watch_later, watch, search, playlist, channel, comments
-YOUTUBE_FILES = (
- "/shared.css",
- "/opensearch.xml",
- '/comments.css',
-)
-
-def youtube(env, start_response):
- path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING']
- if method == "GET":
- if path in YOUTUBE_FILES:
- with open("youtube" + path, 'rb') as f:
- mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream'
- start_response('200 OK', (('Content-type',mime_type),) )
- return f.read()
-
- elif path == "/comments":
- start_response('200 OK', (('Content-type','text/html'),) )
- return comments.get_comments_page(query_string).encode()
-
- elif path == "/watch":
- start_response('200 OK', (('Content-type','text/html'),) )
- return watch.get_watch_page(query_string).encode()
-
- elif path == "/search":
- start_response('200 OK', (('Content-type','text/html'),) )
- return search.get_search_page(query_string).encode()
-
- elif path == "/playlist":
- start_response('200 OK', (('Content-type','text/html'),) )
- return playlist.get_playlist_page(query_string).encode()
-
- elif path.startswith("/channel/"):
- start_response('200 OK', (('Content-type','text/html'),) )
- return channel.get_channel_page(path[9:], query_string=query_string).encode()
-
- elif path.startswith("/user/"):
- start_response('200 OK', (('Content-type','text/html'),) )
- return channel.get_user_page(path[6:], query_string=query_string).encode()
-
- else:
- start_response('404 Not Found', () )
- return b'404 Not Found'
-
- elif method == "POST":
- if path == "/edit_playlist":
- fields = urllib.parse.parse_qs(env['wsgi.input'].read().decode())
- if fields['action'][0] == 'add' and fields['playlist_name'][0] == 'watch_later':
- watch_later.add_to_watch_later(fields['video_info_list'])
-
- start_response('204 No Content', ())
- else:
- start_response('404 Not Found', ())
- return b'404 Not Found'
-
- else:
- start_response('501 Not Implemented', ())
+import mimetypes
+import urllib.parse
+from youtube import watch_later, watch, search, playlist, channel, comments
+YOUTUBE_FILES = (
+ "/shared.css",
+ "/opensearch.xml",
+ '/comments.css',
+)
+
+def youtube(env, start_response):
+ path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING']
+ if method == "GET":
+ if path in YOUTUBE_FILES:
+ with open("youtube" + path, 'rb') as f:
+ mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream'
+ start_response('200 OK', (('Content-type',mime_type),) )
+ return f.read()
+
+ elif path == "/comments":
+ start_response('200 OK', (('Content-type','text/html'),) )
+ return comments.get_comments_page(query_string).encode()
+
+ elif path == "/watch":
+ start_response('200 OK', (('Content-type','text/html'),) )
+ return watch.get_watch_page(query_string).encode()
+
+ elif path == "/search":
+ start_response('200 OK', (('Content-type','text/html'),) )
+ return search.get_search_page(query_string).encode()
+
+ elif path == "/playlist":
+ start_response('200 OK', (('Content-type','text/html'),) )
+ return playlist.get_playlist_page(query_string).encode()
+
+ elif path.startswith("/channel/"):
+ start_response('200 OK', (('Content-type','text/html'),) )
+ return channel.get_channel_page(path[9:], query_string=query_string).encode()
+
+ elif path.startswith("/user/"):
+ start_response('200 OK', (('Content-type','text/html'),) )
+ return channel.get_user_page(path[6:], query_string=query_string).encode()
+
+ else:
+ start_response('404 Not Found', () )
+ return b'404 Not Found'
+
+ elif method == "POST":
+ if path == "/edit_playlist":
+ fields = urllib.parse.parse_qs(env['wsgi.input'].read().decode())
+ if fields['action'][0] == 'add' and fields['playlist_name'][0] == 'watch_later':
+ watch_later.add_to_watch_later(fields['video_info_list'])
+
+ start_response('204 No Content', ())
+ else:
+ start_response('404 Not Found', ())
+ return b'404 Not Found'
+
+ else:
+ start_response('501 Not Implemented', ())
return b'501 Not Implemented' \ No newline at end of file