diff options
Diffstat (limited to 'youtube/common.py')
-rw-r--r-- | youtube/common.py | 1278 |
1 files changed, 639 insertions, 639 deletions
diff --git a/youtube/common.py b/youtube/common.py index 67bd81f..3133fed 100644 --- a/youtube/common.py +++ b/youtube/common.py @@ -1,639 +1,639 @@ -from youtube.template import Template
-import html
-import json
-import re
-import urllib.parse
-import gzip
-import brotli
-import time
-
-
-URL_ORIGIN = "/https://www.youtube.com"
-
-
-# videos (all of type str):
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# published
-# duration
-# likes
-# dislikes
-# views
-# playlist_index
-
-# playlists:
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# updated
-# size
-# first_video_id
-
-
-
-
-
-
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="current-page-button">$page</a>''')
-
-medium_playlist_item_template = Template('''
- <div class="medium-item">
- <a class="playlist-thumbnail-box" href="$url" title="$title">
- <img class="playlist-thumbnail-img" src="$thumbnail">
- <div class="playlist-thumbnail-info">
- <span>$size</span>
- </div>
- </a>
-
- <a class="title" href="$url" title=$title>$title</a>
-
- <address><a href="$author_url">$author</a></address>
- </div>
-''')
-medium_video_item_template = Template('''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
-
- <a class="title" href="$url">$title</a>
-
- <div class="stats">$stats</div>
- <!--
- <address><a href="$author_url">$author</a></address>
- <span class="views">$views</span>
- <time datetime="$datetime">Uploaded $published</time>-->
-
- <span class="description">$description</span>
- <span class="badges">$badges</span>
- </div>
-''')
-
-small_video_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <address>$author</address>
- <span class="views">$views</span>
-
- </div>
- <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add">
- </div>
-''')
-
-small_playlist_item_template = Template('''
- <div class="small-item-box">
- <div class="small-item">
- <a class="playlist-thumbnail-box" href="$url" title="$title">
- <img class="playlist-thumbnail-img" src="$thumbnail">
- <div class="playlist-thumbnail-info">
- <span>$size</span>
- </div>
- </a>
- <a class="title" href="$url" title="$title">$title</a>
-
- <address>$author</address>
- </div>
- </div>
-''')
-
-medium_channel_item_template = Template('''
- <div class="medium-item">
- <a class="video-thumbnail-box" href="$url" title="$title">
- <img class="video-thumbnail-img" src="$thumbnail">
- <span class="video-duration">$duration</span>
- </a>
-
- <a class="title" href="$url">$title</a>
-
- <span>$subscriber_count</span>
- <span>$size</span>
-
- <span class="description">$description</span>
- </div>
-''')
-
-
-def fetch_url(url, headers=(), timeout=5, report_text=None):
- if isinstance(headers, list):
- headers += [('Accept-Encoding', 'gzip, br')]
- headers = dict(headers)
- elif isinstance(headers, tuple):
- headers += (('Accept-Encoding', 'gzip, br'),)
- headers = dict(headers)
- else:
- headers = headers.copy()
- headers['Accept-Encoding'] = 'gzip, br'
-
- start_time = time.time()
-
- req = urllib.request.Request(url, headers=headers)
- response = urllib.request.urlopen(req, timeout=timeout)
- response_time = time.time()
-
- content = response.read()
- read_finish = time.time()
- if report_text:
- print(report_text, 'Latency:', response_time - start_time, ' Read time:', read_finish - response_time)
- encodings = response.getheader('Content-Encoding', default='identity').replace(' ', '').split(',')
- for encoding in reversed(encodings):
- if encoding == 'identity':
- continue
- if encoding == 'br':
- content = brotli.decompress(content)
- elif encoding == 'gzip':
- content = gzip.decompress(content)
- return content
-
-mobile_ua = (('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'),)
-
-def dict_add(*dicts):
- for dictionary in dicts[1:]:
- dicts[0].update(dictionary)
- return dicts[0]
-
-def video_id(url):
- url_parts = urllib.parse.urlparse(url)
- return urllib.parse.parse_qs(url_parts.query)['v'][0]
-
-def uppercase_escape(s):
- return re.sub(
- r'\\U([0-9a-fA-F]{8})',
- lambda m: chr(int(m.group(1), base=16)), s)
-
-def default_multi_get(object, *keys, default):
- ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors '''
- try:
- for key in keys:
- object = object[key]
- return object
- except (IndexError, KeyError):
- return default
-
-def get_plain_text(node):
- try:
- return html.escape(node['simpleText'])
- except KeyError:
- return unformmated_text_runs(node['runs'])
-
-def unformmated_text_runs(runs):
- result = ''
- for text_run in runs:
- result += html.escape(text_run["text"])
- return result
-
-def format_text_runs(runs):
- if isinstance(runs, str):
- return runs
- result = ''
- for text_run in runs:
- if text_run.get("bold", False):
- result += "<b>" + html.escape(text_run["text"]) + "</b>"
- elif text_run.get('italics', False):
- result += "<i>" + html.escape(text_run["text"]) + "</i>"
- else:
- result += html.escape(text_run["text"])
- return result
-
-# default, sddefault, mqdefault, hqdefault, hq720
-def get_thumbnail_url(video_id):
- return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
-
-def seconds_to_timestamp(seconds):
- seconds = int(seconds)
- hours, seconds = divmod(seconds,3600)
- minutes, seconds = divmod(seconds,60)
- if hours != 0:
- timestamp = str(hours) + ":"
- timestamp += str(minutes).zfill(2) # zfill pads with zeros
- else:
- timestamp = str(minutes)
-
- timestamp += ":" + str(seconds).zfill(2)
- return timestamp
-
-# playlists:
-
-# id
-# title
-# url
-# author
-# author_url
-# thumbnail
-# description
-# updated
-# size
-# first_video_id
-def medium_playlist_item_info(playlist_renderer):
- renderer = playlist_renderer
- try:
- author_url = URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- except KeyError: # radioRenderer
- author_url = ''
- try:
- thumbnail = renderer['thumbnails'][0]['thumbnails'][0]['url']
- except KeyError:
- thumbnail = renderer['thumbnail']['thumbnails'][0]['url']
- return {
- "title": renderer["title"]["simpleText"],
- 'id': renderer["playlistId"],
- 'size': renderer.get('videoCount', '50+'),
- "author": default_multi_get(renderer,'longBylineText','runs',0,'text', default='Youtube'),
- "author_url": author_url,
- 'thumbnail': thumbnail,
- }
-
-def medium_video_item_info(video_renderer):
- renderer = video_renderer
- try:
- return {
- "title": renderer["title"]["simpleText"],
- "id": renderer["videoId"],
- "description": renderer.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text
- "thumbnail": get_thumbnail_url(renderer["videoId"]),
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "author": renderer['longBylineText']['runs'][0]['text'],
- "author_url": URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''),
- }
- except KeyError:
- print(renderer)
- raise
-
-def small_video_item_info(compact_video_renderer):
- renderer = compact_video_renderer
- return {
- "title": renderer['title']['simpleText'],
- "id": renderer['videoId'],
- "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'],
- "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length
- "author": renderer['longBylineText']['runs'][0]['text'],
- "author_url": renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
- }
-
-
-# -----
-# HTML
-# -----
-
-def small_video_item_html(item):
- video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')})
- return small_video_item_template.substitute(
- title = html.escape(item["title"]),
- views = item["views"],
- author = html.escape(item["author"]),
- duration = item["duration"],
- url = URL_ORIGIN + "/watch?v=" + item["id"],
- thumbnail = get_thumbnail_url(item['id']),
- video_info = html.escape(json.dumps(video_info)),
- )
-
-def small_playlist_item_html(item):
- return small_playlist_item_template.substitute(
- title=html.escape(item["title"]),
- size = item['size'],
- author="",
- url = URL_ORIGIN + "/playlist?list=" + item["id"],
- thumbnail= get_thumbnail_url(item['first_video_id']),
- )
-
-def medium_playlist_item_html(item):
- return medium_playlist_item_template.substitute(
- title=html.escape(item["title"]),
- size = item['size'],
- author=item['author'],
- author_url= URL_ORIGIN + item['author_url'],
- url = URL_ORIGIN + "/playlist?list=" + item["id"],
- thumbnail= item['thumbnail'],
- )
-
-def medium_video_item_html(medium_video_info):
- info = medium_video_info
-
- return medium_video_item_template.substitute(
- title=html.escape(info["title"]),
- views=info["views"],
- published = info["published"],
- description = format_text_runs(info["description"]),
- author=html.escape(info["author"]),
- author_url=info["author_url"],
- duration=info["duration"],
- url = URL_ORIGIN + "/watch?v=" + info["id"],
- thumbnail=info['thumbnail'],
- datetime='', # TODO
- )
-
-html_functions = {
- 'compactVideoRenderer': lambda x: small_video_item_html(small_video_item_info(x)),
- 'videoRenderer': lambda x: medium_video_item_html(medium_video_item_info(x)),
- 'compactPlaylistRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
- 'playlistRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
- 'channelRenderer': lambda x: '',
- 'radioRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)),
- 'compactRadioRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)),
- 'didYouMeanRenderer': lambda x: '',
-}
-
-
-
-
-
-
-
-def get_url(node):
- try:
- return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
- except KeyError:
- return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url']
-
-
-def get_text(node):
- try:
- return node['simpleText']
- except KeyError:
- return node['runs'][0]['text']
-
-def get_formatted_text(node):
- try:
- return node['runs']
- except KeyError:
- return node['simpleText']
-
-def get_badges(node):
- badges = []
- for badge_node in node:
- badge = badge_node['metadataBadgeRenderer']['label']
- if badge.lower() != 'new':
- badges.append(badge)
- return badges
-
-def get_thumbnail(node):
- try:
- return node['thumbnails'][0]['url'] # polymer format
- except KeyError:
- return node['url'] # ajax format
-
-dispatch = {
-
-# polymer format
- 'title': ('title', get_text),
- 'publishedTimeText': ('published', get_text),
- 'videoId': ('id', lambda node: node),
- 'descriptionSnippet': ('description', get_formatted_text),
- 'lengthText': ('duration', get_text),
- 'thumbnail': ('thumbnail', get_thumbnail),
- 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']),
-
- 'videoCountText': ('size', get_text),
- 'playlistId': ('id', lambda node: node),
-
- 'subscriberCountText': ('subscriber_count', get_text),
- 'channelId': ('id', lambda node: node),
- 'badges': ('badges', get_badges),
-
-# ajax format
- 'view_count_text': ('views', get_text),
- 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]),
- 'owner_text': ('author', get_text),
- 'owner_endpoint': ('author_url', lambda node: node['url']),
- 'description': ('description', get_formatted_text),
- 'index': ('playlist_index', get_text),
- 'short_byline': ('author', get_text),
- 'length': ('duration', get_text),
- 'video_id': ('id', lambda node: node),
-
-}
-
-def renderer_info(renderer):
- try:
- info = {}
- if 'viewCountText' in renderer: # prefer this one as it contains all the digits
- info['views'] = get_text(renderer['viewCountText'])
- elif 'shortViewCountText' in renderer:
- info['views'] = get_text(renderer['shortViewCountText'])
-
- for key, node in renderer.items():
- if key in ('longBylineText', 'shortBylineText'):
- info['author'] = get_text(node)
- try:
- info['author_url'] = get_url(node)
- except KeyError:
- pass
-
- continue
-
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(renderer)
- raise
-
-def ajax_info(item_json):
- try:
- info = {}
- for key, node in item_json.items():
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(item_json)
- raise
-
-def badges_html(badges):
- return ' | '.join(map(html.escape, badges))
-
-
-
-
-
-html_transform_dispatch = {
- 'title': html.escape,
- 'published': html.escape,
- 'id': html.escape,
- 'description': format_text_runs,
- 'duration': html.escape,
- 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')),
- 'size': html.escape,
- 'author': html.escape,
- 'author_url': lambda url: html.escape(URL_ORIGIN + url),
- 'views': html.escape,
- 'subscriber_count': html.escape,
- 'badges': badges_html,
- 'playlist_index': html.escape,
-}
-
-def get_html_ready(item):
- html_ready = {}
- for key, value in item.items():
- try:
- function = html_transform_dispatch[key]
- except KeyError:
- continue
- html_ready[key] = function(value)
- return html_ready
-
-
-author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''')
-author_template = Template('''<address>By $author</address>''')
-stat_templates = (
- Template('''<span class="views">$views</span>'''),
- Template('''<time datetime="$datetime">$published</time>'''),
-)
-def get_video_stats(html_ready):
- stats = []
- if 'author' in html_ready:
- if 'author_url' in html_ready:
- stats.append(author_template_url.substitute(html_ready))
- else:
- stats.append(author_template.substitute(html_ready))
- for stat in stat_templates:
- try:
- stats.append(stat.strict_substitute(html_ready))
- except KeyError:
- pass
- return ' | '.join(stats)
-
-def video_item_html(item, template):
- html_ready = get_html_ready(item)
- video_info = {}
- for key in ('id', 'title', 'author'):
- try:
- video_info[key] = html_ready[key]
- except KeyError:
- video_info[key] = ''
- try:
- video_info['duration'] = html_ready['duration']
- except KeyError:
- video_info['duration'] = 'Live' # livestreams don't have a duration
-
- html_ready['video_info'] = html.escape(json.dumps(video_info) )
- html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id']
- html_ready['datetime'] = '' #TODO
-
- html_ready['stats'] = get_video_stats(html_ready)
-
- return template.substitute(html_ready)
-
-
-def playlist_item_html(item, template):
- html_ready = get_html_ready(item)
-
- html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id']
- html_ready['datetime'] = '' #TODO
- return template.substitute(html_ready)
-
-
-
-
-
-
-def make_query_string(query_string):
- return '&'.join(key + '=' + ','.join(values) for key,values in query_string.items())
-
-def update_query_string(query_string, items):
- parameters = urllib.parse.parse_qs(query_string)
- parameters.update(items)
- return make_query_string(parameters)
-
-page_button_template = Template('''<a class="page-button" href="$href">$page</a>''')
-current_page_button_template = Template('''<div class="page-button">$page</div>''')
-
-def page_buttons_html(current_page, estimated_pages, url, current_query_string):
- if current_page <= 5:
- page_start = 1
- page_end = min(9, estimated_pages)
- else:
- page_start = current_page - 4
- page_end = min(current_page + 4, estimated_pages)
-
- result = ""
- for page in range(page_start, page_end+1):
- if page == current_page:
- template = current_page_button_template
- else:
- template = page_button_template
- result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) )
- return result
-
-
-
-
-
-
-
-showing_results_for = Template('''
- <div class="showing-results-for">
- <div>Showing results for <a>$corrected_query</a></div>
- <div>Search instead for <a href="$original_query_url">$original_query</a></div>
- </div>
-''')
-
-did_you_mean = Template('''
- <div class="did-you-mean">
- <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div>
- </div>
-''')
-
-def renderer_html(renderer, additional_info={}, current_query_string=''):
- type = list(renderer.keys())[0]
- renderer = renderer[type]
- if type in ('videoRenderer', 'playlistRenderer', 'radioRenderer', 'compactVideoRenderer', 'compactPlaylistRenderer', 'compactRadioRenderer', 'gridVideoRenderer', 'gridPlaylistRenderer', 'gridRadioRenderer'):
- info = renderer_info(renderer)
- info.update(additional_info)
- if type == 'compactVideoRenderer':
- return video_item_html(info, small_video_item_template)
- if type in ('compactPlaylistRenderer', 'compactRadioRenderer'):
- return playlist_item_html(info, small_playlist_item_template)
- if type in ('videoRenderer', 'gridVideoRenderer'):
- return video_item_html(info, medium_video_item_template)
- if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer'):
- return playlist_item_html(info, medium_playlist_item_template)
-
- if type == 'channelRenderer':
- info = renderer_info(renderer)
- html_ready = get_html_ready(info)
- html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id']
- return medium_channel_item_template.substitute(html_ready)
-
- if type == 'movieRenderer':
- return ''
- print(renderer)
- raise NotImplementedError('Unknown renderer type: ' + type)
-
-
-'videoRenderer'
-'playlistRenderer'
-'channelRenderer'
-'radioRenderer'
-'gridVideoRenderer'
-'gridPlaylistRenderer'
-
-'didYouMeanRenderer'
-'showingResultsForRenderer'
+from youtube.template import Template +import html +import json +import re +import urllib.parse +import gzip +import brotli +import time + + +URL_ORIGIN = "/https://www.youtube.com" + + +# videos (all of type str): + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# published +# duration +# likes +# dislikes +# views +# playlist_index + +# playlists: + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# updated +# size +# first_video_id + + + + + + + +page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') +current_page_button_template = Template('''<div class="current-page-button">$page</a>''') + +medium_playlist_item_template = Template(''' + <div class="medium-item"> + <a class="playlist-thumbnail-box" href="$url" title="$title"> + <img class="playlist-thumbnail-img" src="$thumbnail"> + <div class="playlist-thumbnail-info"> + <span>$size</span> + </div> + </a> + + <a class="title" href="$url" title=$title>$title</a> + + <address><a href="$author_url">$author</a></address> + </div> +''') +medium_video_item_template = Template(''' + <div class="medium-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + + <a class="title" href="$url">$title</a> + + <div class="stats">$stats</div> + <!-- + <address><a href="$author_url">$author</a></address> + <span class="views">$views</span> + <time datetime="$datetime">Uploaded $published</time>--> + + <span class="description">$description</span> + <span class="badges">$badges</span> + </div> +''') + +small_video_item_template = Template(''' + <div class="small-item-box"> + <div class="small-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + <a class="title" href="$url" title="$title">$title</a> + + <address>$author</address> + <span class="views">$views</span> + + </div> + <input class="item-checkbox" type="checkbox" name="video_info_list" value="$video_info" form="playlist-add"> + </div> +''') + +small_playlist_item_template = Template(''' + <div class="small-item-box"> + <div class="small-item"> + <a class="playlist-thumbnail-box" href="$url" title="$title"> + <img class="playlist-thumbnail-img" src="$thumbnail"> + <div class="playlist-thumbnail-info"> + <span>$size</span> + </div> + </a> + <a class="title" href="$url" title="$title">$title</a> + + <address>$author</address> + </div> + </div> +''') + +medium_channel_item_template = Template(''' + <div class="medium-item"> + <a class="video-thumbnail-box" href="$url" title="$title"> + <img class="video-thumbnail-img" src="$thumbnail"> + <span class="video-duration">$duration</span> + </a> + + <a class="title" href="$url">$title</a> + + <span>$subscriber_count</span> + <span>$size</span> + + <span class="description">$description</span> + </div> +''') + + +def fetch_url(url, headers=(), timeout=5, report_text=None): + if isinstance(headers, list): + headers += [('Accept-Encoding', 'gzip, br')] + headers = dict(headers) + elif isinstance(headers, tuple): + headers += (('Accept-Encoding', 'gzip, br'),) + headers = dict(headers) + else: + headers = headers.copy() + headers['Accept-Encoding'] = 'gzip, br' + + start_time = time.time() + + req = urllib.request.Request(url, headers=headers) + response = urllib.request.urlopen(req, timeout=timeout) + response_time = time.time() + + content = response.read() + read_finish = time.time() + if report_text: + print(report_text, 'Latency:', response_time - start_time, ' Read time:', read_finish - response_time) + encodings = response.getheader('Content-Encoding', default='identity').replace(' ', '').split(',') + for encoding in reversed(encodings): + if encoding == 'identity': + continue + if encoding == 'br': + content = brotli.decompress(content) + elif encoding == 'gzip': + content = gzip.decompress(content) + return content + +mobile_ua = (('User-Agent', 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1'),) + +def dict_add(*dicts): + for dictionary in dicts[1:]: + dicts[0].update(dictionary) + return dicts[0] + +def video_id(url): + url_parts = urllib.parse.urlparse(url) + return urllib.parse.parse_qs(url_parts.query)['v'][0] + +def uppercase_escape(s): + return re.sub( + r'\\U([0-9a-fA-F]{8})', + lambda m: chr(int(m.group(1), base=16)), s) + +def default_multi_get(object, *keys, default): + ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' + try: + for key in keys: + object = object[key] + return object + except (IndexError, KeyError): + return default + +def get_plain_text(node): + try: + return html.escape(node['simpleText']) + except KeyError: + return unformmated_text_runs(node['runs']) + +def unformmated_text_runs(runs): + result = '' + for text_run in runs: + result += html.escape(text_run["text"]) + return result + +def format_text_runs(runs): + if isinstance(runs, str): + return runs + result = '' + for text_run in runs: + if text_run.get("bold", False): + result += "<b>" + html.escape(text_run["text"]) + "</b>" + elif text_run.get('italics', False): + result += "<i>" + html.escape(text_run["text"]) + "</i>" + else: + result += html.escape(text_run["text"]) + return result + +# default, sddefault, mqdefault, hqdefault, hq720 +def get_thumbnail_url(video_id): + return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + +def seconds_to_timestamp(seconds): + seconds = int(seconds) + hours, seconds = divmod(seconds,3600) + minutes, seconds = divmod(seconds,60) + if hours != 0: + timestamp = str(hours) + ":" + timestamp += str(minutes).zfill(2) # zfill pads with zeros + else: + timestamp = str(minutes) + + timestamp += ":" + str(seconds).zfill(2) + return timestamp + +# playlists: + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# updated +# size +# first_video_id +def medium_playlist_item_info(playlist_renderer): + renderer = playlist_renderer + try: + author_url = URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + except KeyError: # radioRenderer + author_url = '' + try: + thumbnail = renderer['thumbnails'][0]['thumbnails'][0]['url'] + except KeyError: + thumbnail = renderer['thumbnail']['thumbnails'][0]['url'] + return { + "title": renderer["title"]["simpleText"], + 'id': renderer["playlistId"], + 'size': renderer.get('videoCount', '50+'), + "author": default_multi_get(renderer,'longBylineText','runs',0,'text', default='Youtube'), + "author_url": author_url, + 'thumbnail': thumbnail, + } + +def medium_video_item_info(video_renderer): + renderer = video_renderer + try: + return { + "title": renderer["title"]["simpleText"], + "id": renderer["videoId"], + "description": renderer.get("descriptionSnippet",dict()).get('runs',[]), # a list of text runs (formmated), rather than plain text + "thumbnail": get_thumbnail_url(renderer["videoId"]), + "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'], + "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "author": renderer['longBylineText']['runs'][0]['text'], + "author_url": URL_ORIGIN + renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + "published": default_multi_get(renderer, 'publishedTimeText', 'simpleText', default=''), + } + except KeyError: + print(renderer) + raise + +def small_video_item_info(compact_video_renderer): + renderer = compact_video_renderer + return { + "title": renderer['title']['simpleText'], + "id": renderer['videoId'], + "views": renderer['viewCountText'].get('simpleText', None) or renderer['viewCountText']['runs'][0]['text'], + "duration": default_multi_get(renderer, 'lengthText', 'simpleText', default=''), # livestreams dont have a length + "author": renderer['longBylineText']['runs'][0]['text'], + "author_url": renderer['longBylineText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + } + + +# ----- +# HTML +# ----- + +def small_video_item_html(item): + video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')}) + return small_video_item_template.substitute( + title = html.escape(item["title"]), + views = item["views"], + author = html.escape(item["author"]), + duration = item["duration"], + url = URL_ORIGIN + "/watch?v=" + item["id"], + thumbnail = get_thumbnail_url(item['id']), + video_info = html.escape(json.dumps(video_info)), + ) + +def small_playlist_item_html(item): + return small_playlist_item_template.substitute( + title=html.escape(item["title"]), + size = item['size'], + author="", + url = URL_ORIGIN + "/playlist?list=" + item["id"], + thumbnail= get_thumbnail_url(item['first_video_id']), + ) + +def medium_playlist_item_html(item): + return medium_playlist_item_template.substitute( + title=html.escape(item["title"]), + size = item['size'], + author=item['author'], + author_url= URL_ORIGIN + item['author_url'], + url = URL_ORIGIN + "/playlist?list=" + item["id"], + thumbnail= item['thumbnail'], + ) + +def medium_video_item_html(medium_video_info): + info = medium_video_info + + return medium_video_item_template.substitute( + title=html.escape(info["title"]), + views=info["views"], + published = info["published"], + description = format_text_runs(info["description"]), + author=html.escape(info["author"]), + author_url=info["author_url"], + duration=info["duration"], + url = URL_ORIGIN + "/watch?v=" + info["id"], + thumbnail=info['thumbnail'], + datetime='', # TODO + ) + +html_functions = { + 'compactVideoRenderer': lambda x: small_video_item_html(small_video_item_info(x)), + 'videoRenderer': lambda x: medium_video_item_html(medium_video_item_info(x)), + 'compactPlaylistRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)), + 'playlistRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)), + 'channelRenderer': lambda x: '', + 'radioRenderer': lambda x: medium_playlist_item_html(medium_playlist_item_info(x)), + 'compactRadioRenderer': lambda x: small_playlist_item_html(small_playlist_item_info(x)), + 'didYouMeanRenderer': lambda x: '', +} + + + + + + + +def get_url(node): + try: + return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + except KeyError: + return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + + +def get_text(node): + try: + return node['simpleText'] + except KeyError: + return node['runs'][0]['text'] + +def get_formatted_text(node): + try: + return node['runs'] + except KeyError: + return node['simpleText'] + +def get_badges(node): + badges = [] + for badge_node in node: + badge = badge_node['metadataBadgeRenderer']['label'] + if badge.lower() != 'new': + badges.append(badge) + return badges + +def get_thumbnail(node): + try: + return node['thumbnails'][0]['url'] # polymer format + except KeyError: + return node['url'] # ajax format + +dispatch = { + +# polymer format + 'title': ('title', get_text), + 'publishedTimeText': ('published', get_text), + 'videoId': ('id', lambda node: node), + 'descriptionSnippet': ('description', get_formatted_text), + 'lengthText': ('duration', get_text), + 'thumbnail': ('thumbnail', get_thumbnail), + 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']), + + 'videoCountText': ('size', get_text), + 'playlistId': ('id', lambda node: node), + + 'subscriberCountText': ('subscriber_count', get_text), + 'channelId': ('id', lambda node: node), + 'badges': ('badges', get_badges), + +# ajax format + 'view_count_text': ('views', get_text), + 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]), + 'owner_text': ('author', get_text), + 'owner_endpoint': ('author_url', lambda node: node['url']), + 'description': ('description', get_formatted_text), + 'index': ('playlist_index', get_text), + 'short_byline': ('author', get_text), + 'length': ('duration', get_text), + 'video_id': ('id', lambda node: node), + +} + +def renderer_info(renderer): + try: + info = {} + if 'viewCountText' in renderer: # prefer this one as it contains all the digits + info['views'] = get_text(renderer['viewCountText']) + elif 'shortViewCountText' in renderer: + info['views'] = get_text(renderer['shortViewCountText']) + + for key, node in renderer.items(): + if key in ('longBylineText', 'shortBylineText'): + info['author'] = get_text(node) + try: + info['author_url'] = get_url(node) + except KeyError: + pass + + continue + + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(renderer) + raise + +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + +def badges_html(badges): + return ' | '.join(map(html.escape, badges)) + + + + + +html_transform_dispatch = { + 'title': html.escape, + 'published': html.escape, + 'id': html.escape, + 'description': format_text_runs, + 'duration': html.escape, + 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')), + 'size': html.escape, + 'author': html.escape, + 'author_url': lambda url: html.escape(URL_ORIGIN + url), + 'views': html.escape, + 'subscriber_count': html.escape, + 'badges': badges_html, + 'playlist_index': html.escape, +} + +def get_html_ready(item): + html_ready = {} + for key, value in item.items(): + try: + function = html_transform_dispatch[key] + except KeyError: + continue + html_ready[key] = function(value) + return html_ready + + +author_template_url = Template('''<address>By <a href="$author_url">$author</a></address>''') +author_template = Template('''<address>By $author</address>''') +stat_templates = ( + Template('''<span class="views">$views</span>'''), + Template('''<time datetime="$datetime">$published</time>'''), +) +def get_video_stats(html_ready): + stats = [] + if 'author' in html_ready: + if 'author_url' in html_ready: + stats.append(author_template_url.substitute(html_ready)) + else: + stats.append(author_template.substitute(html_ready)) + for stat in stat_templates: + try: + stats.append(stat.strict_substitute(html_ready)) + except KeyError: + pass + return ' | '.join(stats) + +def video_item_html(item, template): + html_ready = get_html_ready(item) + video_info = {} + for key in ('id', 'title', 'author'): + try: + video_info[key] = html_ready[key] + except KeyError: + video_info[key] = '' + try: + video_info['duration'] = html_ready['duration'] + except KeyError: + video_info['duration'] = 'Live' # livestreams don't have a duration + + html_ready['video_info'] = html.escape(json.dumps(video_info) ) + html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id'] + html_ready['datetime'] = '' #TODO + + html_ready['stats'] = get_video_stats(html_ready) + + return template.substitute(html_ready) + + +def playlist_item_html(item, template): + html_ready = get_html_ready(item) + + html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id'] + html_ready['datetime'] = '' #TODO + return template.substitute(html_ready) + + + + + + +def make_query_string(query_string): + return '&'.join(key + '=' + ','.join(values) for key,values in query_string.items()) + +def update_query_string(query_string, items): + parameters = urllib.parse.parse_qs(query_string) + parameters.update(items) + return make_query_string(parameters) + +page_button_template = Template('''<a class="page-button" href="$href">$page</a>''') +current_page_button_template = Template('''<div class="page-button">$page</div>''') + +def page_buttons_html(current_page, estimated_pages, url, current_query_string): + if current_page <= 5: + page_start = 1 + page_end = min(9, estimated_pages) + else: + page_start = current_page - 4 + page_end = min(current_page + 4, estimated_pages) + + result = "" + for page in range(page_start, page_end+1): + if page == current_page: + template = current_page_button_template + else: + template = page_button_template + result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) ) + return result + + + + + + + +showing_results_for = Template(''' + <div class="showing-results-for"> + <div>Showing results for <a>$corrected_query</a></div> + <div>Search instead for <a href="$original_query_url">$original_query</a></div> + </div> +''') + +did_you_mean = Template(''' + <div class="did-you-mean"> + <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> + </div> +''') + +def renderer_html(renderer, additional_info={}, current_query_string=''): + type = list(renderer.keys())[0] + renderer = renderer[type] + if type in ('videoRenderer', 'playlistRenderer', 'radioRenderer', 'compactVideoRenderer', 'compactPlaylistRenderer', 'compactRadioRenderer', 'gridVideoRenderer', 'gridPlaylistRenderer', 'gridRadioRenderer'): + info = renderer_info(renderer) + info.update(additional_info) + if type == 'compactVideoRenderer': + return video_item_html(info, small_video_item_template) + if type in ('compactPlaylistRenderer', 'compactRadioRenderer'): + return playlist_item_html(info, small_playlist_item_template) + if type in ('videoRenderer', 'gridVideoRenderer'): + return video_item_html(info, medium_video_item_template) + if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer'): + return playlist_item_html(info, medium_playlist_item_template) + + if type == 'channelRenderer': + info = renderer_info(renderer) + html_ready = get_html_ready(info) + html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id'] + return medium_channel_item_template.substitute(html_ready) + + if type == 'movieRenderer': + return '' + print(renderer) + raise NotImplementedError('Unknown renderer type: ' + type) + + +'videoRenderer' +'playlistRenderer' +'channelRenderer' +'radioRenderer' +'gridVideoRenderer' +'gridPlaylistRenderer' + +'didYouMeanRenderer' +'showingResultsForRenderer' |