From a61ba6b8f45d94bf8e89a9f351c5c6cac2379387 Mon Sep 17 00:00:00 2001
From: James Taylor
Date: Sat, 16 Feb 2019 15:21:39 -0800
Subject: playlist: fix error when there's no description

---
 youtube/playlist.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/youtube/playlist.py b/youtube/playlist.py
index cc0da33..419ed00 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -2,13 +2,14 @@
 import base64
 import youtube.common as common
 import urllib
 import json
-from string import Template
+import string
+from youtube import template
 import youtube.proto as proto
 import gevent
 import math
 
 with open("yt_playlist_template.html", "r") as file:
-    yt_playlist_template = Template(file.read())
+    yt_playlist_template = template.Template(file.read())
 
@@ -76,7 +77,7 @@ def get_videos(playlist_id, page):
     return info
 
-playlist_stat_template = Template('''
+playlist_stat_template = string.Template('''
$stat
 ''')
 
 def get_playlist_page(env, start_response):
     start_response('200 OK', [('Content-type','text/html'),])
-- 
cgit v1.2.3


From b32330be4f15dd044e6212f526e52375f0a0f6c2 Mon Sep 17 00:00:00 2001
From: James Taylor
Date: Thu, 21 Feb 2019 21:32:31 -0800
Subject: refactor common.py into 3 files

---
 youtube/accounts.py        |  18 +-
 youtube/channel.py         |  64 ++--
 youtube/comments.py        |  59 ++--
 youtube/common.py          | 748 ---------------------------------------------
 youtube/html_common.py     | 429 ++++++++++++++++++++++++++
 youtube/local_playlist.py  |  23 +-
 youtube/playlist.py        |  25 +-
 youtube/post_comment.py    |  36 +--
 youtube/search.py          |  25 +-
 youtube/util.py            | 153 ++++++++++
 youtube/watch.py           |  26 +-
 youtube/youtube.py         |   4 +-
 youtube/yt_data_extract.py | 205 +++++++++++++
 13 files changed, 928 insertions(+), 887 deletions(-)
 delete mode 100644 youtube/common.py
 create mode 100644 youtube/html_common.py
 create mode 100644 youtube/util.py
 create mode 100644 youtube/yt_data_extract.py

diff --git a/youtube/accounts.py b/youtube/accounts.py
index bde9852..375bf2a 100644
--- a/youtube/accounts.py
+++ b/youtube/accounts.py
@@ -1,10 +1,10 @@
 # Contains functions having to do with logging in
+from youtube import util, html_common
+import settings
 
 import urllib
 import json
-from youtube import common
 import re
-import settings
 import http.cookiejar
 import io
 import os
@@ -106,7 +106,7 @@ def get_account_login_page(env, start_response):
     '''
 
     page = '''
-<form action="/youtube.com/login" method="POST">
+
@@ -130,10 +130,10 @@ Using Tor to log in should only be done if the account was created using a proxy
''' - return common.yt_basic_template.substitute( + return html_common.yt_basic_template.substitute( page_title = "Login", style = style, - header = common.get_header(), + header = html_common.get_header(), page = page, ).encode('utf-8') @@ -229,7 +229,7 @@ def _login(username, password, cookiejar, use_tor): Taken from youtube-dl """ - login_page = common.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') + login_page = util.fetch_url(_LOGIN_URL, yt_dl_headers, report_text='Downloaded login page', cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') '''with open('debug/login_page', 'w', encoding='utf-8') as f: f.write(login_page)''' #print(cookiejar.as_lwp_str()) @@ -255,7 +255,7 @@ def _login(username, password, cookiejar, use_tor): 'Google-Accounts-XSRF': 1, } headers.update(yt_dl_headers) - result = common.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') + result = util.fetch_url(url, headers, report_text=note, data=data, cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') #print(cookiejar.as_lwp_str()) '''with open('debug/' + note, 'w', encoding='utf-8') as f: f.write(result)''' @@ -387,7 +387,7 @@ def _login(username, password, cookiejar, use_tor): return False try: - check_cookie_results = common.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') + check_cookie_results = util.fetch_url(check_cookie_url, headers=yt_dl_headers, report_text="Checked cookie", cookiejar_send=cookiejar, cookiejar_receive=cookiejar, use_tor=use_tor).decode('utf-8') except (urllib.error.URLError, compat_http_client.HTTPException, socket.error) as err: return False @@ -398,7 +398,7 @@ def _login(username, password, cookiejar, use_tor): warn('Unable to log in') return False - select_site_page = common.fetch_url('https://m.youtube.com/select_site', headers=common.mobile_ua, report_text="Retrieved page for channel id", cookiejar_send=cookiejar, use_tor=use_tor).decode('utf-8') + select_site_page = util.fetch_url('https://m.youtube.com/select_site', headers=util.mobile_ua, report_text="Retrieved page for channel id", cookiejar_send=cookiejar, use_tor=use_tor).decode('utf-8') match = _CHANNEL_ID_RE.search(select_site_page) if match is None: warn('Failed to find channel id') diff --git a/youtube/channel.py b/youtube/channel.py index 9577525..e9f315b 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -1,6 +1,6 @@ import base64 -import youtube.common as common -from youtube.common import default_multi_get, URL_ORIGIN, get_thumbnail_url, video_id +from youtube import util, yt_data_extract, html_common + import http_errors import urllib import json @@ -91,7 +91,7 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1): url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken print("Sending channel tab ajax request") - content = common.fetch_url(url, common.desktop_ua + headers_1) + content = util.fetch_url(url, util.desktop_ua + headers_1) print("Finished recieving channel tab response") '''with open('debug/channel_debug', 'wb') as f: @@ -110,7 +110,7 @@ def get_number_of_videos(channel_id): # Sometimes retrieving playlist info fails with 403 for no discernable reason try: - response = common.fetch_url(url, common.mobile_ua + headers_pbj) + response = 
util.fetch_url(url, util.mobile_ua + headers_pbj) except urllib.error.HTTPError as e: if e.code != 403: raise @@ -133,20 +133,20 @@ def get_channel_id(username): # method that gives the smallest possible response at ~10 kb # needs to be as fast as possible url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true' - response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8') + response = util.fetch_url(url, util.mobile_ua + headers_1).decode('utf-8') return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1) def grid_items_html(items, additional_info={}): result = ''' ''' return result def list_items_html(items, additional_info={}): result = ''' ''' return result @@ -168,11 +168,11 @@ def channel_tabs_html(channel_id, current_tab, search_box_value=''): ) else: result += channel_tab_template.substitute( - href_attribute = ' href="' + URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"', + href_attribute = ' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab_name.lower() + '"', tab_name = tab_name, ) result += channel_search_template.substitute( - action = URL_ORIGIN + "/channel/" + channel_id + "/search", + action = util.URL_ORIGIN + "/channel/" + channel_id + "/search", search_box_value = html.escape(search_box_value), ) return result @@ -192,7 +192,7 @@ def channel_sort_buttons_html(channel_id, tab, current_sort): ) else: result += channel_sort_button_template.substitute( - href_attribute=' href="' + URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"', + href_attribute=' href="' + util.URL_ORIGIN + '/channel/' + channel_id + '/' + tab + '?sort=' + sort_number + '"', text = 'Sort by ' + sort_name ) return result @@ -246,14 +246,14 @@ def channel_videos_html(polymer_json, current_page=1, current_sort=3, number_of_ items_html = grid_items_html(items, {'author': microformat['title']}) return yt_channel_items_template.substitute( - header = common.get_header(), + header = html_common.get_header(), channel_title = microformat['title'], channel_tabs = channel_tabs_html(channel_id, 'Videos'), sort_buttons = channel_sort_buttons_html(channel_id, 'videos', current_sort), avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], page_title = microformat['title'] + ' - Channel', items = items_html, - page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/30), URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string), + page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/30), util.URL_ORIGIN + "/channel/" + channel_id + "/videos", current_query_string), number_of_results = '{:,}'.format(number_of_videos) + " videos", ) @@ -267,7 +267,7 @@ def channel_playlists_html(polymer_json, current_sort=3): items_html = grid_items_html(items, {'author': microformat['title']}) return yt_channel_items_template.substitute( - header = common.get_header(), + header = html_common.get_header(), channel_title = microformat['title'], channel_tabs = channel_tabs_html(channel_id, 'Playlists'), sort_buttons = channel_sort_buttons_html(channel_id, 'playlists', current_sort), @@ -310,25 +310,25 @@ def channel_about_page(polymer_json): channel_links += channel_link_template.substitute( url = html.escape(url), - text = common.get_plain_text(link_json['title']), + text = yt_data_extract.get_plain_text(link_json['title']), ) stats = '' for stat_name in ('subscriberCountText', 'joinedDateText', 'viewCountText', 'country'): try: - 
stat_value = common.get_plain_text(channel_metadata[stat_name]) + stat_value = yt_data_extract.get_plain_text(channel_metadata[stat_name]) except KeyError: continue else: stats += stat_template.substitute(stat_value=stat_value) try: - description = common.format_text_runs(common.get_formatted_text(channel_metadata['description'])) + description = yt_data_extract.format_text_runs(yt_data_extract.get_formatted_text(channel_metadata['description'])) except KeyError: description = '' return yt_channel_about_template.substitute( - header = common.get_header(), - page_title = common.get_plain_text(channel_metadata['title']) + ' - About', - channel_title = common.get_plain_text(channel_metadata['title']), + header = html_common.get_header(), + page_title = yt_data_extract.get_plain_text(channel_metadata['title']) + ' - About', + channel_title = yt_data_extract.get_plain_text(channel_metadata['title']), avatar = html.escape(avatar), description = description, links = channel_links, @@ -351,13 +351,13 @@ def channel_search_page(polymer_json, query, current_page=1, number_of_videos = items_html = list_items_html(items) return yt_channel_items_template.substitute( - header = common.get_header(), + header = html_common.get_header(), channel_title = html.escape(microformat['title']), channel_tabs = channel_tabs_html(channel_id, '', query), avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], page_title = html.escape(query + ' - Channel search'), items = items_html, - page_buttons = common.page_buttons_html(current_page, math.ceil(number_of_videos/29), URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string), + page_buttons = html_common.page_buttons_html(current_page, math.ceil(number_of_videos/29), util.URL_ORIGIN + "/channel/" + channel_id + "/search", current_query_string), number_of_results = '', sort_buttons = '', ) @@ -367,7 +367,7 @@ def get_channel_search_json(channel_id, query, page): ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query) ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii') - polymer_json = common.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, common.desktop_ua + headers_1) + polymer_json = util.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, util.desktop_ua + headers_1) '''with open('debug/channel_search_debug', 'wb') as f: f.write(polymer_json)''' polymer_json = json.loads(polymer_json) @@ -384,10 +384,10 @@ def get_channel_page(env, start_response): tab = 'videos' parameters = env['parameters'] - page_number = int(common.default_multi_get(parameters, 'page', 0, default='1')) - sort = common.default_multi_get(parameters, 'sort', 0, default='3') - view = common.default_multi_get(parameters, 'view', 0, default='1') - query = common.default_multi_get(parameters, 'query', 0, default='') + page_number = int(util.default_multi_get(parameters, 'page', 0, default='1')) + sort = util.default_multi_get(parameters, 'sort', 0, default='3') + view = util.default_multi_get(parameters, 'view', 0, default='1') + query = util.default_multi_get(parameters, 'query', 0, default='') if tab == 'videos': tasks = ( @@ -399,11 +399,11 @@ def get_channel_page(env, start_response): result = channel_videos_html(polymer_json, page_number, sort, number_of_videos, env['QUERY_STRING']) elif tab == 'about': - polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', common.desktop_ua + headers_1) + polymer_json = 
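In `get_channel_page`, the `tasks = (...)` tuple fetches the video count and the tab JSON concurrently. A hedged reconstruction of that pattern, using the helpers defined above (gevent is already imported by channel.py; the exact spawn arguments are an assumption):

```python
import gevent

# Hedged reconstruction of the concurrent-fetch pattern in
# get_channel_page; the real spawn arguments may differ.
# channel_id/page_number/sort are the surrounding function's locals,
# shown here with example values.
channel_id, page_number, sort = 'UC1234567890abcdefghij', 1, '3'

tasks = (
    gevent.spawn(get_number_of_videos, channel_id),
    gevent.spawn(get_channel_tab, channel_id, page_number, sort),
)
gevent.joinall(tasks)  # wait for both requests to finish
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
```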
util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1) polymer_json = json.loads(polymer_json) result = channel_about_page(polymer_json) elif tab == 'playlists': - polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], common.desktop_ua + headers_1) + polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1) '''with open('debug/channel_playlists_debug', 'wb') as f: f.write(polymer_json)''' polymer_json = json.loads(polymer_json) @@ -443,22 +443,22 @@ def get_channel_page_general_url(env, start_response): return b'Invalid channel url' if page == 'videos': - polymer_json = common.fetch_url(base_url + '/videos?pbj=1&view=0', common.desktop_ua + headers_1) + polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1) '''with open('debug/user_page_videos', 'wb') as f: f.write(polymer_json)''' polymer_json = json.loads(polymer_json) result = channel_videos_html(polymer_json) elif page == 'about': - polymer_json = common.fetch_url(base_url + '/about?pbj=1', common.desktop_ua + headers_1) + polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1) polymer_json = json.loads(polymer_json) result = channel_about_page(polymer_json) elif page == 'playlists': - polymer_json = common.fetch_url(base_url+ '/playlists?pbj=1&view=1', common.desktop_ua + headers_1) + polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1) polymer_json = json.loads(polymer_json) result = channel_playlists_html(polymer_json) elif page == 'search': raise NotImplementedError() - '''polymer_json = common.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, common.desktop_ua + headers_1) + '''polymer_json = util.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, util.desktop_ua + headers_1) polymer_json = json.loads(polymer_json) return channel_search_page(''' else: diff --git a/youtube/comments.py b/youtube/comments.py index 10209e7..4087b47 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -1,13 +1,14 @@ +from youtube import proto, util, html_common, yt_data_extract, accounts +import settings + import json -from youtube import proto, common, accounts import base64 -from youtube.common import uppercase_escape, default_multi_get, format_text_runs, URL_ORIGIN, fetch_url from string import Template import urllib.request import urllib import html -import settings import re + comment_area_template = Template('''
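`get_channel_tab` and `get_channel_search_json` above assemble their ctoken from `proto.string` and `proto.nested` (youtube/proto.py is not part of this diff). As a hedged sketch of what such helpers plausibly encode — standard length-delimited protobuf fields — with a made-up channel id and query:

```python
import base64

# Hedged sketch of the wire format behind proto.string()/proto.nested():
# a length-delimited protobuf field is a header varint ((field << 3) | 2),
# then a length varint, then the payload. The real youtube/proto.py may
# differ in details.
def varint(n):
    out = b''
    while True:
        byte = n & 0x7f
        n >>= 7
        if n:
            out += bytes((byte | 0x80,))
        else:
            return out + bytes((byte,))

def string(field_number, data):
    data = data.encode('utf-8') if isinstance(data, str) else data
    return varint((field_number << 3) | 2) + varint(len(data)) + data

nested = string  # a nested message is wire-encoded the same way

ctoken = string(2, 'UC1234567890abcdefghij') + string(11, 'search query')
print(base64.urlsafe_b64encode(nested(80226972, ctoken)).decode('ascii'))
```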
$video-metadata @@ -130,7 +131,7 @@ def request_comments(ctoken, replies=False): url = base_url + ctoken.replace("=", "%3D") + "&pbj=1" for i in range(0,8): # don't retry more than 8 times - content = fetch_url(url, headers=mobile_headers, report_text="Retrieved comments") + content = util.fetch_url(url, headers=mobile_headers, report_text="Retrieved comments") if content[0:4] == b")]}'": # random closing characters included at beginning of response for some reason content = content[4:] elif content[0:10] == b'\nPost comment''' - other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) + other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(video_id, sort=1 - sort, lc=lc) other_sort_name = 'newest' if sort == 0 else 'top' other_sort_link = '''Sort by ''' + other_sort_name + '''''' @@ -314,7 +315,7 @@ def video_comments(video_id, sort=0, offset=0, lc='', secret_key=''): if ctoken == '': more_comments_button = '' else: - more_comments_button = more_comments_template.substitute(url = common.URL_ORIGIN + '/comments?ctoken=' + ctoken) + more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken) result = '''
\n''' result += comment_links + '\n' @@ -350,7 +351,7 @@ comment_box_template = Template(''' - Add account + Add account $video_id_input @@ -359,7 +360,7 @@ $options def get_comments_page(env, start_response): start_response('200 OK', [('Content-type','text/html'),] ) parameters = env['parameters'] - ctoken = default_multi_get(parameters, 'ctoken', 0, default='') + ctoken = util.default_multi_get(parameters, 'ctoken', 0, default='') replies = False if not ctoken: video_id = parameters['video_id'][0] @@ -384,17 +385,17 @@ def get_comments_page(env, start_response): page_number = page_number, sort = 'top' if metadata['sort'] == 0 else 'newest', title = html.escape(comment_info['video_title']), - url = common.URL_ORIGIN + '/watch?v=' + metadata['video_id'], + url = util.URL_ORIGIN + '/watch?v=' + metadata['video_id'], thumbnail = '/i.ytimg.com/vi/'+ metadata['video_id'] + '/mqdefault.jpg', ) comment_box = comment_box_template.substitute( - form_action= common.URL_ORIGIN + '/post_comment', + form_action= util.URL_ORIGIN + '/post_comment', video_id_input='''''', post_text='Post comment', options=comment_box_account_options(), ) - other_sort_url = common.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort']) + other_sort_url = util.URL_ORIGIN + '/comments?ctoken=' + make_comment_ctoken(metadata['video_id'], sort=1 - metadata['sort']) other_sort_name = 'newest' if metadata['sort'] == 0 else 'top' other_sort_link = '''Sort by ''' + other_sort_name + '''''' @@ -408,7 +409,7 @@ def get_comments_page(env, start_response): if ctoken == '': more_comments_button = '' else: - more_comments_button = more_comments_template.substitute(url = URL_ORIGIN + '/comments?ctoken=' + ctoken) + more_comments_button = more_comments_template.substitute(url = util.URL_ORIGIN + '/comments?ctoken=' + ctoken) comments_area = '
\n' comments_area += video_metadata + comment_box + comment_links + '\n' comments_area += '
\n' @@ -417,7 +418,7 @@ def get_comments_page(env, start_response): comments_area += more_comments_button + '\n' comments_area += '
\n' return yt_comments_template.substitute( - header = common.get_header(), + header = html_common.get_header(), comments_area = comments_area, page_title = page_title, ).encode('utf-8') diff --git a/youtube/common.py b/youtube/common.py deleted file mode 100644 index cb963ce..0000000 --- a/youtube/common.py +++ /dev/null @@ -1,748 +0,0 @@ -from youtube.template import Template -from youtube import local_playlist -import settings -import html -import json -import re -import urllib.parse -import gzip -import brotli -import time -import socks, sockshandler - -URL_ORIGIN = "/https://www.youtube.com" - - -# videos (all of type str): - -# id -# title -# url -# author -# author_url -# thumbnail -# description -# published -# duration -# likes -# dislikes -# views -# playlist_index - -# playlists: - -# id -# title -# url -# author -# author_url -# thumbnail -# description -# updated -# size -# first_video_id - - -with open('yt_basic_template.html', 'r', encoding='utf-8') as file: - yt_basic_template = Template(file.read()) - - - - -page_button_template = Template('''$page''') -current_page_button_template = Template('''
$page''') - -medium_playlist_item_template = Template(''' -
-
- - -
- $size -
-
- - $title - -
$stats
-
-
-''') -medium_video_item_template = Template(''' -
-
- - - $duration - - - $title - -
$stats
- - $description - $badges -
- -
-''') - -small_video_item_template = Template(''' -
-
- - - $duration - - $title - -
$author
- $views - -
- -
-''') - -small_playlist_item_template = Template(''' -
-
- - -
- $size -
-
- $title - -
$author
-
-
-''') - -medium_channel_item_template = Template(''' -
-
- - - $duration - - - $title - - $subscriber_count - $size - - $description -
-
-''') - - -class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): - '''Separate cookiejars for receiving and sending''' - def __init__(self, cookiejar_send=None, cookiejar_receive=None): - import http.cookiejar - self.cookiejar_send = cookiejar_send - self.cookiejar_receive = cookiejar_receive - - def http_request(self, request): - if self.cookiejar_send is not None: - self.cookiejar_send.add_cookie_header(request) - return request - - def http_response(self, request, response): - if self.cookiejar_receive is not None: - self.cookiejar_receive.extract_cookies(response, request) - return response - - https_request = http_request - https_response = http_response - - -def decode_content(content, encoding_header): - encodings = encoding_header.replace(' ', '').split(',') - for encoding in reversed(encodings): - if encoding == 'identity': - continue - if encoding == 'br': - content = brotli.decompress(content) - elif encoding == 'gzip': - content = gzip.decompress(content) - return content - -def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True): - ''' - When cookiejar_send is set to a CookieJar object, - those cookies will be sent in the request (but cookies in response will not be merged into it) - When cookiejar_receive is set to a CookieJar object, - cookies received in the response will be merged into the object (nothing will be sent from it) - When both are set to the same object, cookies will be sent from the object, - and response cookies will be merged into it. - ''' - headers = dict(headers) # Note: Calling dict() on a dict will make a copy - headers['Accept-Encoding'] = 'gzip, br' - - # prevent python version being leaked by urllib if User-Agent isn't provided - # (urllib will use ex. 
Python-urllib/3.6 otherwise) - if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers: - headers['User-Agent'] = 'Python-urllib' - - if data is not None: - if isinstance(data, str): - data = data.encode('ascii') - elif not isinstance(data, bytes): - data = urllib.parse.urlencode(data).encode('ascii') - - start_time = time.time() - - - req = urllib.request.Request(url, data=data, headers=headers) - - cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive) - - if use_tor and settings.route_tor: - opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor) - else: - opener = urllib.request.build_opener(cookie_processor) - - response = opener.open(req, timeout=timeout) - response_time = time.time() - - - content = response.read() - read_finish = time.time() - if report_text: - print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) - content = decode_content(content, response.getheader('Content-Encoding', default='identity')) - return content - -mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1' -mobile_ua = (('User-Agent', mobile_user_agent),) -desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0' -desktop_ua = (('User-Agent', desktop_user_agent),) - -def dict_add(*dicts): - for dictionary in dicts[1:]: - dicts[0].update(dictionary) - return dicts[0] - -def video_id(url): - url_parts = urllib.parse.urlparse(url) - return urllib.parse.parse_qs(url_parts.query)['v'][0] - -def uppercase_escape(s): - return re.sub( - r'\\U([0-9a-fA-F]{8})', - lambda m: chr(int(m.group(1), base=16)), s) - -def default_multi_get(object, *keys, default): - ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. 
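The `HTTPAsymmetricCookieProcessor` above, together with `fetch_url`'s docstring, defines a send/receive split for cookies. A minimal usage sketch of the processor on its own (the URL is a placeholder):

```python
import urllib.request
import http.cookiejar

# Receive-only jar: cookies set by responses are recorded, but nothing
# stored in the jar is ever attached to outgoing requests. Passing the
# same jar as both send and receive gives normal session behavior, which
# is what the login flow in accounts.py does.
receive_jar = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(
    HTTPAsymmetricCookieProcessor(cookiejar_receive=receive_jar))

response = opener.open('https://example.com/')  # placeholder URL
print([cookie.name for cookie in receive_jar])  # captured, none were sent
```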
Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' - try: - for key in keys: - object = object[key] - return object - except (IndexError, KeyError): - return default - -def get_plain_text(node): - try: - return html.escape(node['simpleText']) - except KeyError: - return unformmated_text_runs(node['runs']) - -def unformmated_text_runs(runs): - result = '' - for text_run in runs: - result += html.escape(text_run["text"]) - return result - -def format_text_runs(runs): - if isinstance(runs, str): - return runs - result = '' - for text_run in runs: - if text_run.get("bold", False): - result += "" + html.escape(text_run["text"]) + "" - elif text_run.get('italics', False): - result += "" + html.escape(text_run["text"]) + "" - else: - result += html.escape(text_run["text"]) - return result - -# default, sddefault, mqdefault, hqdefault, hq720 -def get_thumbnail_url(video_id): - return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" - -def seconds_to_timestamp(seconds): - seconds = int(seconds) - hours, seconds = divmod(seconds,3600) - minutes, seconds = divmod(seconds,60) - if hours != 0: - timestamp = str(hours) + ":" - timestamp += str(minutes).zfill(2) # zfill pads with zeros - else: - timestamp = str(minutes) - - timestamp += ":" + str(seconds).zfill(2) - return timestamp - - -# ----- -# HTML -# ----- - -def small_video_item_html(item): - video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')}) - return small_video_item_template.substitute( - title = html.escape(item["title"]), - views = item["views"], - author = html.escape(item["author"]), - duration = item["duration"], - url = URL_ORIGIN + "/watch?v=" + item["id"], - thumbnail = get_thumbnail_url(item['id']), - video_info = html.escape(video_info), - ) - -def small_playlist_item_html(item): - return small_playlist_item_template.substitute( - title=html.escape(item["title"]), - size = item['size'], - author="", - url = URL_ORIGIN + "/playlist?list=" + item["id"], - thumbnail= get_thumbnail_url(item['first_video_id']), - ) - -def medium_playlist_item_html(item): - return medium_playlist_item_template.substitute( - title=html.escape(item["title"]), - size = item['size'], - author=item['author'], - author_url= URL_ORIGIN + item['author_url'], - url = URL_ORIGIN + "/playlist?list=" + item["id"], - thumbnail= item['thumbnail'], - ) - -def medium_video_item_html(medium_video_info): - info = medium_video_info - - return medium_video_item_template.substitute( - title=html.escape(info["title"]), - views=info["views"], - published = info["published"], - description = format_text_runs(info["description"]), - author=html.escape(info["author"]), - author_url=info["author_url"], - duration=info["duration"], - url = URL_ORIGIN + "/watch?v=" + info["id"], - thumbnail=info['thumbnail'], - datetime='', # TODO - ) - - -header_template = Template(''' -
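A usage sketch for `default_multi_get` as defined above, on a made-up response fragment:

```python
# default_multi_get walks nested dicts/lists with a mixed key/index path
# and returns the default on any KeyError or IndexError along the way.
response = {'contents': [{'videoRenderer': {'videoId': 'abc123'}}]}

default_multi_get(response, 'contents', 0, 'videoRenderer', 'videoId',
                  default=None)                                # -> 'abc123'
default_multi_get(response, 'contents', 5, 'title', default='')  # -> ''
```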
- - - - - - - -
-
- - -$playlists - - - -
- Local playlists -
-
-''') -playlist_option_template = Template('''''') -def get_header(search_box_value=""): - playlists = '' - for name in local_playlist.get_playlist_names(): - playlists += playlist_option_template.substitute(name = name) - return header_template.substitute(playlists = playlists, search_box_value = html.escape(search_box_value)) - - - -def get_url(node): - try: - return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] - except KeyError: - return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] - - -def get_text(node): - try: - return node['simpleText'] - except KeyError: - pass - try: - return node['runs'][0]['text'] - except IndexError: # empty text runs - return '' - -def get_formatted_text(node): - try: - return node['runs'] - except KeyError: - return node['simpleText'] - -def get_badges(node): - badges = [] - for badge_node in node: - badge = badge_node['metadataBadgeRenderer']['label'] - if badge.lower() != 'new': - badges.append(badge) - return badges - -def get_thumbnail(node): - try: - return node['thumbnails'][0]['url'] # polymer format - except KeyError: - return node['url'] # ajax format - -dispatch = { - -# polymer format - 'title': ('title', get_text), - 'publishedTimeText': ('published', get_text), - 'videoId': ('id', lambda node: node), - 'descriptionSnippet': ('description', get_formatted_text), - 'lengthText': ('duration', get_text), - 'thumbnail': ('thumbnail', get_thumbnail), - 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']), - - 'viewCountText': ('views', get_text), - 'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]), # the format is "324 videos" - 'videoCountText': ('size', get_text), - 'playlistId': ('id', lambda node: node), - 'descriptionText': ('description', get_formatted_text), - - 'subscriberCountText': ('subscriber_count', get_text), - 'channelId': ('id', lambda node: node), - 'badges': ('badges', get_badges), - -# ajax format - 'view_count_text': ('views', get_text), - 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]), - 'owner_text': ('author', get_text), - 'owner_endpoint': ('author_url', lambda node: node['url']), - 'description': ('description', get_formatted_text), - 'index': ('playlist_index', get_text), - 'short_byline': ('author', get_text), - 'length': ('duration', get_text), - 'video_id': ('id', lambda node: node), - -} - -def renderer_info(renderer): - try: - info = {} - if 'viewCountText' in renderer: # prefer this one as it contains all the digits - info['views'] = get_text(renderer['viewCountText']) - elif 'shortViewCountText' in renderer: - info['views'] = get_text(renderer['shortViewCountText']) - - if 'ownerText' in renderer: - info['author'] = renderer['ownerText']['runs'][0]['text'] - info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] - try: - overlays = renderer['thumbnailOverlays'] - except KeyError: - pass - else: - for overlay in overlays: - if 'thumbnailOverlayTimeStatusRenderer' in overlay: - info['duration'] = get_text(overlay['thumbnailOverlayTimeStatusRenderer']['text']) - # show renderers don't have videoCountText - elif 'thumbnailOverlayBottomPanelRenderer' in overlay: - info['size'] = get_text(overlay['thumbnailOverlayBottomPanelRenderer']['text']) - - # show renderers don't have playlistId, have to dig into the url to get it - try: - info['id'] = renderer['navigationEndpoint']['watchEndpoint']['playlistId'] - except KeyError: - 
pass - for key, node in renderer.items(): - if key in ('longBylineText', 'shortBylineText'): - info['author'] = get_text(node) - try: - info['author_url'] = get_url(node) - except KeyError: - pass - - # show renderers don't have thumbnail key at top level, dig into thumbnailRenderer - elif key == 'thumbnailRenderer' and 'showCustomThumbnailRenderer' in node: - info['thumbnail'] = node['showCustomThumbnailRenderer']['thumbnail']['thumbnails'][0]['url'] - else: - try: - simple_key, function = dispatch[key] - except KeyError: - continue - info[simple_key] = function(node) - return info - except KeyError: - print(renderer) - raise - -def ajax_info(item_json): - try: - info = {} - for key, node in item_json.items(): - try: - simple_key, function = dispatch[key] - except KeyError: - continue - info[simple_key] = function(node) - return info - except KeyError: - print(item_json) - raise - -def badges_html(badges): - return ' | '.join(map(html.escape, badges)) - - - - - -html_transform_dispatch = { - 'title': html.escape, - 'published': html.escape, - 'id': html.escape, - 'description': format_text_runs, - 'duration': html.escape, - 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')), - 'size': html.escape, - 'author': html.escape, - 'author_url': lambda url: html.escape(URL_ORIGIN + url), - 'views': html.escape, - 'subscriber_count': html.escape, - 'badges': badges_html, - 'playlist_index': html.escape, -} - -def get_html_ready(item): - html_ready = {} - for key, value in item.items(): - try: - function = html_transform_dispatch[key] - except KeyError: - continue - html_ready[key] = function(value) - return html_ready - - -author_template_url = Template('''
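`renderer_info` is driven by the `dispatch` table above: each renderer key maps to a `(simple_key, transform)` pair, and keys absent from the table are skipped. A small illustration with a made-up node:

```python
# 'lengthText' maps to ('duration', get_text) in the dispatch table, so
# a renderer's length node becomes a plain 'duration' string.
node = {'simpleText': '1:23'}
simple_key, function = dispatch['lengthText']
info = {simple_key: function(node)}   # {'duration': '1:23'}
```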
By $author
''') -author_template = Template('''
$author
''') -stat_templates = ( - Template('''$views'''), - Template(''''''), -) -def get_stats(html_ready): - stats = [] - if 'author' in html_ready: - if 'author_url' in html_ready: - stats.append(author_template_url.substitute(html_ready)) - else: - stats.append(author_template.substitute(html_ready)) - for stat in stat_templates: - try: - stats.append(stat.strict_substitute(html_ready)) - except KeyError: - pass - return ' | '.join(stats) - -def video_item_html(item, template, html_exclude=set()): - - video_info = {} - for key in ('id', 'title', 'author'): - try: - video_info[key] = item[key] - except KeyError: - video_info[key] = '' - try: - video_info['duration'] = item['duration'] - except KeyError: - video_info['duration'] = 'Live' # livestreams don't have a duration - - html_ready = get_html_ready(item) - - html_ready['video_info'] = html.escape(json.dumps(video_info) ) - html_ready['url'] = URL_ORIGIN + "/watch?v=" + html_ready['id'] - html_ready['datetime'] = '' #TODO - - for key in html_exclude: - del html_ready[key] - html_ready['stats'] = get_stats(html_ready) - - return template.substitute(html_ready) - - -def playlist_item_html(item, template, html_exclude=set()): - html_ready = get_html_ready(item) - - html_ready['url'] = URL_ORIGIN + "/playlist?list=" + html_ready['id'] - html_ready['datetime'] = '' #TODO - - for key in html_exclude: - del html_ready[key] - html_ready['stats'] = get_stats(html_ready) - - return template.substitute(html_ready) - - - - - - -def update_query_string(query_string, items): - parameters = urllib.parse.parse_qs(query_string) - parameters.update(items) - return urllib.parse.urlencode(parameters, doseq=True) - -page_button_template = Template('''$page''') -current_page_button_template = Template('''
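`get_stats` relies on `strict_substitute`, a method of the project's own Template class (youtube/template.py, not shown in this diff) that is assumed here to raise `KeyError` on any missing placeholder rather than substituting partially. A sketch with made-up values:

```python
# With no author_url present, author_template is chosen; the $views stat
# template succeeds; any stat template whose placeholders are missing in
# html_ready is skipped through the KeyError branch.
html_ready = {'author': 'Example Channel', 'views': '1,234 views'}
stats = get_stats(html_ready)  # ' | '-joined author and views markup
```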
$page
''') - -def page_buttons_html(current_page, estimated_pages, url, current_query_string): - if current_page <= 5: - page_start = 1 - page_end = min(9, estimated_pages) - else: - page_start = current_page - 4 - page_end = min(current_page + 4, estimated_pages) - - result = "" - for page in range(page_start, page_end+1): - if page == current_page: - template = current_page_button_template - else: - template = page_button_template - result += template.substitute(page=page, href = url + "?" + update_query_string(current_query_string, {'page': [str(page)]}) ) - return result - - - - - - - -showing_results_for = Template(''' -
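`page_buttons_html` renders a window of up to nine page links around the current page. A usage sketch with made-up arguments:

```python
# For current_page=7, pages 3..11 are rendered; page 7 itself uses
# current_page_button_template, the others become links whose ?page=N
# query comes from update_query_string.
buttons = page_buttons_html(
    current_page=7,
    estimated_pages=40,
    url=URL_ORIGIN + '/search',
    current_query_string='query=test&page=7',
)
```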
-
Showing results for $corrected_query
-
Search instead for $original_query
-
-''') - -did_you_mean = Template(''' -
-
Did you mean $corrected_query
-
-''') - -def renderer_html(renderer, additional_info={}, current_query_string=''): - type = list(renderer.keys())[0] - renderer = renderer[type] - if type == 'itemSectionRenderer': - return renderer_html(renderer['contents'][0], additional_info, current_query_string) - - if type == 'channelRenderer': - info = renderer_info(renderer) - html_ready = get_html_ready(info) - html_ready['url'] = URL_ORIGIN + "/channel/" + html_ready['id'] - return medium_channel_item_template.substitute(html_ready) - - if type in ('movieRenderer', 'clarificationRenderer'): - return '' - - info = renderer_info(renderer) - info.update(additional_info) - html_exclude = set(additional_info.keys()) - if type == 'compactVideoRenderer': - return video_item_html(info, small_video_item_template, html_exclude=html_exclude) - if type in ('compactPlaylistRenderer', 'compactRadioRenderer', 'compactShowRenderer'): - return playlist_item_html(info, small_playlist_item_template, html_exclude=html_exclude) - if type in ('videoRenderer', 'gridVideoRenderer'): - return video_item_html(info, medium_video_item_template, html_exclude=html_exclude) - if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer', 'gridShowRenderer', 'showRenderer'): - return playlist_item_html(info, medium_playlist_item_template, html_exclude=html_exclude) - - #print(renderer) - #raise NotImplementedError('Unknown renderer type: ' + type) - return '' diff --git a/youtube/html_common.py b/youtube/html_common.py new file mode 100644 index 0000000..27a582b --- /dev/null +++ b/youtube/html_common.py @@ -0,0 +1,429 @@ +from youtube.template import Template +from youtube import local_playlist, yt_data_extract, util + +import json +import html + + +with open('yt_basic_template.html', 'r', encoding='utf-8') as file: + yt_basic_template = Template(file.read()) + + + + +page_button_template = Template('''$page''') +current_page_button_template = Template('''
$page''') + +medium_playlist_item_template = Template(''' +
+
+ + +
+ $size +
+
+ + $title + +
$stats
+
+
+''') +medium_video_item_template = Template(''' +
+
+ + + $duration + + + $title + +
$stats
+ + $description + $badges +
+ +
+''') + +small_video_item_template = Template(''' +
+
+ + + $duration + + $title + +
$author
+ $views + +
+ +
+''') + +small_playlist_item_template = Template(''' +
+
+ + +
+ $size +
+
+ $title + +
$author
+
+
+''') + +medium_channel_item_template = Template(''' +
+
+ + + $duration + + + $title + + $subscriber_count + $size + + $description +
+
+''') + + + + +# ----- +# HTML +# ----- + +def small_video_item_html(item): + video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')}) + return small_video_item_template.substitute( + title = html.escape(item["title"]), + views = item["views"], + author = html.escape(item["author"]), + duration = item["duration"], + url = util.URL_ORIGIN + "/watch?v=" + item["id"], + thumbnail = util.get_thumbnail_url(item['id']), + video_info = html.escape(video_info), + ) + +def small_playlist_item_html(item): + return small_playlist_item_template.substitute( + title=html.escape(item["title"]), + size = item['size'], + author="", + url = util.URL_ORIGIN + "/playlist?list=" + item["id"], + thumbnail= util.get_thumbnail_url(item['first_video_id']), + ) + +def medium_playlist_item_html(item): + return medium_playlist_item_template.substitute( + title=html.escape(item["title"]), + size = item['size'], + author=item['author'], + author_url= util.URL_ORIGIN + item['author_url'], + url = util.URL_ORIGIN + "/playlist?list=" + item["id"], + thumbnail= item['thumbnail'], + ) + +def medium_video_item_html(medium_video_info): + info = medium_video_info + + return medium_video_item_template.substitute( + title=html.escape(info["title"]), + views=info["views"], + published = info["published"], + description = yt_data_extract.format_text_runs(info["description"]), + author=html.escape(info["author"]), + author_url=info["author_url"], + duration=info["duration"], + url = util.URL_ORIGIN + "/watch?v=" + info["id"], + thumbnail=info['thumbnail'], + datetime='', # TODO + ) + + +header_template = Template(''' +
+ + + +
+
+ + +$playlists + + + +
+ Local playlists +
+
+''') +playlist_option_template = Template('''''') +def get_header(search_box_value=""): + playlists = '' + for name in local_playlist.get_playlist_names(): + playlists += playlist_option_template.substitute(name = name) + return header_template.substitute(playlists = playlists, search_box_value = html.escape(search_box_value)) + + + + + + + + + + + +def badges_html(badges): + return ' | '.join(map(html.escape, badges)) + + +html_transform_dispatch = { + 'title': html.escape, + 'published': html.escape, + 'id': html.escape, + 'description': yt_data_extract.format_text_runs, + 'duration': html.escape, + 'thumbnail': lambda url: html.escape('/' + url.lstrip('/')), + 'size': html.escape, + 'author': html.escape, + 'author_url': lambda url: html.escape(util.URL_ORIGIN + url), + 'views': html.escape, + 'subscriber_count': html.escape, + 'badges': badges_html, + 'playlist_index': html.escape, +} + +def get_html_ready(item): + html_ready = {} + for key, value in item.items(): + try: + function = html_transform_dispatch[key] + except KeyError: + continue + html_ready[key] = function(value) + return html_ready + + +author_template_url = Template('''
By $author
''') +author_template = Template('''
$author
''') +stat_templates = ( + Template('''$views'''), + Template(''''''), +) +def get_stats(html_ready): + stats = [] + if 'author' in html_ready: + if 'author_url' in html_ready: + stats.append(author_template_url.substitute(html_ready)) + else: + stats.append(author_template.substitute(html_ready)) + for stat in stat_templates: + try: + stats.append(stat.strict_substitute(html_ready)) + except KeyError: + pass + return ' | '.join(stats) + +def video_item_html(item, template, html_exclude=set()): + + video_info = {} + for key in ('id', 'title', 'author'): + try: + video_info[key] = item[key] + except KeyError: + video_info[key] = '' + try: + video_info['duration'] = item['duration'] + except KeyError: + video_info['duration'] = 'Live' # livestreams don't have a duration + + html_ready = get_html_ready(item) + + html_ready['video_info'] = html.escape(json.dumps(video_info) ) + html_ready['url'] = util.URL_ORIGIN + "/watch?v=" + html_ready['id'] + html_ready['datetime'] = '' #TODO + + for key in html_exclude: + del html_ready[key] + html_ready['stats'] = get_stats(html_ready) + + return template.substitute(html_ready) + + +def playlist_item_html(item, template, html_exclude=set()): + html_ready = get_html_ready(item) + + html_ready['url'] = util.URL_ORIGIN + "/playlist?list=" + html_ready['id'] + html_ready['datetime'] = '' #TODO + + for key in html_exclude: + del html_ready[key] + html_ready['stats'] = get_stats(html_ready) + + return template.substitute(html_ready) + + + + + + + +page_button_template = Template('''$page''') +current_page_button_template = Template('''
$page
''') + +def page_buttons_html(current_page, estimated_pages, url, current_query_string): + if current_page <= 5: + page_start = 1 + page_end = min(9, estimated_pages) + else: + page_start = current_page - 4 + page_end = min(current_page + 4, estimated_pages) + + result = "" + for page in range(page_start, page_end+1): + if page == current_page: + template = current_page_button_template + else: + template = page_button_template + result += template.substitute(page=page, href = url + "?" + util.update_query_string(current_query_string, {'page': [str(page)]}) ) + return result + + + + + + + +showing_results_for = Template(''' +
+
Showing results for $corrected_query
+
Search instead for $original_query
+
+''') + +did_you_mean = Template(''' +
+
Did you mean $corrected_query
+
+''') + +def renderer_html(renderer, additional_info={}, current_query_string=''): + type = list(renderer.keys())[0] + renderer = renderer[type] + if type == 'itemSectionRenderer': + return renderer_html(renderer['contents'][0], additional_info, current_query_string) + + if type == 'channelRenderer': + info = yt_data_extract.renderer_info(renderer) + html_ready = get_html_ready(info) + html_ready['url'] = util.URL_ORIGIN + "/channel/" + html_ready['id'] + return medium_channel_item_template.substitute(html_ready) + + if type in ('movieRenderer', 'clarificationRenderer'): + return '' + + info = yt_data_extract.renderer_info(renderer) + info.update(additional_info) + html_exclude = set(additional_info.keys()) + if type == 'compactVideoRenderer': + return video_item_html(info, small_video_item_template, html_exclude=html_exclude) + if type in ('compactPlaylistRenderer', 'compactRadioRenderer', 'compactShowRenderer'): + return playlist_item_html(info, small_playlist_item_template, html_exclude=html_exclude) + if type in ('videoRenderer', 'gridVideoRenderer'): + return video_item_html(info, medium_video_item_template, html_exclude=html_exclude) + if type in ('playlistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'gridRadioRenderer', 'gridShowRenderer', 'showRenderer'): + return playlist_item_html(info, medium_playlist_item_template, html_exclude=html_exclude) + + #print(renderer) + #raise NotImplementedError('Unknown renderer type: ' + type) + return '' \ No newline at end of file diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py index 0375040..e354013 100644 --- a/youtube/local_playlist.py +++ b/youtube/local_playlist.py @@ -1,11 +1,12 @@ +from youtube.template import Template +from youtube import util, html_common +import settings + import os import json -from youtube.template import Template -from youtube import common import html import gevent import urllib -import settings playlists_directory = os.path.join(settings.data_dir, "playlists") thumbnails_directory = os.path.join(settings.data_dir, "playlist_thumbnails") @@ -38,7 +39,7 @@ def download_thumbnail(playlist_name, video_id): url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg") try: - thumbnail = common.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id) + thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id) except urllib.error.HTTPError as e: print("Failed to download thumbnail for " + video_id + ": " + str(e)) return @@ -78,15 +79,15 @@ def get_local_playlist_page(name): if info['id'] + ".jpg" in thumbnails: info['thumbnail'] = "/youtube.com/data/playlist_thumbnails/" + name + "/" + info['id'] + ".jpg" else: - info['thumbnail'] = common.get_thumbnail_url(info['id']) + info['thumbnail'] = util.get_thumbnail_url(info['id']) missing_thumbnails.append(info['id']) - videos_html += common.video_item_html(info, common.small_video_item_template) + videos_html += html_common.video_item_html(info, html_common.small_video_item_template) except json.decoder.JSONDecodeError: pass gevent.spawn(download_thumbnails, name, missing_thumbnails) return local_playlist_template.substitute( page_title = name + ' - Local playlist', - header = common.get_header(), + header = html_common.get_header(), videos = videos_html, title = name, page_buttons = '' @@ -127,11 +128,11 @@ def get_playlists_list_page(): page = '''
    \n''' list_item_template = Template('''
  • $name
  • \n''') for name in get_playlist_names(): - page += list_item_template.substitute(url = html.escape(common.URL_ORIGIN + '/playlists/' + name), name = html.escape(name)) + page += list_item_template.substitute(url = html.escape(util.URL_ORIGIN + '/playlists/' + name), name = html.escape(name)) page += '''
\n''' - return common.yt_basic_template.substitute( + return html_common.yt_basic_template.substitute( page_title = "Local playlists", - header = common.get_header(), + header = html_common.get_header(), style = '', page = page, ) @@ -151,7 +152,7 @@ def path_edit_playlist(env, start_response): if parameters['action'][0] == 'remove': playlist_name = env['path_parts'][1] remove_from_playlist(playlist_name, parameters['video_info_list']) - start_response('303 See Other', [('Location', common.URL_ORIGIN + env['PATH_INFO']),] ) + start_response('303 See Other', [('Location', util.URL_ORIGIN + env['PATH_INFO']),] ) return b'' else: diff --git a/youtube/playlist.py b/youtube/playlist.py index 419ed00..fbe6448 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -1,10 +1,9 @@ +from youtube import util, yt_data_extract, html_common, template, proto + import base64 -import youtube.common as common import urllib import json import string -from youtube import template -import youtube.proto as proto import gevent import math @@ -49,10 +48,10 @@ headers_1 = ( def playlist_first_page(playlist_id, report_text = "Retrieved playlist"): url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1' - content = common.fetch_url(url, common.mobile_ua + headers_1, report_text=report_text) + content = util.fetch_url(url, util.mobile_ua + headers_1, report_text=report_text) '''with open('debug/playlist_debug', 'wb') as f: f.write(content)''' - content = json.loads(common.uppercase_escape(content.decode('utf-8'))) + content = json.loads(util.uppercase_escape(content.decode('utf-8'))) return content @@ -69,11 +68,11 @@ def get_videos(playlist_id, page): 'X-YouTube-Client-Version': '2.20180508', } - content = common.fetch_url(url, headers, report_text="Retrieved playlist") + content = util.fetch_url(url, headers, report_text="Retrieved playlist") '''with open('debug/playlist_debug', 'wb') as f: f.write(content)''' - info = json.loads(common.uppercase_escape(content.decode('utf-8'))) + info = json.loads(util.uppercase_escape(content.decode('utf-8'))) return info @@ -101,22 +100,22 @@ def get_playlist_page(env, start_response): video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] videos_html = '' for video_json in video_list: - info = common.renderer_info(video_json['playlistVideoRenderer']) - videos_html += common.video_item_html(info, common.small_video_item_template) + info = yt_data_extract.renderer_info(video_json['playlistVideoRenderer']) + videos_html += html_common.video_item_html(info, html_common.small_video_item_template) - metadata = common.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer']) + metadata = yt_data_extract.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer']) video_count = int(metadata['size'].replace(',', '')) - page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", env['QUERY_STRING']) + page_buttons = html_common.page_buttons_html(int(page), math.ceil(video_count/20), util.URL_ORIGIN + "/playlist", env['QUERY_STRING']) - html_ready = common.get_html_ready(metadata) + html_ready = html_common.get_html_ready(metadata) html_ready['page_title'] = html_ready['title'] + ' - Page ' + str(page) stats = '' stats += playlist_stat_template.substitute(stat=html_ready['size'] + ' videos') stats += playlist_stat_template.substitute(stat=html_ready['views']) return yt_playlist_template.substitute( - header = 
common.get_header(), + header = html_common.get_header(), videos = videos_html, page_buttons = page_buttons, stats = stats, diff --git a/youtube/post_comment.py b/youtube/post_comment.py index 92c45e1..876a1c0 100644 --- a/youtube/post_comment.py +++ b/youtube/post_comment.py @@ -1,11 +1,11 @@ # Contains functions having to do with posting/editing/deleting comments +from youtube import util, html_common, proto, comments, accounts +import settings import urllib import json -from youtube import common, proto, comments, accounts import re import traceback -import settings import os def _post_comment(text, video_id, session_token, cookiejar): @@ -31,7 +31,7 @@ def _post_comment(text, video_id, session_token, cookiejar): data = urllib.parse.urlencode(data_dict).encode() - content = common.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) + content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) code = json.loads(content)['code'] print("Comment posting code: " + code) @@ -62,7 +62,7 @@ def _post_comment_reply(text, video_id, parent_comment_id, session_token, cookie } data = urllib.parse.urlencode(data_dict).encode() - content = common.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) + content = util.fetch_url("https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) code = json.loads(content)['code'] print("Comment posting code: " + code) @@ -90,7 +90,7 @@ def _delete_comment(video_id, comment_id, author_id, session_token, cookiejar): } data = urllib.parse.urlencode(data_dict).encode() - content = common.fetch_url("https://m.youtube.com/service_ajax?name=performCommentActionEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) + content = util.fetch_url("https://m.youtube.com/service_ajax?name=performCommentActionEndpoint", headers=headers, data=data, cookiejar_send=cookiejar) code = json.loads(content)['code'] print("Comment deletion code: " + code) return code @@ -101,8 +101,8 @@ def get_session_token(video_id, cookiejar): # youtube-dl uses disable_polymer=1 which uses a different request format which has an obfuscated javascript algorithm to generate a parameter called "bgr" # Tokens retrieved from disable_polymer pages only work with that format. Tokens retrieved on mobile only work using mobile requests # Additionally, tokens retrieved without sending the same cookie won't work. So this is necessary even if the bgr and stuff was reverse engineered. 
- headers = {'User-Agent': common.mobile_user_agent} - mobile_page = common.fetch_url('https://m.youtube.com/watch?v=' + video_id, headers, report_text="Retrieved session token for comment", cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode() + headers = {'User-Agent': util.mobile_user_agent} + mobile_page = util.fetch_url('https://m.youtube.com/watch?v=' + video_id, headers, report_text="Retrieved session token for comment", cookiejar_send=cookiejar, cookiejar_receive=cookiejar).decode() match = xsrf_token_regex.search(mobile_page) if match: return match.group(1).replace("%3D", "=") @@ -118,9 +118,9 @@ def delete_comment(env, start_response): code = _delete_comment(video_id, parameters['comment_id'][0], parameters['author_id'][0], token, cookiejar) if code == "SUCCESS": - start_response('303 See Other', [('Location', common.URL_ORIGIN + '/comment_delete_success'),] ) + start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_success'),] ) else: - start_response('303 See Other', [('Location', common.URL_ORIGIN + '/comment_delete_fail'),] ) + start_response('303 See Other', [('Location', util.URL_ORIGIN + '/comment_delete_fail'),] ) def post_comment(env, start_response): parameters = env['parameters'] @@ -131,11 +131,11 @@ def post_comment(env, start_response): if 'parent_id' in parameters: code = _post_comment_reply(parameters['comment_text'][0], parameters['video_id'][0], parameters['parent_id'][0], token, cookiejar) - start_response('303 See Other', (('Location', common.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) ) + start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?' + env['QUERY_STRING']),) ) else: code = _post_comment(parameters['comment_text'][0], parameters['video_id'][0], token, cookiejar) - start_response('303 See Other', (('Location', common.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) ) + start_response('303 See Other', (('Location', util.URL_ORIGIN + '/comments?ctoken=' + comments.make_comment_ctoken(video_id, sort=1)),) ) return b'' @@ -163,10 +163,10 @@ def get_delete_comment_page(env, start_response): page += ''' ''' - return common.yt_basic_template.substitute( + return html_common.yt_basic_template.substitute( page_title = "Delete comment?", style = style, - header = common.get_header(), + header = html_common.get_header(), page = page, ).encode('utf-8') @@ -174,7 +174,7 @@ def get_post_comment_page(env, start_response): start_response('200 OK', [('Content-type','text/html'),]) parameters = env['parameters'] video_id = parameters['video_id'][0] - parent_id = common.default_multi_get(parameters, 'parent_id', 0, default='') + parent_id = util.default_multi_get(parameters, 'parent_id', 0, default='') style = ''' main{ display: grid; @@ -194,23 +194,23 @@ textarea{ }''' if parent_id: # comment reply comment_box = comments.comment_box_template.substitute( - form_action = common.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id, + form_action = util.URL_ORIGIN + '/comments?parent_id=' + parent_id + "&video_id=" + video_id, video_id_input = '', post_text = "Post reply", options=comments.comment_box_account_options(), ) else: comment_box = comments.comment_box_template.substitute( - form_action = common.URL_ORIGIN + '/post_comment', + form_action = util.URL_ORIGIN + '/post_comment', video_id_input = '''''', post_text = "Post comment", options=comments.comment_box_account_options(), ) page = '''
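Putting the pieces of post_comment.py together — a hedged sketch of the posting flow (a real logged-in cookiejar is required; the video id is a made-up example):

```python
import http.cookiejar

# get_session_token fetches the mobile watch page with the account's
# cookies and regex-extracts the XSRF token; _post_comment then calls
# the createCommentEndpoint service_ajax endpoint and returns its
# 'code' field, which is 'SUCCESS' when the comment was accepted.
cookiejar = http.cookiejar.LWPCookieJar()   # assumed loaded with account cookies
video_id = 'dQw4w9WgXcQ'                    # made-up example id

token = get_session_token(video_id, cookiejar)
code = _post_comment('Nice video', video_id, token, cookiejar)
print(code == 'SUCCESS')
```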
\n''' + comment_box + '''
\n''' - return common.yt_basic_template.substitute( + return html_common.yt_basic_template.substitute( page_title = "Post comment reply" if parent_id else "Post a comment", style = style, - header = common.get_header(), + header = html_common.get_header(), page = page, ).encode('utf-8') diff --git a/youtube/search.py b/youtube/search.py index db65eaa..0cef0f3 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -1,11 +1,12 @@ +from youtube import util, html_common, yt_data_extract, proto + import json import urllib import html from string import Template import base64 from math import ceil -from youtube.common import default_multi_get, get_thumbnail_url, URL_ORIGIN -from youtube import common, proto + with open("yt_search_results_template.html", "r") as file: yt_search_results_template = file.read() @@ -54,7 +55,7 @@ def get_search_json(query, page, autocorrect, sort, filters): 'X-YouTube-Client-Version': '2.20180418', } url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D") - content = common.fetch_url(url, headers=headers, report_text="Got search results") + content = util.fetch_url(url, headers=headers, report_text="Got search results") info = json.loads(content) return info @@ -70,9 +71,9 @@ def get_search_page(env, start_response): start_response('200 OK', [('Content-type','text/html'),]) parameters = env['parameters'] if len(parameters) == 0: - return common.yt_basic_template.substitute( + return html_common.yt_basic_template.substitute( page_title = "Search", - header = common.get_header(), + header = html_common.get_header(), style = '', page = '', ).encode('utf-8') @@ -100,24 +101,24 @@ def get_search_page(env, start_response): renderer = renderer[type] corrected_query_string = parameters.copy() corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] - corrected_query_url = URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) + corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) corrections = did_you_mean.substitute( corrected_query_url = corrected_query_url, - corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']), + corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), ) continue if type == 'showingResultsForRenderer': renderer = renderer[type] no_autocorrect_query_string = parameters.copy() no_autocorrect_query_string['autocorrect'] = ['0'] - no_autocorrect_query_url = URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) + no_autocorrect_query_url = util.URL_ORIGIN + '/search?' 
+ urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) corrections = showing_results_for.substitute( - corrected_query = common.format_text_runs(renderer['correctedQuery']['runs']), + corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), original_query_url = no_autocorrect_query_url, original_query = html.escape(renderer['originalQuery']['simpleText']), ) continue - result_list_html += common.renderer_html(renderer, current_query_string=env['QUERY_STRING']) + result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING']) page = int(page) if page <= 5: @@ -129,13 +130,13 @@ def get_search_page(env, start_response): result = Template(yt_search_results_template).substitute( - header = common.get_header(query), + header = html_common.get_header(query), results = result_list_html, page_title = query + " - Search", search_box_value = html.escape(query), number_of_results = '{:,}'.format(estimated_results), number_of_pages = '{:,}'.format(estimated_pages), - page_buttons = common.page_buttons_html(page, estimated_pages, URL_ORIGIN + "/search", env['QUERY_STRING']), + page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']), corrections = corrections ) return result.encode('utf-8') diff --git a/youtube/util.py b/youtube/util.py new file mode 100644 index 0000000..2ebd8bc --- /dev/null +++ b/youtube/util.py @@ -0,0 +1,153 @@ +import socks, sockshandler +import gzip +import brotli +import urllib.parse +import re +import time +import settings + + +URL_ORIGIN = "/https://www.youtube.com" + + +class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): + '''Separate cookiejars for receiving and sending''' + def __init__(self, cookiejar_send=None, cookiejar_receive=None): + import http.cookiejar + self.cookiejar_send = cookiejar_send + self.cookiejar_receive = cookiejar_receive + + def http_request(self, request): + if self.cookiejar_send is not None: + self.cookiejar_send.add_cookie_header(request) + return request + + def http_response(self, request, response): + if self.cookiejar_receive is not None: + self.cookiejar_receive.extract_cookies(response, request) + return response + + https_request = http_request + https_response = http_response + + +def decode_content(content, encoding_header): + encodings = encoding_header.replace(' ', '').split(',') + for encoding in reversed(encodings): + if encoding == 'identity': + continue + if encoding == 'br': + content = brotli.decompress(content) + elif encoding == 'gzip': + content = gzip.decompress(content) + return content + +def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True): + ''' + When cookiejar_send is set to a CookieJar object, + those cookies will be sent in the request (but cookies in response will not be merged into it) + When cookiejar_receive is set to a CookieJar object, + cookies received in the response will be merged into the object (nothing will be sent from it) + When both are set to the same object, cookies will be sent from the object, + and response cookies will be merged into it. + ''' + headers = dict(headers) # Note: Calling dict() on a dict will make a copy + headers['Accept-Encoding'] = 'gzip, br' + + # prevent python version being leaked by urllib if User-Agent isn't provided + # (urllib will use ex. 
Python-urllib/3.6 otherwise) + if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers: + headers['User-Agent'] = 'Python-urllib' + + if data is not None: + if isinstance(data, str): + data = data.encode('ascii') + elif not isinstance(data, bytes): + data = urllib.parse.urlencode(data).encode('ascii') + + start_time = time.time() + + + req = urllib.request.Request(url, data=data, headers=headers) + + cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive) + + if use_tor and settings.route_tor: + opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor) + else: + opener = urllib.request.build_opener(cookie_processor) + + response = opener.open(req, timeout=timeout) + response_time = time.time() + + + content = response.read() + read_finish = time.time() + if report_text: + print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) + content = decode_content(content, response.getheader('Content-Encoding', default='identity')) + return content + +mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1' +mobile_ua = (('User-Agent', mobile_user_agent),) +desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0' +desktop_ua = (('User-Agent', desktop_user_agent),) + + + + + + + + + + +def dict_add(*dicts): + for dictionary in dicts[1:]: + dicts[0].update(dictionary) + return dicts[0] + +def video_id(url): + url_parts = urllib.parse.urlparse(url) + return urllib.parse.parse_qs(url_parts.query)['v'][0] + +def default_multi_get(object, *keys, default): + ''' Like dict.get(), but for nested dictionaries/sequences, supporting keys or indices. 
Last argument is the default value to use in case of any IndexErrors or KeyErrors ''' + try: + for key in keys: + object = object[key] + return object + except (IndexError, KeyError): + return default + + +# default, sddefault, mqdefault, hqdefault, hq720 +def get_thumbnail_url(video_id): + return "/i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + +def seconds_to_timestamp(seconds): + seconds = int(seconds) + hours, seconds = divmod(seconds,3600) + minutes, seconds = divmod(seconds,60) + if hours != 0: + timestamp = str(hours) + ":" + timestamp += str(minutes).zfill(2) # zfill pads with zeros + else: + timestamp = str(minutes) + + timestamp += ":" + str(seconds).zfill(2) + return timestamp + + + +def update_query_string(query_string, items): + parameters = urllib.parse.parse_qs(query_string) + parameters.update(items) + return urllib.parse.urlencode(parameters, doseq=True) + + + +def uppercase_escape(s): + return re.sub( + r'\\U([0-9a-fA-F]{8})', + lambda m: chr(int(m.group(1), base=16)), s) \ No newline at end of file diff --git a/youtube/watch.py b/youtube/watch.py index 04a5b5d..e8c10a2 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -1,12 +1,12 @@ +from youtube import util, html_common, comments + from youtube_dl.YoutubeDL import YoutubeDL from youtube_dl.extractor.youtube import YoutubeError import json import urllib from string import Template import html -import youtube.common as common -from youtube.common import default_multi_get, get_thumbnail_url, video_id, URL_ORIGIN -import youtube.comments as comments + import gevent import settings import os @@ -127,9 +127,9 @@ def get_related_items_html(info): result = "" for item in info['related_vids']: if 'list' in item: # playlist: - result += common.small_playlist_item_html(watch_page_related_playlist_info(item)) + result += html_common.small_playlist_item_html(watch_page_related_playlist_info(item)) else: - result += common.small_video_item_html(watch_page_related_video_info(item)) + result += html_common.small_video_item_html(watch_page_related_video_info(item)) return result @@ -137,7 +137,7 @@ def get_related_items_html(info): # converts these to standard names def watch_page_related_video_info(item): result = {key: item[key] for key in ('id', 'title', 'author')} - result['duration'] = common.seconds_to_timestamp(item['length_seconds']) + result['duration'] = util.seconds_to_timestamp(item['length_seconds']) try: result['views'] = item['short_view_count_text'] except KeyError: @@ -155,9 +155,9 @@ def watch_page_related_playlist_info(item): def sort_formats(info): sorted_formats = info['formats'].copy() - sorted_formats.sort(key=lambda x: default_multi_get(_formats, x['format_id'], 'height', default=0)) + sorted_formats.sort(key=lambda x: util.default_multi_get(_formats, x['format_id'], 'height', default=0)) for index, format in enumerate(sorted_formats): - if default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360: + if util.default_multi_get(_formats, format['format_id'], 'height', default=0) >= 360: break sorted_formats = sorted_formats[index:] + sorted_formats[0:index] sorted_formats = [format for format in info['formats'] if format['acodec'] != 'none' and format['vcodec'] != 'none'] @@ -236,7 +236,7 @@ def get_watch_page(env, start_response): start_response('200 OK', [('Content-type','text/html'),]) - lc = common.default_multi_get(env['parameters'], 'lc', 0, default='') + lc = util.default_multi_get(env['parameters'], 'lc', 0, default='') if settings.route_tor: proxy = 'socks5://127.0.0.1:9150/' 
else: @@ -256,17 +256,17 @@ def get_watch_page(env, start_response): #chosen_format = choose_format(info) if isinstance(info, str): # youtube error - return common.yt_basic_template.substitute( + return html_common.yt_basic_template.substitute( page_title = "Error", style = "", - header = common.get_header(), + header = html_common.get_header(), page = html.escape(info), ).encode('utf-8') sorted_formats = sort_formats(info) video_info = { - "duration": common.seconds_to_timestamp(info["duration"]), + "duration": util.seconds_to_timestamp(info["duration"]), "id": info['id'], "title": info['title'], "author": info['uploader'], @@ -338,7 +338,7 @@ def get_watch_page(env, start_response): page = yt_watch_template.substitute( video_title = html.escape(info["title"]), page_title = html.escape(info["title"]), - header = common.get_header(), + header = html_common.get_header(), uploader = html.escape(info["uploader"]), uploader_channel_url = '/' + info["uploader_url"], upload_date = upload_date, diff --git a/youtube/youtube.py b/youtube/youtube.py index b6b12fb..a6a216e 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -1,7 +1,7 @@ import mimetypes import urllib.parse import os -from youtube import local_playlist, watch, search, playlist, channel, comments, common, post_comment, accounts +from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util import settings YOUTUBE_FILES = ( "/shared.css", @@ -64,7 +64,7 @@ def youtube(env, start_response): elif path.startswith("/api/"): start_response('200 OK', [('Content-type', 'text/vtt'),] ) - result = common.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else '')) + result = util.fetch_url('https://www.youtube.com' + path + ('?' 
+ query_string if query_string else '')) result = result.replace(b"align:start position:0%", b"") return result diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py new file mode 100644 index 0000000..5483911 --- /dev/null +++ b/youtube/yt_data_extract.py @@ -0,0 +1,205 @@ +import html + +# videos (all of type str): + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# published +# duration +# likes +# dislikes +# views +# playlist_index + +# playlists: + +# id +# title +# url +# author +# author_url +# thumbnail +# description +# updated +# size +# first_video_id + + + + + + + +def get_plain_text(node): + try: + return html.escape(node['simpleText']) + except KeyError: + return unformmated_text_runs(node['runs']) + +def unformmated_text_runs(runs): + result = '' + for text_run in runs: + result += html.escape(text_run["text"]) + return result + +def format_text_runs(runs): + if isinstance(runs, str): + return runs + result = '' + for text_run in runs: + if text_run.get("bold", False): + result += "" + html.escape(text_run["text"]) + "" + elif text_run.get('italics', False): + result += "" + html.escape(text_run["text"]) + "" + else: + result += html.escape(text_run["text"]) + return result + + + + + + + + +def get_url(node): + try: + return node['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + except KeyError: + return node['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + + +def get_text(node): + try: + return node['simpleText'] + except KeyError: + pass + try: + return node['runs'][0]['text'] + except IndexError: # empty text runs + return '' + +def get_formatted_text(node): + try: + return node['runs'] + except KeyError: + return node['simpleText'] + +def get_badges(node): + badges = [] + for badge_node in node: + badge = badge_node['metadataBadgeRenderer']['label'] + if badge.lower() != 'new': + badges.append(badge) + return badges + +def get_thumbnail(node): + try: + return node['thumbnails'][0]['url'] # polymer format + except KeyError: + return node['url'] # ajax format + +dispatch = { + +# polymer format + 'title': ('title', get_text), + 'publishedTimeText': ('published', get_text), + 'videoId': ('id', lambda node: node), + 'descriptionSnippet': ('description', get_formatted_text), + 'lengthText': ('duration', get_text), + 'thumbnail': ('thumbnail', get_thumbnail), + 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']), + + 'viewCountText': ('views', get_text), + 'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]), # the format is "324 videos" + 'videoCountText': ('size', get_text), + 'playlistId': ('id', lambda node: node), + 'descriptionText': ('description', get_formatted_text), + + 'subscriberCountText': ('subscriber_count', get_text), + 'channelId': ('id', lambda node: node), + 'badges': ('badges', get_badges), + +# ajax format + 'view_count_text': ('views', get_text), + 'num_videos_text': ('size', lambda node: get_text(node).split(' ')[0]), + 'owner_text': ('author', get_text), + 'owner_endpoint': ('author_url', lambda node: node['url']), + 'description': ('description', get_formatted_text), + 'index': ('playlist_index', get_text), + 'short_byline': ('author', get_text), + 'length': ('duration', get_text), + 'video_id': ('id', lambda node: node), + +} + +def renderer_info(renderer): + try: + info = {} + if 'viewCountText' in renderer: # prefer this one as it contains all the digits + info['views'] = get_text(renderer['viewCountText']) + elif 
'shortViewCountText' in renderer: + info['views'] = get_text(renderer['shortViewCountText']) + + if 'ownerText' in renderer: + info['author'] = renderer['ownerText']['runs'][0]['text'] + info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + try: + overlays = renderer['thumbnailOverlays'] + except KeyError: + pass + else: + for overlay in overlays: + if 'thumbnailOverlayTimeStatusRenderer' in overlay: + info['duration'] = get_text(overlay['thumbnailOverlayTimeStatusRenderer']['text']) + # show renderers don't have videoCountText + elif 'thumbnailOverlayBottomPanelRenderer' in overlay: + info['size'] = get_text(overlay['thumbnailOverlayBottomPanelRenderer']['text']) + + # show renderers don't have playlistId, have to dig into the url to get it + try: + info['id'] = renderer['navigationEndpoint']['watchEndpoint']['playlistId'] + except KeyError: + pass + for key, node in renderer.items(): + if key in ('longBylineText', 'shortBylineText'): + info['author'] = get_text(node) + try: + info['author_url'] = get_url(node) + except KeyError: + pass + + # show renderers don't have thumbnail key at top level, dig into thumbnailRenderer + elif key == 'thumbnailRenderer' and 'showCustomThumbnailRenderer' in node: + info['thumbnail'] = node['showCustomThumbnailRenderer']['thumbnail']['thumbnails'][0]['url'] + else: + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(renderer) + raise + +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + + -- cgit v1.2.3 From 1e1f55c9e5d98ba076bc67e7abe9e4d77d84c65b Mon Sep 17 00:00:00 2001 From: James Taylor Date: Mon, 25 Feb 2019 20:47:49 -0800 Subject: Use persistent connections --- youtube/util.py | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 87 insertions(+), 11 deletions(-) (limited to 'youtube') diff --git a/youtube/util.py b/youtube/util.py index 2ebd8bc..e0f4a2c 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -1,14 +1,75 @@ +import settings import socks, sockshandler import gzip import brotli import urllib.parse import re import time -import settings +# The trouble with the requests library: It ships its own certificate bundle via certifi +# instead of using the system certificate store, meaning self-signed certificates +# configured by the user will not work. Some draconian networks block TLS unless a corporate +# certificate is installed on the system. Additionally, some users install a self signed cert +# in order to use programs to modify or monitor requests made by programs on the system. + +# Finally, certificates expire and need to be updated, or are sometimes revoked. Sometimes +# certificate authorites go rogue and need to be untrusted. Since we are going through Tor exit nodes, +# this becomes all the more important. A rogue CA could issue a fake certificate for accounts.google.com, and a +# malicious exit node could use this to decrypt traffic when logging in and retrieve passwords. 
Examples: +# https://www.engadget.com/2015/10/29/google-warns-symantec-over-certificates/ +# https://nakedsecurity.sophos.com/2013/12/09/serious-security-google-finds-fake-but-trusted-ssl-certificates-for-its-domains-made-in-france/ + +# In the requests documentation it says: +# "Before version 2.16, Requests bundled a set of root CAs that it trusted, sourced from the Mozilla trust store. +# The certificates were only updated once for each Requests version. When certifi was not installed, +# this led to extremely out-of-date certificate bundles when using significantly older versions of Requests. +# For the sake of security we recommend upgrading certifi frequently!" +# (http://docs.python-requests.org/en/master/user/advanced/#ca-certificates) + +# Expecting users to remember to manually update certifi on Linux isn't reasonable in my view. +# On windows, this is even worse since I am distributing all dependencies. This program is not +# updated frequently, and using requests would lead to outdated certificates. Certificates +# should be updated with OS updates, instead of thousands of developers of different programs +# being expected to do this correctly 100% of the time. + +# There is hope that this might be fixed eventually: +# https://github.com/kennethreitz/requests/issues/2966 + +# Until then, I will use a mix of urllib3 and urllib. +import urllib3 +import urllib3.contrib.socks URL_ORIGIN = "/https://www.youtube.com" +connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED') + +old_tor_connection_pool = None +tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED') + +tor_pool_refresh_time = time.monotonic() # prevent problems due to clock changes + +def get_pool(use_tor): + global old_tor_connection_pool + global tor_connection_pool + global tor_pool_refresh_time + + if not use_tor: + return connection_pool + + # Tor changes circuits after 10 minutes: https://tor.stackexchange.com/questions/262/for-how-long-does-a-circuit-stay-alive + current_time = time.monotonic() + if current_time - tor_pool_refresh_time > 300: # close pool after 5 minutes + tor_connection_pool.clear() + + # Keep a reference for 5 min to avoid it getting garbage collected while sockets still in use + old_tor_connection_pool = tor_connection_pool + + tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED') + tor_pool_refresh_time = current_time + + return tor_connection_pool + + class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): '''Separate cookiejars for receiving and sending''' @@ -42,7 +103,7 @@ def decode_content(content, encoding_header): content = gzip.decompress(content) return content -def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True): +def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False): ''' When cookiejar_send is set to a CookieJar object, those cookies will be sent in the request (but cookies in response will not be merged into it) @@ -59,7 +120,9 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers: headers['User-Agent'] = 'Python-urllib' + method = "GET" if data is not None: + method = "POST" if isinstance(data, str): data = data.encode('ascii') elif not 
isinstance(data, bytes): @@ -67,25 +130,38 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja start_time = time.time() + if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib + req = urllib.request.Request(url, data=data, headers=headers) - req = urllib.request.Request(url, data=data, headers=headers) + cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive) - cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive) + if use_tor and settings.route_tor: + opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor) + else: + opener = urllib.request.build_opener(cookie_processor) - if use_tor and settings.route_tor: - opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor) - else: - opener = urllib.request.build_opener(cookie_processor) + response = opener.open(req, timeout=timeout) + response_time = time.time() - response = opener.open(req, timeout=timeout) - response_time = time.time() + content = response.read() + + else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. + pool = get_pool(use_tor and settings.route_tor) + + response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False) + response_time = time.time() + + content = response.read() + response.release_conn() - content = response.read() read_finish = time.time() if report_text: print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) content = decode_content(content, response.getheader('Content-Encoding', default='identity')) + + if return_response: + return content, response return content mobile_user_agent = 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1' -- cgit v1.2.3 From 89daa27be257c6170e7d730529bee9ce1294f73d Mon Sep 17 00:00:00 2001 From: James Taylor Date: Tue, 26 Feb 2019 17:15:51 -0800 Subject: prevent urllib3 from decoding content --- youtube/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube') diff --git a/youtube/util.py b/youtube/util.py index e0f4a2c..9950815 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -149,7 +149,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. 
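# Illustration: a minimal standalone sketch of the urllib3 branch below,
# assuming a stock urllib3 installation. preload_content=False defers the body
# read so latency and read time can be measured separately, and release_conn()
# returns the socket to the pool, which is what makes the connection persistent.
import urllib3
sketch_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
sketch_response = sketch_pool.request('GET', 'https://www.youtube.com/', preload_content=False)
sketch_content = sketch_response.read()
sketch_response.release_conn()  # socket goes back to the pool for reuse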
pool = get_pool(use_tor and settings.route_tor) - response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False) + response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False) response_time = time.time() content = response.read() -- cgit v1.2.3 From 8ceaa34a53ebe4c1fb78342bbf70763641cbd186 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Tue, 26 Feb 2019 17:27:53 -0800 Subject: html_common: remove x_item_html functions --- youtube/html_common.py | 50 -------------------------------------------------- youtube/watch.py | 8 ++++++-- 2 files changed, 6 insertions(+), 52 deletions(-) (limited to 'youtube') diff --git a/youtube/html_common.py b/youtube/html_common.py index 27a582b..8e65a1f 100644 --- a/youtube/html_common.py +++ b/youtube/html_common.py @@ -103,56 +103,6 @@ medium_channel_item_template = Template(''' -# ----- -# HTML -# ----- - -def small_video_item_html(item): - video_info = json.dumps({key: item[key] for key in ('id', 'title', 'author', 'duration')}) - return small_video_item_template.substitute( - title = html.escape(item["title"]), - views = item["views"], - author = html.escape(item["author"]), - duration = item["duration"], - url = util.URL_ORIGIN + "/watch?v=" + item["id"], - thumbnail = util.get_thumbnail_url(item['id']), - video_info = html.escape(video_info), - ) - -def small_playlist_item_html(item): - return small_playlist_item_template.substitute( - title=html.escape(item["title"]), - size = item['size'], - author="", - url = util.URL_ORIGIN + "/playlist?list=" + item["id"], - thumbnail= util.get_thumbnail_url(item['first_video_id']), - ) - -def medium_playlist_item_html(item): - return medium_playlist_item_template.substitute( - title=html.escape(item["title"]), - size = item['size'], - author=item['author'], - author_url= util.URL_ORIGIN + item['author_url'], - url = util.URL_ORIGIN + "/playlist?list=" + item["id"], - thumbnail= item['thumbnail'], - ) - -def medium_video_item_html(medium_video_info): - info = medium_video_info - - return medium_video_item_template.substitute( - title=html.escape(info["title"]), - views=info["views"], - published = info["published"], - description = yt_data_extract.format_text_runs(info["description"]), - author=html.escape(info["author"]), - author_url=info["author_url"], - duration=info["duration"], - url = util.URL_ORIGIN + "/watch?v=" + info["id"], - thumbnail=info['thumbnail'], - datetime='', # TODO - ) header_template = Template(''' diff --git a/youtube/watch.py b/youtube/watch.py index e8c10a2..06b525a 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -127,9 +127,11 @@ def get_related_items_html(info): result = "" for item in info['related_vids']: if 'list' in item: # playlist: - result += html_common.small_playlist_item_html(watch_page_related_playlist_info(item)) + item = watch_page_related_playlist_info(item) + result += html_common.playlist_item_html(item, html_common.small_playlist_item_template) else: - result += html_common.small_video_item_html(watch_page_related_video_info(item)) + item = watch_page_related_video_info(item) + result += html_common.video_item_html(item, html_common.small_video_item_template) return result @@ -142,6 +144,7 @@ def watch_page_related_video_info(item): result['views'] = item['short_view_count_text'] except KeyError: result['views'] = '' + result['thumbnail'] = util.get_thumbnail_url(item['id']) return result def watch_page_related_playlist_info(item): @@ -150,6 +153,7 @@ def 
watch_page_related_playlist_info(item): 'title': item['playlist_title'], 'id': item['list'], 'first_video_id': item['video_id'], + 'thumbnail': util.get_thumbnail_url(item['video_id']), } -- cgit v1.2.3 From 0ecf3d08be9206b1067fa37301fb187256dc17ab Mon Sep 17 00:00:00 2001 From: James Taylor Date: Mon, 25 Mar 2019 13:15:54 -0700 Subject: Fix error with channel searches with non-ascii characters --- youtube/proto.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube') diff --git a/youtube/proto.py b/youtube/proto.py index 004375a..d966455 100644 --- a/youtube/proto.py +++ b/youtube/proto.py @@ -60,7 +60,7 @@ def unpadded_b64encode(data): def as_bytes(value): if isinstance(value, str): - return value.encode('ascii') + return value.encode('utf-8') return value -- cgit v1.2.3 From 9f93b9429c77e631972186049fbc7518e2cf5d4b Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 9 May 2019 23:07:43 -0700 Subject: Fix comment parsing error due to comments from deleted channels --- youtube/comments.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'youtube') diff --git a/youtube/comments.py b/youtube/comments.py index 4087b47..94b086e 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -234,10 +234,7 @@ def parse_comments_polymer(content, replies=False): comment_raw = comment_raw['commentRenderer'] comment = { - 'author': yt_data_extract.get_plain_text(comment_raw['authorText']), - 'author_url': comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'], - 'author_channel_id': comment_raw['authorEndpoint']['browseEndpoint']['browseId'], - 'author_id': comment_raw['authorId'], + 'author_id': comment_raw.get('authorId', ''), 'author_avatar': comment_raw['authorThumbnail']['thumbnails'][0]['url'], 'likes': comment_raw['likeCount'], 'published': yt_data_extract.get_plain_text(comment_raw['publishedTimeText']), @@ -247,6 +244,16 @@ def parse_comments_polymer(content, replies=False): 'video_id': video_id, 'comment_id': comment_raw['commentId'], } + + if 'authorText' in comment_raw: # deleted channels have no name or channel link + comment['author'] = yt_data_extract.get_plain_text(comment_raw['authorText']) + comment['author_url'] = comment_raw['authorEndpoint']['commandMetadata']['webCommandMetadata']['url'] + comment['author_channel_id'] = comment_raw['authorEndpoint']['browseEndpoint']['browseId'] + else: + comment['author'] = '' + comment['author_url'] = '' + comment['author_channel_id'] = '' + comments.append(comment) except Exception as e: print('Error parsing comments: ' + str(e)) -- cgit v1.2.3
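The dispatch table introduced in yt_data_extract.py above keeps renderer parsing declarative: each raw key maps to a (simple_name, extractor) pair, so supporting a new field means adding one table entry instead of another if/elif branch. A minimal sketch of renderer_info on a hand-built renderer (the field values here are invented for illustration):

from youtube import yt_data_extract

renderer = {
    'videoId': 'dQw4w9WgXcQ',
    'title': {'runs': [{'text': 'Some video'}]},
    'lengthText': {'simpleText': '3:33'},
}
print(yt_data_extract.renderer_info(renderer))
# {'id': 'dQw4w9WgXcQ', 'title': 'Some video', 'duration': '3:33'}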
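The one-line proto.py change above is small but load-bearing: continuation tokens are protobuf messages built from user input, and value.encode('ascii') raised UnicodeEncodeError the moment a channel search contained a non-ASCII character. A quick sketch of the fixed as_bytes (inputs invented for illustration):

from youtube import proto

proto.as_bytes('naïve')      # b'na\xc3\xafve': UTF-8 round-trips any query text
proto.as_bytes(b'\x12\x04')  # bytes pass through unchanged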
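The comments.py fix follows the same defensive pattern used throughout the extraction code: any key that can be absent from scraped JSON (here, everything identifying the author, since deleted channels have none) is read with a default rather than indexed directly. A minimal sketch of the pattern, with an invented response fragment:

comment_raw = {'commentId': 'UgzK1x', 'likeCount': 3}  # deleted channel: no author keys

comment = {'author_id': comment_raw.get('authorId', '')}  # '' instead of a KeyError
if 'authorText' in comment_raw:
    comment['author'] = comment_raw['authorText']['simpleText']
else:
    comment['author'] = ''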