From e158c4842b123944137b985e5d436ecebd2f51f4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 15 Feb 2019 20:27:37 -0800 Subject: subscriptions: basic database code --- youtube/subscriptions.py | 82 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 12 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 47f1ea3..39957bf 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -1,18 +1,76 @@ -import urllib +from youtube import common, settings +import sqlite3 +import os -with open("subscriptions.txt", 'r', encoding='utf-8') as file: - subscriptions = file.read() - -# Line format: "channel_id channel_name" -# Example: -# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown +# https://stackabuse.com/a-sqlite-tutorial-with-python/ -subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines()) +database_path = os.path.join(settings.data_dir, "subscriptions.sqlite") -def get_new_videos(): - for channel_id, channel_name in subscriptions: - +def open_database(): + if not os.path.exists(settings.data_dir): + os.makedirs(settings.data_dir) + connection = sqlite3.connect(database_path) + # Create tables if they don't exist + try: + cursor = connection.cursor() + cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( + id integer PRIMARY KEY, + channel_id text NOT NULL, + channel_name text NOT NULL, + time_last_checked integer + )''') + cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( + id integer PRIMARY KEY, + uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, + video_id text NOT NULL, + title text NOT NULL, + time_published integer NOT NULL, + description text, + )''') + connection.commit() + except: + connection.rollback() + connection.close() + raise + return connection -def get_subscriptions_page(): +def _subscribe(channel_id, channel_name): + connection = open_database() + try: + cursor = connection.cursor() + cursor.execute("INSERT INTO subscribed_channels (channel_id, name) VALUES (?, ?)", (channel_id, channel_name)) + connection.commit() + except: + connection.rollback() + raise + finally: + connection.close() + +def _unsubscribe(channel_id): + connection = open_database() + try: + cursor = connection.cursor() + cursor.execute("DELETE FROM subscribed_channels WHERE channel_id=?", (channel_id, )) + connection.commit() + except: + connection.rollback() + raise + finally: + connection.close() + +def _get_videos(number, offset): + connection = open_database() + try: + cursor = connection.cursor() + cursor.execute('''SELECT video_id, title, time_published, description, channel_id, channel_name + FROM videos + INNER JOIN subscribed_channels on videos.uploader_id = subscribed_channels.id + ORDER BY time_published DESC + LIMIT ? OFFSET ?''', number, offset) + except: + connection.rollback() + raise + finally: + connection.close() -- cgit v1.2.3 From c65df7d27df64049e1597e245758f70e808173e1 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 16 Feb 2019 14:00:06 -0800 Subject: subscriptions: Basic new videos checking function for channel --- youtube/subscriptions.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 39957bf..0d31bd4 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -1,6 +1,14 @@ -from youtube import common, settings +from youtube import common, settings, channel import sqlite3 import os +import secrets +import datetime + +# so as to not completely break on people who have updated but don't know of new dependency +try: + import atoma +except ModuleNotFoundError: + print('Error: atoma not installed, subscriptions will not work') # https://stackabuse.com/a-sqlite-tutorial-with-python/ @@ -74,3 +82,96 @@ def _get_videos(number, offset): raise finally: connection.close() + + + +units = { + 'year': 31536000, # 365*24*3600 + 'month': 2592000, # 30*24*3600 + 'week': 604800, # 7*24*3600 + 'day': 86400, # 24*3600 + 'hour': 3600, + 'minute': 60, + 'second': 1, +} +def youtube_timestamp_to_posix(dumb_timestamp): + ''' Given a dumbed down timestamp such as 1 year ago, 3 hours ago, + approximates the unix time (seconds since 1/1/1970) ''' + dumb_timestamp = dumb_timestamp.lower() + now = time.time() + if dumb_timestamp == "just now": + return now + split = dumb_timestamp.split(' ') + number, unit = int(split[0]), split[1] + if number > 1: + unit = unit[:-1] # remove s from end + return now - number*units[unit] + + +weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') +months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') +def _get_upstream_videos(channel_id, channel_name, time_last_checked): + feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id + headers = {} + + # randomly change time_last_checked up to one day earlier to make tracking harder + time_last_checked = time_last_checked - secrets.randbelow(24*3600) + + # If-Modified-Since header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since + struct_time = time.gmtime(time_last_checked) + weekday = weekdays[struct_time.tm_wday] # dumb requirement + month = months[struct_time.tm_mon - 1] + headers['If-Modified-Since'] = time.strftime(weekday + ', %d ' + month + ' %Y %H:%M:%S GMT', struct_time) + print(headers['If-Modified-Since']) + + + headers['User-Agent'] = 'Python-urllib' # Don't leak python version + headers['Accept-Encoding'] = 'gzip, br' + req = urllib.request.Request(url, headers=headers) + if settings.route_tor: + opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150)) + else: + opener = urllib.request.build_opener() + response = opener.open(req, timeout=15) + + + if response.getcode == '304': + print('No new videos for ' + channel_id) + return [] + + + content = response.read() + print('Retrieved videos for ' + channel_id) + content = common.decode_content(content, response.getheader('Content-Encoding', default='identity')) + + + feed = atoma.parse_atom_bytes(content) + atom_videos = {} + for entry in feed.entries: + video_id = entry.id_[9:] # example of id_: yt:video:q6EoRBvdVPQ + + # standard names used in this program for purposes of html templating + atom_videos[video_id] = { + 'title': entry.title.value, + 'author': entry.authors[0].name, + #'description': '', # Not supported by atoma + #'duration': '', # Youtube's atom feeds don't provide it.. very frustrating + 'published': entry.published.strftime('%m/%d/%Y'), + 'time_published': int(entry.published.timestamp()), + } + + + # final list + videos = [] + + # Now check channel page to retrieve missing information for videos + json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) + for json_video in json_channel_videos: + info = renderer_info(json_video) + if info['id'] in atom_videos: + info.update(atom_videos[info['id']]) + else: + info['author'] = channel_name + info['time published'] = youtube_timestamp_to_posix(info['published']) + videos.append(info) + return videos -- cgit v1.2.3 From 4a54c4fe301f37ae63e98defe8240a5d526361c6 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 16 Feb 2019 16:11:53 -0800 Subject: subscriptions: store video duration in database --- youtube/subscriptions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 0d31bd4..a0f7e48 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -33,6 +33,7 @@ def open_database(): uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, video_id text NOT NULL, title text NOT NULL, + duration text, time_published integer NOT NULL, description text, )''') @@ -72,7 +73,7 @@ def _get_videos(number, offset): connection = open_database() try: cursor = connection.cursor() - cursor.execute('''SELECT video_id, title, time_published, description, channel_id, channel_name + cursor.execute('''SELECT video_id, title, duration, time_published, description, channel_id, channel_name FROM videos INNER JOIN subscribed_channels on videos.uploader_id = subscribed_channels.id ORDER BY time_published DESC -- cgit v1.2.3 From 24642455d0dc5841ddec99f456598c4f763c1e8a Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 16 Feb 2019 16:56:46 -0800 Subject: subscriptions page --- youtube/subscriptions.py | 37 +++++++++++++++++++++++++++++++++---- youtube/youtube.py | 4 +++- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index a0f7e48..82916dd 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -1,4 +1,6 @@ -from youtube import common, settings, channel +from youtube import common, channel +import settings +from string import Template import sqlite3 import os import secrets @@ -10,6 +12,10 @@ try: except ModuleNotFoundError: print('Error: atoma not installed, subscriptions will not work') +with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: + subscriptions_template = Template(f.read()) + + # https://stackabuse.com/a-sqlite-tutorial-with-python/ database_path = os.path.join(settings.data_dir, "subscriptions.sqlite") @@ -35,7 +41,7 @@ def open_database(): title text NOT NULL, duration text, time_published integer NOT NULL, - description text, + description text )''') connection.commit() except: @@ -73,11 +79,19 @@ def _get_videos(number, offset): connection = open_database() try: cursor = connection.cursor() - cursor.execute('''SELECT video_id, title, duration, time_published, description, channel_id, channel_name + db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name FROM videos INNER JOIN subscribed_channels on videos.uploader_id = subscribed_channels.id ORDER BY time_published DESC - LIMIT ? OFFSET ?''', number, offset) + LIMIT ? OFFSET ?''', (number, offset)) + + for db_video in db_videos: + yield { + 'id': db_video[0], + 'title': db_video[1], + 'duration': db_video[2], + 'author': db_video[3], + } except: connection.rollback() raise @@ -176,3 +190,18 @@ def _get_upstream_videos(channel_id, channel_name, time_last_checked): info['time published'] = youtube_timestamp_to_posix(info['published']) videos.append(info) return videos + +def get_subscriptions_page(env, start_response): + items_html = '''''' + + start_response('200 OK', [('Content-type','text/html'),]) + return subscriptions_template.substitute( + header = common.get_header(), + items = items_html, + page_buttons = '', + ).encode('utf-8') + diff --git a/youtube/youtube.py b/youtube/youtube.py index b6b12fb..ad73a6e 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -1,7 +1,7 @@ import mimetypes import urllib.parse import os -from youtube import local_playlist, watch, search, playlist, channel, comments, common, post_comment, accounts +from youtube import local_playlist, watch, search, playlist, channel, comments, common, post_comment, accounts, subscriptions import settings YOUTUBE_FILES = ( "/shared.css", @@ -24,6 +24,8 @@ get_handlers = { 'post_comment': post_comment.get_post_comment_page, 'delete_comment': post_comment.get_delete_comment_page, 'login': accounts.get_account_login_page, + + 'subscriptions': subscriptions.get_subscriptions_page, } post_handlers = { 'edit_playlist': local_playlist.edit_playlist, -- cgit v1.2.3 From 3905e7e64059b45479894ba1fdfb0ef9cef64475 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 16 Feb 2019 23:41:52 -0800 Subject: basic subscriptions system --- youtube/channel.py | 4 +++ youtube/subscriptions.py | 71 +++++++++++++++++++++++++++++++++++++++--------- youtube/youtube.py | 2 ++ 3 files changed, 64 insertions(+), 13 deletions(-) (limited to 'youtube') diff --git a/youtube/channel.py b/youtube/channel.py index 9577525..c83d7d1 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -248,6 +248,7 @@ def channel_videos_html(polymer_json, current_page=1, current_sort=3, number_of_ return yt_channel_items_template.substitute( header = common.get_header(), channel_title = microformat['title'], + channel_id = channel_id, channel_tabs = channel_tabs_html(channel_id, 'Videos'), sort_buttons = channel_sort_buttons_html(channel_id, 'videos', current_sort), avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], @@ -269,6 +270,7 @@ def channel_playlists_html(polymer_json, current_sort=3): return yt_channel_items_template.substitute( header = common.get_header(), channel_title = microformat['title'], + channel_id = channel_id, channel_tabs = channel_tabs_html(channel_id, 'Playlists'), sort_buttons = channel_sort_buttons_html(channel_id, 'playlists', current_sort), avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], @@ -333,6 +335,7 @@ def channel_about_page(polymer_json): description = description, links = channel_links, stats = stats, + channel_id = channel_metadata['channelId'], channel_tabs = channel_tabs_html(channel_metadata['channelId'], 'About'), ) @@ -353,6 +356,7 @@ def channel_search_page(polymer_json, query, current_page=1, number_of_videos = return yt_channel_items_template.substitute( header = common.get_header(), channel_title = html.escape(microformat['title']), + channel_id = channel_id, channel_tabs = channel_tabs_html(channel_id, '', query), avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'], page_title = html.escape(query + ' - Channel search'), diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 82916dd..ff7d0df 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -5,6 +5,10 @@ import sqlite3 import os import secrets import datetime +import itertools +import time +import urllib +import socks, sockshandler # so as to not completely break on people who have updated but don't know of new dependency try: @@ -51,11 +55,16 @@ def open_database(): return connection -def _subscribe(channel_id, channel_name): +def _subscribe(channels): + ''' channels is a list of (channel_id, channel_name) ''' + + # set time_last_checked to 0 on all channels being subscribed to + channels = ( (channel_id, channel_name, 0) for channel_id, channel_name in channels) + connection = open_database() try: cursor = connection.cursor() - cursor.execute("INSERT INTO subscribed_channels (channel_id, name) VALUES (?, ?)", (channel_id, channel_name)) + cursor.executemany("INSERT INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) connection.commit() except: connection.rollback() @@ -63,11 +72,12 @@ def _subscribe(channel_id, channel_name): finally: connection.close() -def _unsubscribe(channel_id): +def _unsubscribe(channel_ids): + ''' channel_ids is a list of channel_ids ''' connection = open_database() try: cursor = connection.cursor() - cursor.execute("DELETE FROM subscribed_channels WHERE channel_id=?", (channel_id, )) + cursor.executemany("DELETE FROM subscribed_channels WHERE channel_id=?", ((channel_id, ) for channel_id in channel_ids)) connection.commit() except: connection.rollback() @@ -125,12 +135,14 @@ def youtube_timestamp_to_posix(dumb_timestamp): weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') -def _get_upstream_videos(channel_id, channel_name, time_last_checked): +def _get_upstream_videos(channel_id, time_last_checked): feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id headers = {} # randomly change time_last_checked up to one day earlier to make tracking harder time_last_checked = time_last_checked - secrets.randbelow(24*3600) + if time_last_checked < 0: # happens when time_last_checked is initialized to 0 when checking for first time + time_last_checked = 0 # If-Modified-Since header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since struct_time = time.gmtime(time_last_checked) @@ -142,7 +154,7 @@ def _get_upstream_videos(channel_id, channel_name, time_last_checked): headers['User-Agent'] = 'Python-urllib' # Don't leak python version headers['Accept-Encoding'] = 'gzip, br' - req = urllib.request.Request(url, headers=headers) + req = urllib.request.Request(feed_url, headers=headers) if settings.route_tor: opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150)) else: @@ -165,13 +177,10 @@ def _get_upstream_videos(channel_id, channel_name, time_last_checked): for entry in feed.entries: video_id = entry.id_[9:] # example of id_: yt:video:q6EoRBvdVPQ - # standard names used in this program for purposes of html templating atom_videos[video_id] = { 'title': entry.title.value, - 'author': entry.authors[0].name, #'description': '', # Not supported by atoma #'duration': '', # Youtube's atom feeds don't provide it.. very frustrating - 'published': entry.published.strftime('%m/%d/%Y'), 'time_published': int(entry.published.timestamp()), } @@ -182,12 +191,13 @@ def _get_upstream_videos(channel_id, channel_name, time_last_checked): # Now check channel page to retrieve missing information for videos json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) for json_video in json_channel_videos: - info = renderer_info(json_video) + info = common.renderer_info(json_video['gridVideoRenderer']) + if 'description' not in info: + info['description'] = '' if info['id'] in atom_videos: info.update(atom_videos[info['id']]) else: - info['author'] = channel_name - info['time published'] = youtube_timestamp_to_posix(info['published']) + info['time_published'] = youtube_timestamp_to_posix(info['published']) videos.append(info) return videos @@ -195,7 +205,7 @@ def get_subscriptions_page(env, start_response): items_html = '''''' start_response('200 OK', [('Content-type','text/html'),]) @@ -205,3 +215,38 @@ def get_subscriptions_page(env, start_response): page_buttons = '', ).encode('utf-8') +def post_subscriptions_page(env, start_response): + params = env['parameters'] + action = params['action'][0] + if action == 'subscribe': + if len(params['channel_id']) != len(params['channel_name']): + start_response('400 Bad Request', ()) + return b'400 Bad Request, length of channel_id != length of channel_name' + _subscribe(zip(params['channel_id'], params['channel_name'])) + + elif action == 'unsubscribe': + _unsubscribe(params['channel_id']) + + elif action == 'refresh': + connection = open_database() + try: + cursor = connection.cursor() + for uploader_id, channel_id, time_last_checked in cursor.execute('''SELECT id, channel_id, time_last_checked FROM subscribed_channels'''): + db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id, time_last_checked) ) + cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) + + cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) + connection.commit() + except: + connection.rollback() + raise + finally: + connection.close() + + start_response('303 See Other', [('Location', common.URL_ORIGIN + '/subscriptions'),] ) + return b'' + else: + start_response('400 Bad Request', ()) + return b'400 Bad Request' + start_response('204 No Content', ()) + return b'' diff --git a/youtube/youtube.py b/youtube/youtube.py index ad73a6e..288f68b 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -35,6 +35,8 @@ post_handlers = { 'comments': post_comment.post_comment, 'post_comment': post_comment.post_comment, 'delete_comment': post_comment.delete_comment, + + 'subscriptions': subscriptions.post_subscriptions_page, } def youtube(env, start_response): -- cgit v1.2.3 From 1807bfac4952c23dd4031007b44026e920951336 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Mon, 3 Jun 2019 02:05:30 -0700 Subject: Remove feed parsing for now since youtube doesn't honor If-Modified-Since header --- youtube/subscriptions.py | 71 +++--------------------------------------------- 1 file changed, 4 insertions(+), 67 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 0c7e8a5..a7b94bc 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -10,12 +10,6 @@ import time import urllib import socks, sockshandler -# so as to not completely break on people who have updated but don't know of new dependency -try: - import atoma -except ModuleNotFoundError: - print('Error: atoma not installed, subscriptions will not work') - with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) @@ -132,72 +126,15 @@ def youtube_timestamp_to_posix(dumb_timestamp): unit = unit[:-1] # remove s from end return now - number*units[unit] - -weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') -months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') -def _get_upstream_videos(channel_id, time_last_checked): - feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id - headers = {} - - # randomly change time_last_checked up to one day earlier to make tracking harder - time_last_checked = time_last_checked - secrets.randbelow(24*3600) - if time_last_checked < 0: # happens when time_last_checked is initialized to 0 when checking for first time - time_last_checked = 0 - - # If-Modified-Since header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since - struct_time = time.gmtime(time_last_checked) - weekday = weekdays[struct_time.tm_wday] # dumb requirement - month = months[struct_time.tm_mon - 1] - headers['If-Modified-Since'] = time.strftime(weekday + ', %d ' + month + ' %Y %H:%M:%S GMT', struct_time) - print(headers['If-Modified-Since']) - - - headers['User-Agent'] = 'Python-urllib' # Don't leak python version - headers['Accept-Encoding'] = 'gzip, br' - req = urllib.request.Request(feed_url, headers=headers) - if settings.route_tor: - opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150)) - else: - opener = urllib.request.build_opener() - response = opener.open(req, timeout=15) - - - if response.getcode == '304': - print('No new videos for ' + channel_id) - return [] - - - content = response.read() - print('Retrieved videos for ' + channel_id) - content = util.decode_content(content, response.getheader('Content-Encoding', default='identity')) - - - feed = atoma.parse_atom_bytes(content) - atom_videos = {} - for entry in feed.entries: - video_id = entry.id_[9:] # example of id_: yt:video:q6EoRBvdVPQ - - atom_videos[video_id] = { - 'title': entry.title.value, - #'description': '', # Not supported by atoma - #'duration': '', # Youtube's atom feeds don't provide it.. very frustrating - 'time_published': int(entry.published.timestamp()), - } - - - # final list +def _get_upstream_videos(channel_id): videos = [] - # Now check channel page to retrieve missing information for videos json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) for json_video in json_channel_videos: info = yt_data_extract.renderer_info(json_video['gridVideoRenderer']) if 'description' not in info: info['description'] = '' - if info['id'] in atom_videos: - info.update(atom_videos[info['id']]) - else: - info['time_published'] = youtube_timestamp_to_posix(info['published']) + info['time_published'] = youtube_timestamp_to_posix(info['published']) videos.append(info) return videos @@ -231,8 +168,8 @@ def post_subscriptions_page(env, start_response): connection = open_database() try: cursor = connection.cursor() - for uploader_id, channel_id, time_last_checked in cursor.execute('''SELECT id, channel_id, time_last_checked FROM subscribed_channels'''): - db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id, time_last_checked) ) + for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels'''): + db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) ) cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) -- cgit v1.2.3 From ccb795e31f2f9944b1c8b061906eb752009f8ea7 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Mon, 3 Jun 2019 03:20:44 -0700 Subject: Ensure videos in subscriptions from same channel in order --- youtube/subscriptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index a7b94bc..dc8412b 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -130,11 +130,11 @@ def _get_upstream_videos(channel_id): videos = [] json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) - for json_video in json_channel_videos: + for i, json_video in enumerate(json_channel_videos): info = yt_data_extract.renderer_info(json_video['gridVideoRenderer']) if 'description' not in info: info['description'] = '' - info['time_published'] = youtube_timestamp_to_posix(info['published']) + info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order videos.append(info) return videos -- cgit v1.2.3 From ae5fd9eb009c6522e7d7971e0bdea57faaaf2b3c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 5 Jun 2019 00:41:15 -0700 Subject: Make thumbnails work and other stuff --- youtube/local_playlist.py | 30 ++------------------------- youtube/subscriptions.py | 53 ++++++++++++++++++++++++++++++++++++++--------- youtube/util.py | 32 ++++++++++++++++++++++++++++ youtube/youtube.py | 2 +- 4 files changed, 78 insertions(+), 39 deletions(-) (limited to 'youtube') diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py index e354013..d083e33 100644 --- a/youtube/local_playlist.py +++ b/youtube/local_playlist.py @@ -33,33 +33,7 @@ def add_to_playlist(name, video_info_list): if id not in ids: file.write(info + "\n") missing_thumbnails.append(id) - gevent.spawn(download_thumbnails, name, missing_thumbnails) - -def download_thumbnail(playlist_name, video_id): - url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" - save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg") - try: - thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id) - except urllib.error.HTTPError as e: - print("Failed to download thumbnail for " + video_id + ": " + str(e)) - return - try: - f = open(save_location, 'wb') - except FileNotFoundError: - os.makedirs(os.path.join(thumbnails_directory, playlist_name)) - f = open(save_location, 'wb') - f.write(thumbnail) - f.close() - -def download_thumbnails(playlist_name, ids): - # only do 5 at a time - # do the n where n is divisible by 5 - i = -1 - for i in range(0, int(len(ids)/5) - 1 ): - gevent.joinall([gevent.spawn(download_thumbnail, playlist_name, ids[j]) for j in range(i*5, i*5 + 5)]) - # do the remainders (< 5) - gevent.joinall([gevent.spawn(download_thumbnail, playlist_name, ids[j]) for j in range(i*5 + 5, len(ids))]) - + gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) def get_local_playlist_page(name): @@ -84,7 +58,7 @@ def get_local_playlist_page(name): videos_html += html_common.video_item_html(info, html_common.small_video_item_template) except json.decoder.JSONDecodeError: pass - gevent.spawn(download_thumbnails, name, missing_thumbnails) + gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) return local_playlist_template.substitute( page_title = name + ' - Local playlist', header = html_common.get_header(), diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index dc8412b..93d064d 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -3,16 +3,13 @@ import settings from string import Template import sqlite3 import os -import secrets -import datetime -import itertools import time -import urllib -import socks, sockshandler +import gevent with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) +thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails") # https://stackabuse.com/a-sqlite-tutorial-with-python/ @@ -28,14 +25,14 @@ def open_database(): cursor = connection.cursor() cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( id integer PRIMARY KEY, - channel_id text NOT NULL, + channel_id text UNIQUE NOT NULL, channel_name text NOT NULL, time_last_checked integer )''') cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( id integer PRIMARY KEY, uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, - video_id text NOT NULL, + video_id text UNIQUE NOT NULL, title text NOT NULL, duration text, time_published integer NOT NULL, @@ -58,7 +55,7 @@ def _subscribe(channels): connection = open_database() try: cursor = connection.cursor() - cursor.executemany("INSERT INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) + cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) connection.commit() except: connection.rollback() @@ -104,6 +101,10 @@ def _get_videos(number, offset): + + + + units = { 'year': 31536000, # 365*24*3600 'month': 2592000, # 30*24*3600 @@ -126,6 +127,16 @@ def youtube_timestamp_to_posix(dumb_timestamp): unit = unit[:-1] # remove s from end return now - number*units[unit] +# Use this to mark a thumbnail acceptable to be retrieved at the request of the browser +downloading_thumbnails = set() +def download_thumbnails(thumbnails_directory, thumbnails): + try: + g = gevent.spawn(util.download_thumbnails, thumbnails_directory, thumbnails) + g.join() + finally: + downloading_thumbnails.difference_update(thumbnails) + + def _get_upstream_videos(channel_id): videos = [] @@ -136,12 +147,34 @@ def _get_upstream_videos(channel_id): info['description'] = '' info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order videos.append(info) + + try: + existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory)) + except FileNotFoundError: + existing_thumbnails = set() + missing_thumbnails = set(video['id'] for video in videos) - existing_thumbnails + downloading_thumbnails.update(missing_thumbnails) + gevent.spawn(download_thumbnails, thumbnails_directory, missing_thumbnails) + return videos + + + + + + + + def get_subscriptions_page(env, start_response): items_html = '''''' @@ -168,9 +201,9 @@ def post_subscriptions_page(env, start_response): connection = open_database() try: cursor = connection.cursor() - for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels'''): + for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels''').fetchall(): db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) ) - cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) + cursor.executemany('''INSERT OR IGNORE INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) connection.commit() diff --git a/youtube/util.py b/youtube/util.py index 9950815..42d76a3 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -5,6 +5,8 @@ import brotli import urllib.parse import re import time +import os +import gevent # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates @@ -176,6 +178,36 @@ desktop_ua = (('User-Agent', desktop_user_agent),) +def download_thumbnail(save_directory, video_id): + url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + save_location = os.path.join(save_directory, video_id + ".jpg") + try: + thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) + except urllib.error.HTTPError as e: + print("Failed to download thumbnail for " + video_id + ": " + str(e)) + return + try: + f = open(save_location, 'wb') + except FileNotFoundError: + os.makedirs(save_directory) + f = open(save_location, 'wb') + f.write(thumbnail) + f.close() + +def download_thumbnails(save_directory, ids): + if not isinstance(ids, (list, tuple)): + ids = list(ids) + # only do 5 at a time + # do the n where n is divisible by 5 + i = -1 + for i in range(0, int(len(ids)/5) - 1 ): + gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)]) + # do the remainders (< 5) + gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))]) + + + + def dict_add(*dicts): diff --git a/youtube/youtube.py b/youtube/youtube.py index 4ec7962..c629bbb 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -61,7 +61,7 @@ def youtube(env, start_response): start_response('200 OK', (('Content-type',mime_type),) ) return f.read() - elif path.startswith("/data/playlist_thumbnails/"): + elif path.startswith('/data/playlist_thumbnails/') or path.startswith('/data/subscription_thumbnails/'): with open(os.path.join(settings.data_dir, os.path.normpath(path[6:])), 'rb') as f: start_response('200 OK', (('Content-type', "image/jpeg"),) ) return f.read() -- cgit v1.2.3 From 60fe4b2e75e8475b20001c84e01b642a3b3ebcc3 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 7 Jun 2019 21:15:16 -0700 Subject: Add subscription manager system --- youtube/shared.css | 5 +- youtube/subscriptions.py | 284 +++++++++++++++++++++++++++++++++++++++++++++-- youtube/youtube.py | 59 +++++++++- 3 files changed, 334 insertions(+), 14 deletions(-) (limited to 'youtube') diff --git a/youtube/shared.css b/youtube/shared.css index 1b25d7f..cd82164 100644 --- a/youtube/shared.css +++ b/youtube/shared.css @@ -1,7 +1,10 @@ +* { + box-sizing: border-box; +} + h1, h2, h3, h4, h5, h6, div, button{ margin:0; padding:0; - } diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 93d064d..4591f32 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -5,10 +5,17 @@ import sqlite3 import os import time import gevent +import html +import json +import traceback with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) +with open('yt_subscription_manager_template.html', 'r', encoding='utf-8') as f: + subscription_manager_template = Template(f.read()) + + thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails") # https://stackabuse.com/a-sqlite-tutorial-with-python/ @@ -25,19 +32,26 @@ def open_database(): cursor = connection.cursor() cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( id integer PRIMARY KEY, - channel_id text UNIQUE NOT NULL, + yt_channel_id text UNIQUE NOT NULL, channel_name text NOT NULL, time_last_checked integer )''') cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( id integer PRIMARY KEY, - uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, + sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, video_id text UNIQUE NOT NULL, title text NOT NULL, duration text, time_published integer NOT NULL, description text )''') + cursor.execute('''CREATE TABLE IF NOT EXISTS tag_associations ( + id integer PRIMARY KEY, + tag text NOT NULL, + sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, + UNIQUE(tag, sql_channel_id) + )''') + connection.commit() except: connection.rollback() @@ -55,7 +69,7 @@ def _subscribe(channels): connection = open_database() try: cursor = connection.cursor() - cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) + cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) connection.commit() except: connection.rollback() @@ -63,12 +77,13 @@ def _subscribe(channels): finally: connection.close() +# TODO: delete thumbnails def _unsubscribe(channel_ids): ''' channel_ids is a list of channel_ids ''' connection = open_database() try: cursor = connection.cursor() - cursor.executemany("DELETE FROM subscribed_channels WHERE channel_id=?", ((channel_id, ) for channel_id in channel_ids)) + cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) connection.commit() except: connection.rollback() @@ -82,7 +97,7 @@ def _get_videos(number, offset): cursor = connection.cursor() db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name FROM videos - INNER JOIN subscribed_channels on videos.uploader_id = subscribed_channels.id + INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id ORDER BY time_published DESC LIMIT ? OFFSET ?''', (number, offset)) @@ -99,10 +114,98 @@ def _get_videos(number, offset): finally: connection.close() +def _get_subscribed_channels(): + connection = open_database() + try: + cursor = connection.cursor() + for item in cursor.execute('''SELECT channel_name, yt_channel_id + FROM subscribed_channels + ORDER BY channel_name'''): + yield item + except: + connection.rollback() + raise + finally: + connection.close() + +def _add_tags(channel_ids, tags): + connection = open_database() + try: + cursor = connection.cursor() + + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + + cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) + SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) + connection.commit() + except: + connection.rollback() + raise + finally: + connection.close() + + + +def _remove_tags(channel_ids, tags): + connection = open_database() + try: + cursor = connection.cursor() + + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''DELETE FROM tag_associations WHERE tag = ? AND sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', pairs) + connection.commit() + except: + connection.rollback() + raise + finally: + connection.close() +def _get_tags(channel_id): + connection = open_database() + try: + cursor = connection.cursor() + return [row[0] for row in cursor.execute('''SELECT tag + FROM tag_associations + WHERE sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', (channel_id,))] + except: + connection.rollback() + raise + finally: + connection.close() + +def _get_all_tags(): + connection = open_database() + try: + cursor = connection.cursor() + return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] + except: + connection.rollback() + raise + finally: + connection.close() + +def _get_channel_names(channel_ids): + ''' returns list of (channel_id, channel_name) ''' + connection = open_database() + try: + cursor = connection.cursor() + result = [] + for channel_id in channel_ids: + row = cursor.execute('''SELECT channel_name FROM subscribed_channels WHERE yt_channel_id = ?''', (channel_id,)).fetchone() + result.append( (channel_id, row[0]) ) + return result + except: + connection.rollback() + raise + finally: + connection.close() units = { @@ -166,11 +269,156 @@ def _get_upstream_videos(channel_id): +def import_subscriptions(env, start_response): + content_type = env['parameters']['subscriptions_file'][0] + file = env['parameters']['subscriptions_file'][1] + + file = file.decode('utf-8') + + if content_type == 'application/json': + try: + file = json.loads(file) + except json.decoder.JSONDecodeError: + traceback.print_exc() + start_response('400 Bad Request', () ) + return b'400 Bad Request: Invalid json file' + + try: + channels = ( (item['snippet']['resourceId']['channelId'], item['snippet']['title']) for item in file) + except (KeyError, IndexError): + traceback.print_exc() + start_response('400 Bad Request', () ) + return b'400 Bad Request: Unknown json structure' + else: + raise NotImplementedError() + + _subscribe(channels) + + start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscription_manager'),] ) + return b'' + + + +sub_list_item_template = Template(''' +
  • + $channel_name + $tags + +
  • ''') + +def get_subscription_manager_page(env, start_response): + + sub_list_html = '' + for channel_name, channel_id in _get_subscribed_channels(): + sub_list_html += sub_list_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + channel_id = channel_id, + tags = ', '.join(_get_tags(channel_id)), + ) + + + + start_response('200 OK', [('Content-type','text/html'),]) + return subscription_manager_template.substitute( + header = html_common.get_header(), + sub_list = sub_list_html, + page_buttons = '', + ).encode('utf-8') + +def list_from_comma_separated_tags(string): + tags = [] + prev_comma = -1 + next_comma = string.find(',') + while next_comma != -1: + tag = string[prev_comma+1:next_comma].strip() + if tag: + tags.append(tag) + + prev_comma = next_comma + next_comma = string.find(',', prev_comma+1) + + last_tag = string[prev_comma+1:].strip() + if last_tag: + tags.append(last_tag) + return tags + + +unsubscribe_list_item_template = Template(''' +
  • $channel_name
  • ''') +def post_subscription_manager_page(env, start_response): + params = env['parameters'] + action = params['action'][0] + + if action == 'add_tags': + _add_tags(params['channel_ids'], [tag.lower() for tag in list_from_comma_separated_tags(params['tags'][0])]) + elif action == 'remove_tags': + _remove_tags(params['channel_ids'], [tag.lower() for tag in list_from_comma_separated_tags(params['tags'][0])]) + elif action == 'unsubscribe': + _unsubscribe(params['channel_ids']) + elif action == 'unsubscribe_verify': + page = ''' + Are you sure you want to unsubscribe from these channels? +
    ''' + + for channel_id in params['channel_ids']: + page += '\n' + + page += ''' + + +
    +
      ''' + for channel_id, channel_name in _get_channel_names(params['channel_ids']): + page += unsubscribe_list_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + ) + page += '''
    ''' + + start_response('200 OK', [('Content-type','text/html'),]) + return html_common.yt_basic_template.substitute( + page_title = 'Unsubscribe?', + style = '', + header = html_common.get_header(), + page = page, + ).encode('utf-8') + else: + start_response('400 Bad Request', ()) + return b'400 Bad Request' + + start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscription_manager'),] ) + return b'' + + + +sidebar_tag_item_template = Template(''' +
  • + $tag_name + +
  • ''') + + +sidebar_channel_item_template = Template(''' +
  • + $channel_name + +
  • ''') + def get_subscriptions_page(env, start_response): items_html = '''''' + + tag_list_html = '' + for tag_name in _get_all_tags(): + tag_list_html += sidebar_tag_item_template.substitute(tag_name = tag_name) + + + sub_list_html = '' + for channel_name, channel_id in _get_subscribed_channels(): + sub_list_html += sidebar_channel_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + channel_id = channel_id, + ) + + + start_response('200 OK', [('Content-type','text/html'),]) return subscriptions_template.substitute( header = html_common.get_header(), items = items_html, + tags = tag_list_html, + sub_list = sub_list_html, page_buttons = '', ).encode('utf-8') @@ -201,9 +467,9 @@ def post_subscriptions_page(env, start_response): connection = open_database() try: cursor = connection.cursor() - for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels''').fetchall(): - db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) ) - cursor.executemany('''INSERT OR IGNORE INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) + for sql_channel_id, yt_channel_id in cursor.execute('''SELECT id, yt_channel_id FROM subscribed_channels''').fetchall(): + db_videos = ( (sql_channel_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(yt_channel_id) ) + cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) connection.commit() diff --git a/youtube/youtube.py b/youtube/youtube.py index c629bbb..c0be4fe 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -1,6 +1,7 @@ import mimetypes import urllib.parse import os +import re from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util, subscriptions import settings YOUTUBE_FILES = ( @@ -25,7 +26,8 @@ get_handlers = { 'delete_comment': post_comment.get_delete_comment_page, 'login': accounts.get_account_login_page, - 'subscriptions': subscriptions.get_subscriptions_page, + 'subscriptions': subscriptions.get_subscriptions_page, + 'subscription_manager': subscriptions.get_subscription_manager_page, } post_handlers = { 'edit_playlist': local_playlist.edit_playlist, @@ -37,6 +39,8 @@ post_handlers = { 'delete_comment': post_comment.delete_comment, 'subscriptions': subscriptions.post_subscriptions_page, + 'subscription_manager': subscriptions.post_subscription_manager_page, + 'import_subscriptions': subscriptions.import_subscriptions, } def youtube(env, start_response): @@ -90,9 +94,56 @@ def youtube(env, start_response): return channel.get_channel_page_general_url(env, start_response) elif method == "POST": - post_parameters = urllib.parse.parse_qs(env['wsgi.input'].read().decode()) - env['post_parameters'] = post_parameters - env['parameters'].update(post_parameters) + content_type = env['CONTENT_TYPE'] + if content_type == 'application/x-www-form-urlencoded': + post_parameters = urllib.parse.parse_qs(env['wsgi.input'].read().decode()) + env['post_parameters'] = post_parameters + env['parameters'].update(post_parameters) + + # Ugly hack that will be removed once I clean up this trainwreck and switch to a microframework + # Only supports a single file with no other fields + elif content_type.startswith('multipart/form-data'): + content = env['wsgi.input'].read() + + # find double line break + file_start = content.find(b'\r\n\r\n') + if file_start == -1: + start_response('400 Bad Request', ()) + return b'400 Bad Request' + + file_start += 4 + + lines = content[0:file_start].splitlines() + boundary = lines[0] + + file_end = content.find(boundary, file_start) + if file_end == -1: + start_response('400 Bad Request', ()) + return b'400 Bad Request' + file_end -= 2 # Subtract newlines + file = content[file_start:file_end] + + properties = dict() + for line in lines[1:]: + line = line.decode('utf-8') + colon = line.find(':') + if colon == -1: + continue + properties[line[0:colon]] = line[colon+2:] + + mime_type = properties['Content-Type'] + field_name = re.search(r'name="([^"]*)"' , properties['Content-Disposition']) + if field_name is None: + start_response('400 Bad Request', ()) + return b'400 Bad Request' + field_name = field_name.group(1) + + env['post_parameters'] = {field_name: (mime_type, file)} + env['parameters'][field_name] = (mime_type, file) + + else: + start_response('400 Bad Request', ()) + return b'400 Bad Request' try: handler = post_handlers[path_parts[0]] -- cgit v1.2.3 From c8be729e6ba706654849ed39e6238aebf85f1f36 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 8 Jun 2019 02:32:13 -0700 Subject: Reduce boilerplate in db functions with context managers --- youtube/subscriptions.py | 203 ++++++++++++++++------------------------------- 1 file changed, 70 insertions(+), 133 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 4591f32..a3af40f 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -8,6 +8,7 @@ import gevent import html import json import traceback +import contextlib with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) @@ -58,7 +59,8 @@ def open_database(): connection.close() raise - return connection + # https://stackoverflow.com/questions/19522505/using-sqlite3-in-python-with-with-keyword + return contextlib.closing(connection) def _subscribe(channels): ''' channels is a list of (channel_id, channel_name) ''' @@ -66,146 +68,88 @@ def _subscribe(channels): # set time_last_checked to 0 on all channels being subscribed to channels = ( (channel_id, channel_name, 0) for channel_id, channel_name in channels) - connection = open_database() - try: - cursor = connection.cursor() - cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) - connection.commit() - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + cursor.executemany('''INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) + VALUES (?, ?, ?)''', channels) # TODO: delete thumbnails def _unsubscribe(channel_ids): ''' channel_ids is a list of channel_ids ''' - connection = open_database() - try: - cursor = connection.cursor() - cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) - connection.commit() - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) def _get_videos(number, offset): - connection = open_database() - try: - cursor = connection.cursor() - db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name - FROM videos - INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id - ORDER BY time_published DESC - LIMIT ? OFFSET ?''', (number, offset)) - - for db_video in db_videos: - yield { - 'id': db_video[0], - 'title': db_video[1], - 'duration': db_video[2], - 'author': db_video[3], - } - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name + FROM videos + INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id + ORDER BY time_published DESC + LIMIT ? OFFSET ?''', (number, offset)) + + for db_video in db_videos: + yield { + 'id': db_video[0], + 'title': db_video[1], + 'duration': db_video[2], + 'author': db_video[3], + } def _get_subscribed_channels(): - connection = open_database() - try: - cursor = connection.cursor() - for item in cursor.execute('''SELECT channel_name, yt_channel_id - FROM subscribed_channels - ORDER BY channel_name'''): - yield item - except: - connection.rollback() - raise - finally: - connection.close() - -def _add_tags(channel_ids, tags): - connection = open_database() - try: - cursor = connection.cursor() + with open_database() as connection: + with connection as cursor: + for item in cursor.execute('''SELECT channel_name, yt_channel_id + FROM subscribed_channels + ORDER BY channel_name'''): + yield item - pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] - - cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) - SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) - connection.commit() - except: - connection.rollback() - raise - finally: - connection.close() +def _add_tags(channel_ids, tags): + with open_database() as connection: + with connection as cursor: + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) + SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) def _remove_tags(channel_ids, tags): - connection = open_database() - try: - cursor = connection.cursor() + with open_database() as connection: + with connection as cursor: + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''DELETE FROM tag_associations + WHERE tag = ? AND sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', pairs) - pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] - - cursor.executemany('''DELETE FROM tag_associations WHERE tag = ? AND sql_channel_id = ( - SELECT id FROM subscribed_channels WHERE yt_channel_id = ? - )''', pairs) - connection.commit() - except: - connection.rollback() - raise - finally: - connection.close() def _get_tags(channel_id): - connection = open_database() - try: - cursor = connection.cursor() - - return [row[0] for row in cursor.execute('''SELECT tag - FROM tag_associations - WHERE sql_channel_id = ( - SELECT id FROM subscribed_channels WHERE yt_channel_id = ? - )''', (channel_id,))] - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + return [row[0] for row in cursor.execute('''SELECT tag + FROM tag_associations + WHERE sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', (channel_id,))] def _get_all_tags(): - connection = open_database() - try: - cursor = connection.cursor() - return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] def _get_channel_names(channel_ids): ''' returns list of (channel_id, channel_name) ''' - connection = open_database() - try: - cursor = connection.cursor() - result = [] - for channel_id in channel_ids: - row = cursor.execute('''SELECT channel_name FROM subscribed_channels WHERE yt_channel_id = ?''', (channel_id,)).fetchone() - result.append( (channel_id, row[0]) ) - return result - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + result = [] + for channel_id in channel_ids: + row = cursor.execute('''SELECT channel_name + FROM subscribed_channels + WHERE yt_channel_id = ?''', (channel_id,)).fetchone() + result.append( (channel_id, row[0]) ) + return result units = { @@ -464,20 +408,13 @@ def post_subscriptions_page(env, start_response): _unsubscribe(params['channel_id']) elif action == 'refresh': - connection = open_database() - try: - cursor = connection.cursor() - for sql_channel_id, yt_channel_id in cursor.execute('''SELECT id, yt_channel_id FROM subscribed_channels''').fetchall(): - db_videos = ( (sql_channel_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(yt_channel_id) ) - cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) - - cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) - connection.commit() - except: - connection.rollback() - raise - finally: - connection.close() + with open_database() as connection: + with connection as cursor: + for sql_channel_id, yt_channel_id in cursor.execute('''SELECT id, yt_channel_id FROM subscribed_channels''').fetchall(): + db_videos = ( (sql_channel_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(yt_channel_id) ) + cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) + + cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscriptions'),] ) return b'' -- cgit v1.2.3 From f5c76462d722d45645029754b4fc85252cf8212e Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 8 Jun 2019 02:35:21 -0700 Subject: Don't spam database with opening and closing when getting tags for many channels --- youtube/subscriptions.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index a3af40f..ff74d94 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -125,14 +125,12 @@ def _remove_tags(channel_ids, tags): -def _get_tags(channel_id): - with open_database() as connection: - with connection as cursor: - return [row[0] for row in cursor.execute('''SELECT tag - FROM tag_associations - WHERE sql_channel_id = ( - SELECT id FROM subscribed_channels WHERE yt_channel_id = ? - )''', (channel_id,))] +def _get_tags(cursor, channel_id): + return [row[0] for row in cursor.execute('''SELECT tag + FROM tag_associations + WHERE sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', (channel_id,))] def _get_all_tags(): with open_database() as connection: @@ -253,13 +251,15 @@ sub_list_item_template = Template(''' def get_subscription_manager_page(env, start_response): sub_list_html = '' - for channel_name, channel_id in _get_subscribed_channels(): - sub_list_html += sub_list_item_template.substitute( - channel_url = util.URL_ORIGIN + '/channel/' + channel_id, - channel_name = html.escape(channel_name), - channel_id = channel_id, - tags = ', '.join(_get_tags(channel_id)), - ) + with open_database() as connection: + with connection as cursor: + for channel_name, channel_id in _get_subscribed_channels(): + sub_list_html += sub_list_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + channel_id = channel_id, + tags = ', '.join(_get_tags(cursor, channel_id)), + ) -- cgit v1.2.3 From 27ee2990e97b1e73df41c1512332a1facd56f759 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 9 Jun 2019 16:03:20 -0700 Subject: Overhaul refresh system, make it asynchronous --- youtube/subscriptions.py | 144 +++++++++++++++++++++++++++++++++++++++-------- youtube/util.py | 54 +++++++++++++++++- 2 files changed, 171 insertions(+), 27 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index ff74d94..ba27655 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -26,7 +26,7 @@ database_path = os.path.join(settings.data_dir, "subscriptions.sqlite") def open_database(): if not os.path.exists(settings.data_dir): os.makedirs(settings.data_dir) - connection = sqlite3.connect(database_path) + connection = sqlite3.connect(database_path, check_same_thread=False) # Create tables if they don't exist try: @@ -172,17 +172,75 @@ def youtube_timestamp_to_posix(dumb_timestamp): unit = unit[:-1] # remove s from end return now - number*units[unit] + +try: + existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory)) +except FileNotFoundError: + existing_thumbnails = set() + + +thumbnails_queue = util.RateLimitedQueue() +check_channels_queue = util.RateLimitedQueue() + + # Use this to mark a thumbnail acceptable to be retrieved at the request of the browser +# can't simply check if it's in the queue because items are removed when the download starts, not when it finishes downloading_thumbnails = set() -def download_thumbnails(thumbnails_directory, thumbnails): - try: - g = gevent.spawn(util.download_thumbnails, thumbnails_directory, thumbnails) - g.join() - finally: - downloading_thumbnails.difference_update(thumbnails) + +checking_channels = set() + +# Just to use for printing channel checking status to console without opening database +channel_names = dict() + +def download_thumbnail_worker(): + while True: + video_id = thumbnails_queue.get() + try: + success = util.download_thumbnail(thumbnails_directory, video_id) + if success: + existing_thumbnails.add(video_id) + except Exception: + traceback.print_exc() + finally: + downloading_thumbnails.remove(video_id) + +def check_channel_worker(): + while True: + channel_id = check_channels_queue.get() + try: + _get_upstream_videos(channel_id) + finally: + checking_channels.remove(channel_id) + +for i in range(0,5): + gevent.spawn(download_thumbnail_worker) + gevent.spawn(check_channel_worker) + + + + + + +def download_thumbnails_if_necessary(thumbnails): + for video_id in thumbnails: + if video_id not in existing_thumbnails and video_id not in downloading_thumbnails: + downloading_thumbnails.add(video_id) + thumbnails_queue.put(video_id) + +def check_channels_if_necessary(channel_ids): + for channel_id in channel_ids: + if channel_id not in checking_channels: + checking_channels.add(channel_id) + check_channels_queue.put(channel_id) + def _get_upstream_videos(channel_id): + try: + print("Checking channel: " + channel_names[channel_id]) + except KeyError: + print("Checking channel " + channel_id) + videos = [] json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) @@ -190,23 +248,56 @@ def _get_upstream_videos(channel_id): info = yt_data_extract.renderer_info(json_video['gridVideoRenderer']) if 'description' not in info: info['description'] = '' - info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order - videos.append(info) + try: + info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order + except KeyError: + print(info) + videos.append((channel_id, info['id'], info['title'], info['duration'], info['time_published'], info['description'])) + + now = time.time() + download_thumbnails_if_necessary(video[1] for video in videos if (now - video[4]) < 30*24*3600) # Don't download thumbnails from videos older than a month + + with open_database() as connection: + with connection as cursor: + cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description) + VALUES ((SELECT id FROM subscribed_channels WHERE yt_channel_id=?), ?, ?, ?, ?, ?)''', videos) + cursor.execute('''UPDATE subscribed_channels + SET time_last_checked = ? + WHERE yt_channel_id=?''', [int(time.time()), channel_id]) - try: - existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory)) - except FileNotFoundError: - existing_thumbnails = set() - missing_thumbnails = set(video['id'] for video in videos) - existing_thumbnails - downloading_thumbnails.update(missing_thumbnails) - gevent.spawn(download_thumbnails, thumbnails_directory, missing_thumbnails) - return videos +def check_all_channels(): + with open_database() as connection: + with connection as cursor: + channel_id_name_list = cursor.execute('''SELECT yt_channel_id, channel_name FROM subscribed_channels''').fetchall() + channel_names.update(channel_id_name_list) + check_channels_if_necessary([item[0] for item in channel_id_name_list]) +def check_tags(tags): + channel_id_name_list = [] + with open_database() as connection: + with connection as cursor: + for tag in tags: + channel_id_name_list += cursor.execute('''SELECT yt_channel_id, channel_name + FROM subscribed_channels + WHERE subscribed_channels.id IN ( + SELECT tag_associations.sql_channel_id FROM tag_associations WHERE tag=? + )''', [tag]).fetchall() + channel_names.update(channel_id_name_list) + check_channels_if_necessary([item[0] for item in channel_id_name_list]) +def check_specific_channels(channel_ids): + with open_database() as connection: + with connection as cursor: + for channel_id in channel_ids: + channel_id_name_list += cursor.execute('''SELECT yt_channel_id, channel_name + FROM subscribed_channels + WHERE yt_channel_id=?''', [channel_id]).fetchall() + channel_names.update(channel_id_name_list) + check_channels_if_necessary(channel_ids) @@ -408,15 +499,18 @@ def post_subscriptions_page(env, start_response): _unsubscribe(params['channel_id']) elif action == 'refresh': - with open_database() as connection: - with connection as cursor: - for sql_channel_id, yt_channel_id in cursor.execute('''SELECT id, yt_channel_id FROM subscribed_channels''').fetchall(): - db_videos = ( (sql_channel_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(yt_channel_id) ) - cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) - - cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) + type = params['type'][0] + if type == 'all': + check_all_channels() + elif type == 'tag': + check_tags(params['tag_name']) + elif type == 'channel': + check_specific_channels(params['channel_id']) + else: + start_response('400 Bad Request', ()) + return b'400 Bad Request' - start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscriptions'),] ) + start_response('204 No Content', ()) return b'' else: start_response('400 Bad Request', ()) diff --git a/youtube/util.py b/youtube/util.py index 42d76a3..c4e1aff 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -7,6 +7,8 @@ import re import time import os import gevent +import gevent.queue +import gevent.lock # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates @@ -176,6 +178,53 @@ desktop_ua = (('User-Agent', desktop_user_agent),) +class RateLimitedQueue(gevent.queue.Queue): + ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. ''' + + def __init__(self, initial_burst=30, waiting_period=5, subsequent_bursts=10): + self.initial_burst = initial_burst + self.waiting_period = waiting_period + self.subsequent_bursts = subsequent_bursts + + self.count_since_last_wait = 0 + self.surpassed_initial = False + + self.lock = gevent.lock.BoundedSemaphore(1) + self.currently_empty = False + self.empty_start = 0 + gevent.queue.Queue.__init__(self) + + + def get(self): + self.lock.acquire() # blocks if another greenlet currently has the lock + if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial: + gevent.sleep(self.waiting_period) + self.count_since_last_wait = 0 + + elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial: + self.surpassed_initial = True + gevent.sleep(self.waiting_period) + self.count_since_last_wait = 0 + + self.count_since_last_wait += 1 + + if not self.currently_empty and self.empty(): + self.currently_empty = True + self.empty_start = time.monotonic() + + item = gevent.queue.Queue.get(self) # blocks when nothing left + + if self.currently_empty: + if time.monotonic() - self.empty_start >= self.waiting_period: + self.count_since_last_wait = 0 + self.surpassed_initial = False + + self.currently_empty = False + + self.lock.release() + + return item + def download_thumbnail(save_directory, video_id): @@ -185,14 +234,15 @@ def download_thumbnail(save_directory, video_id): thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) except urllib.error.HTTPError as e: print("Failed to download thumbnail for " + video_id + ": " + str(e)) - return + return False try: f = open(save_location, 'wb') except FileNotFoundError: - os.makedirs(save_directory) + os.makedirs(save_directory, exist_ok = True) f = open(save_location, 'wb') f.write(thumbnail) f.close() + return True def download_thumbnails(save_directory, ids): if not isinstance(ids, (list, tuple)): -- cgit v1.2.3 From 83f3dfaff54e27f59153980466685e77c4bf12cb Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 9 Jun 2019 18:01:46 -0700 Subject: simplify list_from_comma_separated_tags --- youtube/subscriptions.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index ba27655..be4ea1d 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -362,21 +362,7 @@ def get_subscription_manager_page(env, start_response): ).encode('utf-8') def list_from_comma_separated_tags(string): - tags = [] - prev_comma = -1 - next_comma = string.find(',') - while next_comma != -1: - tag = string[prev_comma+1:next_comma].strip() - if tag: - tags.append(tag) - - prev_comma = next_comma - next_comma = string.find(',', prev_comma+1) - - last_tag = string[prev_comma+1:].strip() - if last_tag: - tags.append(last_tag) - return tags + return [tag.strip() for tag in string.split(',') if tag.strip()] unsubscribe_list_item_template = Template(''' -- cgit v1.2.3 From f1cd3eb7519b10e55bb0a9780d34c1493efca176 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 9 Jun 2019 20:23:59 -0700 Subject: Support opml for subscriptions import --- youtube/subscriptions.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index be4ea1d..d43c8fd 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -9,6 +9,7 @@ import html import json import traceback import contextlib +import defusedxml.ElementTree with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) @@ -322,8 +323,27 @@ def import_subscriptions(env, start_response): traceback.print_exc() start_response('400 Bad Request', () ) return b'400 Bad Request: Unknown json structure' + elif content_type in ('application/xml', 'text/xml', 'text/x-opml'): + try: + root = defusedxml.ElementTree.fromstring(file) + assert root.tag == 'opml' + channels = [] + for outline_element in root[0][0]: + if (outline_element.tag != 'outline') or ('xmlUrl' not in outline_element.attrib): + continue + + + channel_name = outline_element.attrib['text'] + channel_rss_url = outline_element.attrib['xmlUrl'] + channel_id = channel_rss_url[channel_rss_url.find('channel_id=')+11:].strip() + channels.append( (channel_id, channel_name) ) + + except (AssertionError, IndexError, defusedxml.ElementTree.ParseError) as e: + start_response('400 Bad Request', () ) + return b'400 Bad Request: Unable to read opml xml file, or the file is not the expected format' else: - raise NotImplementedError() + start_response('400 Bad Request', () ) + return b'400 Bad Request: Unsupported file format: ' + html.escape(content_type).encode('utf-8') + b'. Only subscription.json files (from Google Takeouts) and XML OPML files exported from Youtube\'s subscription manager page are supported' _subscribe(channels) -- cgit v1.2.3 From d637f5b29c2153b0e4fde2f7183230100b2f6309 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Mon, 10 Jun 2019 02:29:55 -0700 Subject: Group by tags feature and improve appearance of sub list items --- youtube/subscriptions.py | 101 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 16 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index d43c8fd..7728c0c 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -151,6 +151,21 @@ def _get_channel_names(channel_ids): return result +def _channels_with_tag(cursor, tag, order=False): + ''' returns list of (channel_id, channel_name) ''' + + statement = '''SELECT yt_channel_id, channel_name + FROM subscribed_channels + WHERE subscribed_channels.id IN ( + SELECT tag_associations.sql_channel_id FROM tag_associations WHERE tag=? + ) + ''' + if order: + statement += '''ORDER BY channel_name''' + + return cursor.execute(statement, [tag]).fetchall() + + units = { 'year': 31536000, # 365*24*3600 'month': 2592000, # 30*24*3600 @@ -281,11 +296,8 @@ def check_tags(tags): with open_database() as connection: with connection as cursor: for tag in tags: - channel_id_name_list += cursor.execute('''SELECT yt_channel_id, channel_name - FROM subscribed_channels - WHERE subscribed_channels.id IN ( - SELECT tag_associations.sql_channel_id FROM tag_associations WHERE tag=? - )''', [tag]).fetchall() + channel_id_name_list += _channels_with_tag(cursor, tag) + channel_names.update(channel_id_name_list) check_channels_if_necessary([item[0] for item in channel_id_name_list]) @@ -354,30 +366,87 @@ def import_subscriptions(env, start_response): sub_list_item_template = Template('''
  • + $channel_name $tags -
  • ''') +tag_group_template = Template(''' +
  • +

    $tag

    +
      +$sub_list +
    +
  • +''') def get_subscription_manager_page(env, start_response): - - sub_list_html = '' with open_database() as connection: with connection as cursor: - for channel_name, channel_id in _get_subscribed_channels(): - sub_list_html += sub_list_item_template.substitute( - channel_url = util.URL_ORIGIN + '/channel/' + channel_id, - channel_name = html.escape(channel_name), - channel_id = channel_id, - tags = ', '.join(_get_tags(cursor, channel_id)), - ) + if env['parameters'].get('group_by_tags', '0')[0] == '1': + + sort_name = "Don't group" + sort_link = util.URL_ORIGIN + '/subscription_manager' + + main_list_html = '
      ' + for tag in _get_all_tags(): + sub_list_html = '' + for channel_id, channel_name in _channels_with_tag(cursor, tag, order=True): + sub_list_html += sub_list_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + channel_id = channel_id, + tags = ', '.join(t for t in _get_tags(cursor, channel_id) if t != tag), + ) + main_list_html += tag_group_template.substitute( + tag = tag, + sub_list = sub_list_html, + ) + + # Channels with no tags + channel_list = cursor.execute('''SELECT yt_channel_id, channel_name + FROM subscribed_channels + WHERE id NOT IN ( + SELECT sql_channel_id FROM tag_associations + ) + ORDER BY channel_name''').fetchall() + if channel_list: + sub_list_html = '' + for channel_id, channel_name in channel_list: + sub_list_html += sub_list_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + channel_id = channel_id, + tags = '', + ) + main_list_html += tag_group_template.substitute( + tag = "No tags", + sub_list = sub_list_html, + ) + main_list_html += '
    ' + + else: + + sort_name = "Group by tags" + sort_link = util.URL_ORIGIN + '/subscription_manager?group_by_tags=1' + + main_list_html = '
      ' + for channel_name, channel_id in _get_subscribed_channels(): + main_list_html += sub_list_item_template.substitute( + channel_url = util.URL_ORIGIN + '/channel/' + channel_id, + channel_name = html.escape(channel_name), + channel_id = channel_id, + tags = ', '.join(_get_tags(cursor, channel_id)), + ) + main_list_html += '
    ' start_response('200 OK', [('Content-type','text/html'),]) return subscription_manager_template.substitute( header = html_common.get_header(), - sub_list = sub_list_html, + main_list = main_list_html, + sort_name = sort_name, + sort_link = sort_link, page_buttons = '', ).encode('utf-8') -- cgit v1.2.3 From 9da073000a54fba8f6d38dc37bfcfd9ca0a60d9a Mon Sep 17 00:00:00 2001 From: James Taylor Date: Mon, 10 Jun 2019 15:27:17 -0700 Subject: Use a single cursor for db operations when possible instead of reopening multiple times --- youtube/subscriptions.py | 168 ++++++++++++++++++++++------------------------- 1 file changed, 80 insertions(+), 88 deletions(-) (limited to 'youtube') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 7728c0c..5707286 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -63,66 +63,60 @@ def open_database(): # https://stackoverflow.com/questions/19522505/using-sqlite3-in-python-with-with-keyword return contextlib.closing(connection) -def _subscribe(channels): +def with_open_db(function, *args, **kwargs): + with open_database() as connection: + with connection as cursor: + return function(cursor, *args, **kwargs) + + +def _subscribe(cursor, channels): ''' channels is a list of (channel_id, channel_name) ''' # set time_last_checked to 0 on all channels being subscribed to channels = ( (channel_id, channel_name, 0) for channel_id, channel_name in channels) - with open_database() as connection: - with connection as cursor: - cursor.executemany('''INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) - VALUES (?, ?, ?)''', channels) + cursor.executemany('''INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) + VALUES (?, ?, ?)''', channels) # TODO: delete thumbnails -def _unsubscribe(channel_ids): +def _unsubscribe(cursor, channel_ids): ''' channel_ids is a list of channel_ids ''' - with open_database() as connection: - with connection as cursor: - cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) + cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) -def _get_videos(number, offset): - with open_database() as connection: - with connection as cursor: - db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name - FROM videos - INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id - ORDER BY time_published DESC - LIMIT ? OFFSET ?''', (number, offset)) - - for db_video in db_videos: - yield { - 'id': db_video[0], - 'title': db_video[1], - 'duration': db_video[2], - 'author': db_video[3], - } - -def _get_subscribed_channels(): - with open_database() as connection: - with connection as cursor: - for item in cursor.execute('''SELECT channel_name, yt_channel_id - FROM subscribed_channels - ORDER BY channel_name'''): - yield item +def _get_videos(cursor, number, offset): + db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name + FROM videos + INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id + ORDER BY time_published DESC + LIMIT ? OFFSET ?''', (number, offset)) + for db_video in db_videos: + yield { + 'id': db_video[0], + 'title': db_video[1], + 'duration': db_video[2], + 'author': db_video[3], + } -def _add_tags(channel_ids, tags): - with open_database() as connection: - with connection as cursor: - pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] - cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) - SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) +def _get_subscribed_channels(cursor): + for item in cursor.execute('''SELECT channel_name, yt_channel_id + FROM subscribed_channels + ORDER BY channel_name'''): + yield item -def _remove_tags(channel_ids, tags): - with open_database() as connection: - with connection as cursor: - pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] - cursor.executemany('''DELETE FROM tag_associations - WHERE tag = ? AND sql_channel_id = ( - SELECT id FROM subscribed_channels WHERE yt_channel_id = ? - )''', pairs) +def _add_tags(cursor, channel_ids, tags): + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) + SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) + + +def _remove_tags(cursor, channel_ids, tags): + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''DELETE FROM tag_associations + WHERE tag = ? AND sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', pairs) @@ -133,22 +127,18 @@ def _get_tags(cursor, channel_id): SELECT id FROM subscribed_channels WHERE yt_channel_id = ? )''', (channel_id,))] -def _get_all_tags(): - with open_database() as connection: - with connection as cursor: - return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] +def _get_all_tags(cursor): + return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] -def _get_channel_names(channel_ids): +def _get_channel_names(cursor, channel_ids): ''' returns list of (channel_id, channel_name) ''' - with open_database() as connection: - with connection as cursor: - result = [] - for channel_id in channel_ids: - row = cursor.execute('''SELECT channel_name - FROM subscribed_channels - WHERE yt_channel_id = ?''', (channel_id,)).fetchone() - result.append( (channel_id, row[0]) ) - return result + result = [] + for channel_id in channel_ids: + row = cursor.execute('''SELECT channel_name + FROM subscribed_channels + WHERE yt_channel_id = ?''', (channel_id,)).fetchone() + result.append( (channel_id, row[0]) ) + return result def _channels_with_tag(cursor, tag, order=False): @@ -357,7 +347,7 @@ def import_subscriptions(env, start_response): start_response('400 Bad Request', () ) return b'400 Bad Request: Unsupported file format: ' + html.escape(content_type).encode('utf-8') + b'. Only subscription.json files (from Google Takeouts) and XML OPML files exported from Youtube\'s subscription manager page are supported' - _subscribe(channels) + with_open_db(_subscribe, channels) start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscription_manager'),] ) return b'' @@ -388,7 +378,7 @@ def get_subscription_manager_page(env, start_response): sort_link = util.URL_ORIGIN + '/subscription_manager' main_list_html = '