diff options
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/channel.py | 14 | ||||
-rw-r--r-- | youtube/local_playlist.py | 30 | ||||
-rw-r--r-- | youtube/static/shared.css | 5 | ||||
-rw-r--r-- | youtube/subscriptions.py | 723 | ||||
-rw-r--r-- | youtube/templates/channel.html | 9 | ||||
-rw-r--r-- | youtube/templates/subscription_manager.html | 142 | ||||
-rw-r--r-- | youtube/templates/subscriptions.html | 116 | ||||
-rw-r--r-- | youtube/templates/unsubscribe_verify.html | 19 | ||||
-rw-r--r-- | youtube/util.py | 81 |
9 files changed, 1094 insertions, 45 deletions
diff --git a/youtube/channel.py b/youtube/channel.py index 4c7d380..de75eaa 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -1,5 +1,5 @@ import base64 -from youtube import util, yt_data_extract, local_playlist +from youtube import util, yt_data_extract, local_playlist, subscriptions from youtube import yt_app import urllib @@ -83,13 +83,15 @@ def channel_ctoken(channel_id, page, sort, tab, view=1): return base64.urlsafe_b64encode(pointless_nest).decode('ascii') -def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1): +def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True): ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D') url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken - print("Sending channel tab ajax request") + if print_status: + print("Sending channel tab ajax request") content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab') - print("Finished recieving channel tab response") + if print_status: + print("Finished recieving channel tab response") return content @@ -312,7 +314,7 @@ def get_channel_page(channel_id, tab='videos'): info['current_sort'] = sort elif tab == 'search': info['search_box_value'] = query - + info['subscribed'] = subscriptions.is_subscribed(info['channel_id']) return flask.render_template('channel.html', parameters_dictionary = request.args, @@ -352,7 +354,7 @@ def get_channel_page_general_url(base_url, tab, request): info['current_sort'] = sort elif tab == 'search': info['search_box_value'] = query - + info['subscribed'] = subscriptions.is_subscribed(info['channel_id']) return flask.render_template('channel.html', parameters_dictionary = request.args, diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py index bb05d1a..88d020f 100644 --- a/youtube/local_playlist.py +++ b/youtube/local_playlist.py @@ -34,33 +34,7 @@ def add_to_playlist(name, video_info_list): if id not in ids: file.write(info + 
"\n") missing_thumbnails.append(id) - gevent.spawn(download_thumbnails, name, missing_thumbnails) - -def download_thumbnail(playlist_name, video_id): - url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" - save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg") - try: - thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id) - except urllib.error.HTTPError as e: - print("Failed to download thumbnail for " + video_id + ": " + str(e)) - return - try: - f = open(save_location, 'wb') - except FileNotFoundError: - os.makedirs(os.path.join(thumbnails_directory, playlist_name)) - f = open(save_location, 'wb') - f.write(thumbnail) - f.close() - -def download_thumbnails(playlist_name, ids): - # only do 5 at a time - # do the n where n is divisible by 5 - i = -1 - for i in range(0, int(len(ids)/5) - 1 ): - gevent.joinall([gevent.spawn(download_thumbnail, playlist_name, ids[j]) for j in range(i*5, i*5 + 5)]) - # do the remainders (< 5) - gevent.joinall([gevent.spawn(download_thumbnail, playlist_name, ids[j]) for j in range(i*5 + 5, len(ids))]) - + gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) def get_local_playlist_videos(name, offset=0, amount=50): @@ -89,7 +63,7 @@ def get_local_playlist_videos(name, offset=0, amount=50): except json.decoder.JSONDecodeError: if not video_json.strip() == '': print('Corrupt playlist video entry: ' + video_json) - gevent.spawn(download_thumbnails, name, missing_thumbnails) + gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) return videos[offset:offset+amount], len(videos) def get_playlist_names(): diff --git a/youtube/static/shared.css b/youtube/static/shared.css index a360972..848b8da 100644 --- a/youtube/static/shared.css +++ b/youtube/static/shared.css @@ -1,7 +1,10 @@ +* { + box-sizing: border-box; +} + h1, h2, h3, h4, h5, h6, div, button{ margin:0; padding:0; - 
} diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 47f1ea3..739b2c5 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -1,18 +1,721 @@ +from youtube import util, yt_data_extract, channel +from youtube import yt_app +import settings + +import sqlite3 +import os +import time +import gevent +import json +import traceback +import contextlib +import defusedxml.ElementTree import urllib +import math +import secrets + +import flask +from flask import request + + +thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails") + +# https://stackabuse.com/a-sqlite-tutorial-with-python/ + +database_path = os.path.join(settings.data_dir, "subscriptions.sqlite") + +def open_database(): + if not os.path.exists(settings.data_dir): + os.makedirs(settings.data_dir) + connection = sqlite3.connect(database_path, check_same_thread=False) + + try: + cursor = connection.cursor() + cursor.execute('''PRAGMA foreign_keys = 1''') + # Create tables if they don't exist + cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( + id integer PRIMARY KEY, + yt_channel_id text UNIQUE NOT NULL, + channel_name text NOT NULL, + time_last_checked integer, + next_check_time integer, + muted integer DEFAULT 0 + )''') + cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( + id integer PRIMARY KEY, + sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, + video_id text UNIQUE NOT NULL, + title text NOT NULL, + duration text, + time_published integer NOT NULL, + description text + )''') + cursor.execute('''CREATE TABLE IF NOT EXISTS tag_associations ( + id integer PRIMARY KEY, + tag text NOT NULL, + sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, + UNIQUE(tag, sql_channel_id) + )''') + + connection.commit() + except: + connection.rollback() + connection.close() + raise + + # 
https://stackoverflow.com/questions/19522505/using-sqlite3-in-python-with-with-keyword + return contextlib.closing(connection) + +def with_open_db(function, *args, **kwargs): + with open_database() as connection: + with connection as cursor: + return function(cursor, *args, **kwargs) + +def is_subscribed(channel_id): + if not os.path.exists(database_path): + return False + + with open_database() as connection: + with connection as cursor: + result = cursor.execute('''SELECT EXISTS( + SELECT 1 + FROM subscribed_channels + WHERE yt_channel_id=? + LIMIT 1 + )''', [channel_id]).fetchone() + return bool(result[0]) + + +def _subscribe(cursor, channels): + ''' channels is a list of (channel_id, channel_name) ''' + + # set time_last_checked to 0 on all channels being subscribed to + channels = ( (channel_id, channel_name, 0) for channel_id, channel_name in channels) + + cursor.executemany('''INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) + VALUES (?, ?, ?)''', channels) + + +def delete_thumbnails(to_delete): + for thumbnail in to_delete: + try: + video_id = thumbnail[0:-4] + if video_id in existing_thumbnails: + os.remove(os.path.join(thumbnails_directory, thumbnail)) + existing_thumbnails.remove(video_id) + except Exception: + print('Failed to delete thumbnail: ' + thumbnail) + traceback.print_exc() + +def _unsubscribe(cursor, channel_ids): + ''' channel_ids is a list of channel_ids ''' + to_delete = [] + for channel_id in channel_ids: + rows = cursor.execute('''SELECT video_id + FROM videos + WHERE sql_channel_id = ( + SELECT id + FROM subscribed_channels + WHERE yt_channel_id=? 
+ )''', (channel_id,)).fetchall() + to_delete += [row[0] + '.jpg' for row in rows] + + gevent.spawn(delete_thumbnails, to_delete) + cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) + +def _get_videos(cursor, number_per_page, offset, tag = None): + '''Returns a full page of videos with an offset, and a value good enough to be used as the total number of videos''' + # We ask for the next 9 pages from the database + # Then the actual length of the results tell us if there are more than 9 pages left, and if not, how many there actually are + # This is done since there are only 9 page buttons on display at a time + # If there are more than 9 pages left, we give a fake value in place of the real number of results if the entire database was queried without limit + # This fake value is sufficient to get the page button generation macro to display 9 page buttons + # If we wish to display more buttons this logic must change + # We cannot use tricks with the sql id for the video since we frequently have filters and other restrictions in place on the results anyway + # TODO: This is probably not the ideal solution + if tag is not None: + db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name + FROM videos + INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id + INNER JOIN tag_associations on videos.sql_channel_id = tag_associations.sql_channel_id + WHERE tag = ? + ORDER BY time_published DESC + LIMIT ? OFFSET ?''', (tag, number_per_page*9, offset)).fetchall() + else: + db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name + FROM videos + INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id + ORDER BY time_published DESC + LIMIT ? 
OFFSET ?''', (number_per_page*9, offset)).fetchall() + + pseudo_number_of_videos = offset + len(db_videos) + + videos = [] + for db_video in db_videos[0:number_per_page]: + videos.append({ + 'id': db_video[0], + 'title': db_video[1], + 'duration': db_video[2], + 'author': db_video[3], + }) + + return videos, pseudo_number_of_videos + + + + +def _get_subscribed_channels(cursor): + for item in cursor.execute('''SELECT channel_name, yt_channel_id, muted + FROM subscribed_channels + ORDER BY channel_name COLLATE NOCASE'''): + yield item + + +def _add_tags(cursor, channel_ids, tags): + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) + SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) + + +def _remove_tags(cursor, channel_ids, tags): + pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] + cursor.executemany('''DELETE FROM tag_associations + WHERE tag = ? AND sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? + )''', pairs) + + + +def _get_tags(cursor, channel_id): + return [row[0] for row in cursor.execute('''SELECT tag + FROM tag_associations + WHERE sql_channel_id = ( + SELECT id FROM subscribed_channels WHERE yt_channel_id = ? 
+ )''', (channel_id,))] + +def _get_all_tags(cursor): + return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] + +def _get_channel_names(cursor, channel_ids): + ''' returns list of (channel_id, channel_name) ''' + result = [] + for channel_id in channel_ids: + row = cursor.execute('''SELECT channel_name + FROM subscribed_channels + WHERE yt_channel_id = ?''', (channel_id,)).fetchone() + result.append( (channel_id, row[0]) ) + return result + + +def _channels_with_tag(cursor, tag, order=False, exclude_muted=False, include_muted_status=False): + ''' returns list of (channel_id, channel_name) ''' + + statement = '''SELECT yt_channel_id, channel_name''' + + if include_muted_status: + statement += ''', muted''' + + statement += ''' + FROM subscribed_channels + WHERE subscribed_channels.id IN ( + SELECT tag_associations.sql_channel_id FROM tag_associations WHERE tag=? + ) + ''' + if exclude_muted: + statement += '''AND muted != 1\n''' + if order: + statement += '''ORDER BY channel_name COLLATE NOCASE''' + + return cursor.execute(statement, [tag]).fetchall() + +def _schedule_checking(cursor, channel_id, next_check_time): + cursor.execute('''UPDATE subscribed_channels SET next_check_time = ? 
WHERE yt_channel_id = ?''', [int(next_check_time), channel_id]) + +def _is_muted(cursor, channel_id): + return bool(cursor.execute('''SELECT muted FROM subscribed_channels WHERE yt_channel_id=?''', [channel_id]).fetchone()[0]) + +units = { + 'year': 31536000, # 365*24*3600 + 'month': 2592000, # 30*24*3600 + 'week': 604800, # 7*24*3600 + 'day': 86400, # 24*3600 + 'hour': 3600, + 'minute': 60, + 'second': 1, +} +def youtube_timestamp_to_posix(dumb_timestamp): + ''' Given a dumbed down timestamp such as 1 year ago, 3 hours ago, + approximates the unix time (seconds since 1/1/1970) ''' + dumb_timestamp = dumb_timestamp.lower() + now = time.time() + if dumb_timestamp == "just now": + return now + split = dumb_timestamp.split(' ') + number, unit = int(split[0]), split[1] + if number > 1: + unit = unit[:-1] # remove s from end + return now - number*units[unit] + + +try: + existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory)) +except FileNotFoundError: + existing_thumbnails = set() + + +# --- Manual checking system. 
Rate limited in order to support very large numbers of channels to be checked --- +# Auto checking system plugs into this for convenience, though it doesn't really need the rate limiting + +check_channels_queue = util.RateLimitedQueue() +checking_channels = set() + +# Just to use for printing channel checking status to console without opening database +channel_names = dict() + +def check_channel_worker(): + while True: + channel_id = check_channels_queue.get() + try: + _get_upstream_videos(channel_id) + finally: + checking_channels.remove(channel_id) + +for i in range(0,5): + gevent.spawn(check_channel_worker) +# ---------------------------- + + + +# --- Auto checking system --- + +if settings.autocheck_subscriptions: + # job application format: dict with keys (channel_id, channel_name, next_check_time) + autocheck_job_application = gevent.queue.Queue() # only really meant to hold 1 item, just reusing gevent's wait and timeout machinery + + autocheck_jobs = [] # list of dicts with the keys (channel_id, channel_name, next_check_time). Stores all the channels that need to be autochecked and when to check them + with open_database() as connection: + with connection as cursor: + now = time.time() + for row in cursor.execute('''SELECT yt_channel_id, channel_name, next_check_time FROM subscribed_channels WHERE next_check_time IS NOT NULL AND muted != 1''').fetchall(): + if row[2] < now: # expired, check randomly within the 30 minutes + next_check_time = now + 3600*secrets.randbelow(60)/60 + row = (row[0], row[1], next_check_time) + _schedule_checking(cursor, row[0], next_check_time) + autocheck_jobs.append({'channel_id': row[0], 'channel_name': row[1], 'next_check_time': row[2]}) + + + + def autocheck_dispatcher(): + '''Scans the auto_check_list. Sleeps until the earliest job is due, then adds that channel to the checking queue above. 
Can be sent a new job through autocheck_job_application''' + while True: + if len(autocheck_jobs) == 0: + new_job = autocheck_job_application.get() + autocheck_jobs.append(new_job) + else: + earliest_job_index = min(range(0, len(autocheck_jobs)), key=lambda index: autocheck_jobs[index]['next_check_time']) # https://stackoverflow.com/a/11825864 + earliest_job = autocheck_jobs[earliest_job_index] + time_until_earliest_job = earliest_job['next_check_time'] - time.time() + + if time_until_earliest_job <= 0: + print('ERROR: autocheck_dispatcher got job scheduled in the past, skipping and rescheduling: ' + earliest_job['channel_id'] + ', ' + earliest_job['channel_name'] + ', ' + str(earliest_job['next_check_time'])) + next_check_time = time.time() + 3600*secrets.randbelow(60)/60 + with_open_db(_schedule_checking, earliest_job['channel_id'], next_check_time) + autocheck_jobs[earliest_job_index]['next_check_time'] = next_check_time + continue + + # make sure it's not muted + if with_open_db(_is_muted, earliest_job['channel_id']): + del autocheck_jobs[earliest_job_index] + continue + + try: + new_job = autocheck_job_application.get(timeout = time_until_earliest_job) # sleep for time_until_earliest_job time, but allow to be interrupted by new jobs + except gevent.queue.Empty: # no new jobs, time to execute the earliest job + channel_names[earliest_job['channel_id']] = earliest_job['channel_name'] + checking_channels.add(earliest_job['channel_id']) + check_channels_queue.put(earliest_job['channel_id']) + del autocheck_jobs[earliest_job_index] + else: # new job, add it to the list + autocheck_jobs.append(new_job) + + + gevent.spawn(autocheck_dispatcher) +# ---------------------------- + -with open("subscriptions.txt", 'r', encoding='utf-8') as file: - subscriptions = file.read() - -# Line format: "channel_id channel_name" -# Example: -# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown -subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines()) +def 
check_channels_if_necessary(channel_ids): + for channel_id in channel_ids: + if channel_id not in checking_channels: + checking_channels.add(channel_id) + check_channels_queue.put(channel_id) -def get_new_videos(): - for channel_id, channel_name in subscriptions: - +def _get_upstream_videos(channel_id): + try: + channel_status_name = channel_names[channel_id] + except KeyError: + channel_status_name = channel_id + print("Checking channel: " + channel_status_name) + + videos = [] + + channel_videos = channel.extract_info(json.loads(channel.get_channel_tab(channel_id, print_status=False)), 'videos')['items'] + for i, video_item in enumerate(channel_videos): + if 'description' not in video_item: + video_item['description'] = '' + try: + video_item['time_published'] = youtube_timestamp_to_posix(video_item['published']) - i # subtract a few seconds off the videos so they will be in the right order + except KeyError: + print(video_item) + videos.append((channel_id, video_item['id'], video_item['title'], video_item['duration'], video_item['time_published'], video_item['description'])) + + + if len(videos) == 0: + average_upload_period = 4*7*24*3600 # assume 1 month for channel with no videos + elif len(videos) < 5: + average_upload_period = int((time.time() - videos[len(videos)-1][4])/len(videos)) + else: + average_upload_period = int((time.time() - videos[4][4])/5) # equivalent to averaging the time between videos for the last 5 videos + + # calculate when to check next for auto checking + # add some quantization and randomness to make pattern analysis by Youtube slightly harder + quantized_upload_period = average_upload_period - (average_upload_period % (4*3600)) + 4*3600 # round up to nearest 4 hours + randomized_upload_period = quantized_upload_period*(1 + secrets.randbelow(50)/50*0.5) # randomly between 1x and 1.5x + next_check_delay = randomized_upload_period/10 # check at 10x the channel posting rate. 
might want to fine tune this number + next_check_time = int(time.time() + next_check_delay) + + with open_database() as connection: + with connection as cursor: + # calculate how many new videos there are + row = cursor.execute('''SELECT video_id + FROM videos + INNER JOIN subscribed_channels ON videos.sql_channel_id = subscribed_channels.id + WHERE yt_channel_id=? + ORDER BY time_published DESC + LIMIT 1''', [channel_id]).fetchone() + if row is None: + number_of_new_videos = len(videos) + else: + latest_video_id = row[0] + index = 0 + for video in videos: + if video[1] == latest_video_id: + break + index += 1 + number_of_new_videos = index + + cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description) + VALUES ((SELECT id FROM subscribed_channels WHERE yt_channel_id=?), ?, ?, ?, ?, ?)''', videos) + cursor.execute('''UPDATE subscribed_channels + SET time_last_checked = ?, next_check_time = ? + WHERE yt_channel_id=?''', [int(time.time()), next_check_time, channel_id]) + + if settings.autocheck_subscriptions: + if not _is_muted(cursor, channel_id): + autocheck_job_application.put({'channel_id': channel_id, 'channel_name': channel_names[channel_id], 'next_check_time': next_check_time}) + + if number_of_new_videos == 0: + print('No new videos from ' + channel_status_name) + elif number_of_new_videos == 1: + print('1 new video from ' + channel_status_name) + else: + print(str(number_of_new_videos) + ' new videos from ' + channel_status_name) + + + +def check_all_channels(): + with open_database() as connection: + with connection as cursor: + channel_id_name_list = cursor.execute('''SELECT yt_channel_id, channel_name + FROM subscribed_channels + WHERE muted != 1''').fetchall() + + channel_names.update(channel_id_name_list) + check_channels_if_necessary([item[0] for item in channel_id_name_list]) + + +def check_tags(tags): + channel_id_name_list = [] + with open_database() as connection: + with connection as 
cursor: + for tag in tags: + channel_id_name_list += _channels_with_tag(cursor, tag, exclude_muted=True) + + channel_names.update(channel_id_name_list) + check_channels_if_necessary([item[0] for item in channel_id_name_list]) + + +def check_specific_channels(channel_ids): + with open_database() as connection: + with connection as cursor: + channel_id_name_list = [] + for channel_id in channel_ids: + channel_id_name_list += cursor.execute('''SELECT yt_channel_id, channel_name + FROM subscribed_channels + WHERE yt_channel_id=?''', [channel_id]).fetchall() + channel_names.update(channel_id_name_list) + check_channels_if_necessary(channel_ids) + + + +@yt_app.route('/import_subscriptions', methods=['POST']) +def import_subscriptions(): + + # check if the post request has the file part + if 'subscriptions_file' not in request.files: + #flash('No file part') + return flask.redirect(util.URL_ORIGIN + request.full_path) + file = request.files['subscriptions_file'] + # if user does not select file, browser also + # submit an empty part without filename + if file.filename == '': + #flash('No selected file') + return flask.redirect(util.URL_ORIGIN + request.full_path) + + + mime_type = file.mimetype + + if mime_type == 'application/json': + file = file.read().decode('utf-8') + try: + file = json.loads(file) + except json.decoder.JSONDecodeError: + traceback.print_exc() + return '400 Bad Request: Invalid json file', 400 + + try: + channels = ( (item['snippet']['resourceId']['channelId'], item['snippet']['title']) for item in file) + except (KeyError, IndexError): + traceback.print_exc() + return '400 Bad Request: Unknown json structure', 400 + elif mime_type in ('application/xml', 'text/xml', 'text/x-opml'): + file = file.read().decode('utf-8') + try: + root = defusedxml.ElementTree.fromstring(file) + assert root.tag == 'opml' + channels = [] + for outline_element in root[0][0]: + if (outline_element.tag != 'outline') or ('xmlUrl' not in outline_element.attrib): + continue + + 
+ channel_name = outline_element.attrib['text'] + channel_rss_url = outline_element.attrib['xmlUrl'] + channel_id = channel_rss_url[channel_rss_url.find('channel_id=')+11:].strip() + channels.append( (channel_id, channel_name) ) + + except (AssertionError, IndexError, defusedxml.ElementTree.ParseError) as e: + return '400 Bad Request: Unable to read opml xml file, or the file is not the expected format', 400 + else: + return '400 Bad Request: Unsupported file format: ' + mime_type + '. Only subscription.json files (from Google Takeouts) and XML OPML files exported from Youtube\'s subscription manager page are supported', 400 + + with_open_db(_subscribe, channels) + + return flask.redirect(util.URL_ORIGIN + '/subscription_manager', 303) + + + +@yt_app.route('/subscription_manager', methods=['GET']) +def get_subscription_manager_page(): + group_by_tags = request.args.get('group_by_tags', '0') == '1' + with open_database() as connection: + with connection as cursor: + if group_by_tags: + tag_groups = [] + + for tag in _get_all_tags(cursor): + sub_list = [] + for channel_id, channel_name, muted in _channels_with_tag(cursor, tag, order=True, include_muted_status=True): + sub_list.append({ + 'channel_url': util.URL_ORIGIN + '/channel/' + channel_id, + 'channel_name': channel_name, + 'channel_id': channel_id, + 'muted': muted, + 'tags': [t for t in _get_tags(cursor, channel_id) if t != tag], + }) + + tag_groups.append( (tag, sub_list) ) + + # Channels with no tags + channel_list = cursor.execute('''SELECT yt_channel_id, channel_name, muted + FROM subscribed_channels + WHERE id NOT IN ( + SELECT sql_channel_id FROM tag_associations + ) + ORDER BY channel_name COLLATE NOCASE''').fetchall() + if channel_list: + sub_list = [] + for channel_id, channel_name, muted in channel_list: + sub_list.append({ + 'channel_url': util.URL_ORIGIN + '/channel/' + channel_id, + 'channel_name': channel_name, + 'channel_id': channel_id, + 'muted': muted, + 'tags': [], + }) + + 
tag_groups.append( ('No tags', sub_list) ) + else: + sub_list = [] + for channel_name, channel_id, muted in _get_subscribed_channels(cursor): + sub_list.append({ + 'channel_url': util.URL_ORIGIN + '/channel/' + channel_id, + 'channel_name': channel_name, + 'channel_id': channel_id, + 'muted': muted, + 'tags': _get_tags(cursor, channel_id), + }) + + + + + if group_by_tags: + return flask.render_template('subscription_manager.html', + group_by_tags = True, + tag_groups = tag_groups, + ) + else: + return flask.render_template('subscription_manager.html', + group_by_tags = False, + sub_list = sub_list, + ) + +def list_from_comma_separated_tags(string): + return [tag.strip() for tag in string.split(',') if tag.strip()] + + +@yt_app.route('/subscription_manager', methods=['POST']) +def post_subscription_manager_page(): + action = request.values['action'] + + with open_database() as connection: + with connection as cursor: + if action == 'add_tags': + _add_tags(cursor, request.values.getlist('channel_ids'), [tag.lower() for tag in list_from_comma_separated_tags(request.values['tags'])]) + elif action == 'remove_tags': + _remove_tags(cursor, request.values.getlist('channel_ids'), [tag.lower() for tag in list_from_comma_separated_tags(request.values['tags'])]) + elif action == 'unsubscribe': + _unsubscribe(cursor, request.values.getlist('channel_ids')) + elif action == 'unsubscribe_verify': + unsubscribe_list = _get_channel_names(cursor, request.values.getlist('channel_ids')) + return flask.render_template('unsubscribe_verify.html', unsubscribe_list = unsubscribe_list) + + elif action == 'mute': + cursor.executemany('''UPDATE subscribed_channels + SET muted = 1 + WHERE yt_channel_id = ?''', [(ci,) for ci in request.values.getlist('channel_ids')]) + elif action == 'unmute': + cursor.executemany('''UPDATE subscribed_channels + SET muted = 0 + WHERE yt_channel_id = ?''', [(ci,) for ci in request.values.getlist('channel_ids')]) + else: + flask.abort(400) + + return 
flask.redirect(util.URL_ORIGIN + request.full_path, 303) + +@yt_app.route('/subscriptions', methods=['GET']) +@yt_app.route('/feed/subscriptions', methods=['GET']) def get_subscriptions_page(): + page = int(request.args.get('page', 1)) + with open_database() as connection: + with connection as cursor: + tag = request.args.get('tag', None) + videos, number_of_videos_in_db = _get_videos(cursor, 60, (page - 1)*60, tag) + for video in videos: + video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg' + video['type'] = 'video' + video['item_size'] = 'small' + yt_data_extract.add_extra_html_info(video) + + tags = _get_all_tags(cursor) + + + subscription_list = [] + for channel_name, channel_id, muted in _get_subscribed_channels(cursor): + subscription_list.append({ + 'channel_url': util.URL_ORIGIN + '/channel/' + channel_id, + 'channel_name': channel_name, + 'channel_id': channel_id, + 'muted': muted, + }) + + return flask.render_template('subscriptions.html', + videos = videos, + num_pages = math.ceil(number_of_videos_in_db/60), + parameters_dictionary = request.args, + tags = tags, + current_tag = tag, + subscription_list = subscription_list, + ) + +@yt_app.route('/subscriptions', methods=['POST']) +@yt_app.route('/feed/subscriptions', methods=['POST']) +def post_subscriptions_page(): + action = request.values['action'] + if action == 'subscribe': + if len(request.values.getlist('channel_id')) != len(request.values.getlist('channel_name')): + return '400 Bad Request, length of channel_id != length of channel_name', 400 + with_open_db(_subscribe, zip(request.values.getlist('channel_id'), request.values.getlist('channel_name'))) + + elif action == 'unsubscribe': + with_open_db(_unsubscribe, request.values.getlist('channel_id')) + + elif action == 'refresh': + type = request.values['type'] + if type == 'all': + check_all_channels() + elif type == 'tag': + check_tags(request.values.getlist('tag_name')) + elif type == 'channel': + 
check_specific_channels(request.values.getlist('channel_id')) + else: + flask.abort(400) + else: + flask.abort(400) + + return '', 204 + + +@yt_app.route('/data/subscription_thumbnails/<thumbnail>') +def serve_subscription_thumbnail(thumbnail): + '''Serves thumbnail from disk if it's been saved already. If not, downloads the thumbnail, saves to disk, and serves it.''' + assert thumbnail[-4:] == '.jpg' + video_id = thumbnail[0:-4] + thumbnail_path = os.path.join(thumbnails_directory, thumbnail) + + if video_id in existing_thumbnails: + try: + f = open(thumbnail_path, 'rb') + except FileNotFoundError: + existing_thumbnails.remove(video_id) + else: + image = f.read() + f.close() + return flask.Response(image, mimetype='image/jpeg') + + url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + try: + image = util.fetch_url(url, report_text="Saved thumbnail: " + video_id) + except urllib.error.HTTPError as e: + print("Failed to download thumbnail for " + video_id + ": " + str(e)) + abort(e.code) + try: + f = open(thumbnail_path, 'wb') + except FileNotFoundError: + os.makedirs(thumbnails_directory, exist_ok = True) + f = open(thumbnail_path, 'wb') + f.write(image) + f.close() + existing_thumbnails.add(video_id) + + return flask.Response(image, mimetype='image/jpeg') + + + + + + + diff --git a/youtube/templates/channel.html b/youtube/templates/channel.html index 069e33b..a9f3ac9 100644 --- a/youtube/templates/channel.html +++ b/youtube/templates/channel.html @@ -23,6 +23,9 @@ grid-column:2; margin-left: 5px; } + .summary subscribe-unsubscribe, .summary short-description{ + margin-top: 10px; + } main .channel-tabs{ grid-row:2; grid-column: 1 / span 2; @@ -90,6 +93,12 @@ <div class="summary"> <h2 class="title">{{ channel_name }}</h2> <p class="short-description">{{ short_description }}</p> + <form method="POST" action="/youtube.com/subscriptions" class="subscribe-unsubscribe"> + <input type="submit" value="{{ 'Unsubscribe' if subscribed else 'Subscribe' }}"> + <input 
type="hidden" name="channel_id" value="{{ channel_id }}"> + <input type="hidden" name="channel_name" value="{{ channel_name }}"> + <input type="hidden" name="action" value="{{ 'unsubscribe' if subscribed else 'subscribe' }}"> + </form> </div> <nav class="channel-tabs"> {% for tab_name in ('Videos', 'Playlists', 'About') %} diff --git a/youtube/templates/subscription_manager.html b/youtube/templates/subscription_manager.html new file mode 100644 index 0000000..3145f54 --- /dev/null +++ b/youtube/templates/subscription_manager.html @@ -0,0 +1,142 @@ +{% set page_title = 'Subscription Manager' %} +{% extends "base.html" %} +{% block style %} + .import-export{ + display: flex; + flex-direction: row; + } + .subscriptions-import-form{ + background-color: #dadada; + display: flex; + flex-direction: column; + align-items: flex-start; + max-width: 300px; + padding:10px; + } + .subscriptions-import-form h2{ + font-size: 20px; + margin-bottom: 10px; + } + + .import-submit-button{ + margin-top:15px; + align-self: flex-end; + } + + + .subscriptions-export-links{ + margin: 0px 0px 0px 20px; + background-color: #dadada; + list-style: none; + max-width: 300px; + padding:10px; + } + + .sub-list-controls{ + background-color: #dadada; + padding:10px; + } + + + .tag-group-list{ + list-style: none; + margin-left: 10px; + margin-right: 10px; + padding: 0px; + } + .tag-group{ + border-style: solid; + margin-bottom: 10px; + } + + .sub-list{ + list-style: none; + padding:10px; + column-width: 300px; + column-gap: 40px; + } + .sub-list-item{ + display:flex; + margin-bottom: 10px; + break-inside:avoid; + background-color: #dadada; + } + .tag-list{ + margin-left:15px; + font-weight:bold; + } + .sub-list-item-name{ + margin-left:15px; + } + .sub-list-checkbox{ + height: 1.5em; + min-width: 1.5em; // need min-width otherwise browser doesn't respect the width and squishes the checkbox down when there's too many tags + } + .muted{ + background-color: #888888; + } +{% endblock style %} + + +{% 
macro subscription_list(sub_list) %} + {% for subscription in sub_list %} + <li class="sub-list-item {{ 'muted' if subscription['muted'] else '' }}"> + <input class="sub-list-checkbox" name="channel_ids" value="{{ subscription['channel_id'] }}" form="subscription-manager-form" type="checkbox"> + <a href="{{ subscription['channel_url'] }}" class="sub-list-item-name" title="{{ subscription['channel_name'] }}">{{ subscription['channel_name'] }}</a> + <span class="tag-list">{{ ', '.join(subscription['tags']) }}</span> + </li> + {% endfor %} +{% endmacro %} + + + +{% block main %} + <div class="import-export"> + <form class="subscriptions-import-form" enctype="multipart/form-data" action="/youtube.com/import_subscriptions" method="POST"> + <h2>Import subscriptions</h2> + <input type="file" id="subscriptions-import" accept="application/json, application/xml, text/x-opml" name="subscriptions_file"> + <input type="submit" value="Import" class="import-submit-button"> + </form> + + <ul class="subscriptions-export-links"> + <li><a href="/youtube.com/subscriptions.opml">Export subscriptions (OPML)</a></li> + <li><a href="/youtube.com/subscriptions.xml">Export subscriptions (RSS)</a></li> + </ul> + </div> + + <hr> + + <form id="subscription-manager-form" class="sub-list-controls" method="POST"> + {% if group_by_tags %} + <a class="sort-button" href="/https://www.youtube.com/subscription_manager?group_by_tags=0">Don't group</a> + {% else %} + <a class="sort-button" href="/https://www.youtube.com/subscription_manager?group_by_tags=1">Group by tags</a> + {% endif %} + <input type="text" name="tags"> + <button type="submit" name="action" value="add_tags">Add tags</button> + <button type="submit" name="action" value="remove_tags">Remove tags</button> + <button type="submit" name="action" value="unsubscribe_verify">Unsubscribe</button> + <button type="submit" name="action" value="mute">Mute</button> + <button type="submit" name="action" value="unmute">Unmute</button> + <input 
type="reset" value="Clear Selection"> + </form> + + + {% if group_by_tags %} + <ul class="tag-group-list"> + {% for tag_name, sub_list in tag_groups %} + <li class="tag-group"> + <h2 class="tag-group-name">{{ tag_name }}</h2> + <ol class="sub-list"> + {{ subscription_list(sub_list) }} + </ol> + </li> + {% endfor %} + </ul> + {% else %} + <ol class="sub-list"> + {{ subscription_list(sub_list) }} + </ol> + {% endif %} + +{% endblock main %} diff --git a/youtube/templates/subscriptions.html b/youtube/templates/subscriptions.html new file mode 100644 index 0000000..fa6b5bf --- /dev/null +++ b/youtube/templates/subscriptions.html @@ -0,0 +1,116 @@ +{% set page_title = 'Subscriptions' %} +{% extends "base.html" %} +{% import "common_elements.html" as common_elements %} + +{% block style %} + main{ + display:flex; + flex-direction: row; + } + .video-section{ + flex-grow: 1; + } + .video-section .page-button-row{ + justify-content: center; + } + .subscriptions-sidebar{ + flex-basis: 300px; + background-color: #dadada; + border-left: 2px; + } + .sidebar-links{ + display:flex; + justify-content: space-between; + padding-left:10px; + padding-right: 10px; + } + + .sidebar-list{ + list-style: none; + padding-left:10px; + padding-right: 10px; + } + .sidebar-list-item{ + display:flex; + justify-content: space-between; + margin-bottom: 5px; + } + .sub-refresh-list .sidebar-item-name{ + text-overflow: clip; + white-space: nowrap; + overflow: hidden; + max-width: 200px; + } + .muted{ + background-color: #888888; + } +{% endblock style %} + +{% block main %} + <div class="video-section"> + <nav class="item-grid"> + {% for video_info in videos %} + {{ common_elements.item(video_info, include_author=false) }} + {% endfor %} + </nav> + + <nav class="page-button-row"> + {{ common_elements.page_buttons(num_pages, '/youtube.com/subscriptions', parameters_dictionary) }} + </nav> + </div> + + <div class="subscriptions-sidebar"> + <div class="sidebar-links"> + <a 
href="/youtube.com/subscription_manager" class="sub-manager-link">Subscription Manager</a> + <form method="POST" class="refresh-all"> + <input type="submit" value="Check All"> + <input type="hidden" name="action" value="refresh"> + <input type="hidden" name="type" value="all"> + </form> + </div> + + <hr> + + <ol class="sidebar-list tags"> + {% if current_tag %} + <li class="sidebar-list-item"> + <a href="/youtube.com/subscriptions" class="sidebar-item-name">Any tag</a> + </li> + {% endif %} + + {% for tag in tags %} + <li class="sidebar-list-item"> + {% if tag == current_tag %} + <span class="sidebar-item-name">{{ tag }}</span> + {% else %} + <a href="?tag={{ tag|urlencode }}" class="sidebar-item-name">{{ tag }}</a> + {% endif %} + <form method="POST" class="sidebar-item-refresh"> + <input type="submit" value="Check"> + <input type="hidden" name="action" value="refresh"> + <input type="hidden" name="type" value="tag"> + <input type="hidden" name="tag_name" value="{{ tag }}"> + </form> + </li> + {% endfor %} + </ol> + + <hr> + + <ol class="sidebar-list sub-refresh-list"> + {% for subscription in subscription_list %} + <li class="sidebar-list-item {{ 'muted' if subscription['muted'] else '' }}"> + <a href="{{ subscription['channel_url'] }}" class="sidebar-item-name" title="{{ subscription['channel_name'] }}">{{ subscription['channel_name'] }}</a> + <form method="POST" class="sidebar-item-refresh"> + <input type="submit" value="Check"> + <input type="hidden" name="action" value="refresh"> + <input type="hidden" name="type" value="channel"> + <input type="hidden" name="channel_id" value="{{ subscription['channel_id'] }}"> + </form> + </li> + {% endfor %} + </ol> + + </div> + +{% endblock main %} diff --git a/youtube/templates/unsubscribe_verify.html b/youtube/templates/unsubscribe_verify.html new file mode 100644 index 0000000..98581c0 --- /dev/null +++ b/youtube/templates/unsubscribe_verify.html @@ -0,0 +1,19 @@ +{% set page_title = 'Unsubscribe?' 
class RateLimitedQueue(gevent.queue.Queue):
    ''' Queue whose get() is rate limited.

    Allows initial_burst (def. 30) gets at first, then alternates between
    sleeping waiting_period (def. 5) seconds and allowing subsequent_bursts
    (def. 10) gets. If a get() had to wait on an empty queue for at least
    waiting_period seconds, the rate limiting resets to the initial burst.
    '''

    def __init__(self, initial_burst=30, waiting_period=5, subsequent_bursts=10):
        self.initial_burst = initial_burst
        self.waiting_period = waiting_period
        self.subsequent_bursts = subsequent_bursts

        # Number of gets handed out since the last enforced sleep
        self.count_since_last_wait = 0
        # True once the initial burst has been used up
        self.surpassed_initial = False

        # Serializes get() so the counters are updated by one greenlet at a time
        self.lock = gevent.lock.BoundedSemaphore(1)
        # Tracks whether the queue was seen empty, and since when
        self.currently_empty = False
        self.empty_start = 0
        gevent.queue.Queue.__init__(self)

    def get(self):
        '''Return the next item, sleeping first when the current burst is used up.

        Blocks while another greenlet is inside get(), and while the queue
        is empty.
        '''
        # Hold the lock via a context manager so it is released even if this
        # greenlet is killed or an exception is raised while sleeping/waiting.
        with self.lock:
            if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
                gevent.sleep(self.waiting_period)
                self.count_since_last_wait = 0

            elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial:
                self.surpassed_initial = True
                gevent.sleep(self.waiting_period)
                self.count_since_last_wait = 0

            self.count_since_last_wait += 1

            if not self.currently_empty and self.empty():
                self.currently_empty = True
                self.empty_start = time.monotonic()

            item = gevent.queue.Queue.get(self)  # blocks when nothing left

            if self.currently_empty:
                # The queue sat empty long enough: start over with a full
                # initial burst.
                if time.monotonic() - self.empty_start >= self.waiting_period:
                    self.count_since_last_wait = 0
                    self.surpassed_initial = False

                self.currently_empty = False

        return item


def download_thumbnail(save_directory, video_id):
    '''Download the mqdefault thumbnail of video_id into save_directory.

    The image is saved as "<video_id>.jpg"; save_directory is created if it
    does not exist. Returns True on success, False if the download failed
    with an HTTP error (the failure is printed).
    '''
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    save_location = os.path.join(save_directory, video_id + ".jpg")
    try:
        thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return False
    try:
        f = open(save_location, 'wb')
    except FileNotFoundError:
        # Directory is missing; create it and retry once.
        os.makedirs(save_directory, exist_ok=True)
        f = open(save_location, 'wb')
    with f:  # closes the file even if the write fails
        f.write(thumbnail)
    return True


def download_thumbnails(save_directory, ids):
    '''Download the thumbnails for every video id in ids, 5 at a time.

    ids may be any iterable of video ids. Each batch of up to 5 downloads
    runs concurrently in greenlets and is joined before the next starts.
    '''
    if not isinstance(ids, (list, tuple)):
        ids = list(ids)
    batch_size = 5
    # Slice-based batching: every batch, including the last, holds at most
    # batch_size ids (the previous index arithmetic left up to 9 for the end).
    for start in range(0, len(ids), batch_size):
        gevent.joinall([
            gevent.spawn(download_thumbnail, save_directory, video_id)
            for video_id in ids[start:start + batch_size]
        ])