from youtube import util, yt_data_extract, html_common, channel import settings from string import Template import sqlite3 import os import time import gevent import html import json import traceback import contextlib with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) with open('yt_subscription_manager_template.html', 'r', encoding='utf-8') as f: subscription_manager_template = Template(f.read()) thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails") # https://stackabuse.com/a-sqlite-tutorial-with-python/ database_path = os.path.join(settings.data_dir, "subscriptions.sqlite") def open_database(): if not os.path.exists(settings.data_dir): os.makedirs(settings.data_dir) connection = sqlite3.connect(database_path) # Create tables if they don't exist try: cursor = connection.cursor() cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( id integer PRIMARY KEY, yt_channel_id text UNIQUE NOT NULL, channel_name text NOT NULL, time_last_checked integer )''') cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( id integer PRIMARY KEY, sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, video_id text UNIQUE NOT NULL, title text NOT NULL, duration text, time_published integer NOT NULL, description text )''') cursor.execute('''CREATE TABLE IF NOT EXISTS tag_associations ( id integer PRIMARY KEY, tag text NOT NULL, sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, UNIQUE(tag, sql_channel_id) )''') connection.commit() except: connection.rollback() connection.close() raise # https://stackoverflow.com/questions/19522505/using-sqlite3-in-python-with-with-keyword return contextlib.closing(connection) def _subscribe(channels): ''' channels is a list of (channel_id, channel_name) ''' # set time_last_checked to 0 on all channels being subscribed to channels = ( (channel_id, channel_name, 0) for channel_id, channel_name in channels) with open_database() as connection: with connection as cursor: cursor.executemany('''INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)''', channels) # TODO: delete thumbnails def _unsubscribe(channel_ids): ''' channel_ids is a list of channel_ids ''' with open_database() as connection: with connection as cursor: cursor.executemany("DELETE FROM subscribed_channels WHERE yt_channel_id=?", ((channel_id, ) for channel_id in channel_ids)) def _get_videos(number, offset): with open_database() as connection: with connection as cursor: db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name FROM videos INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id ORDER BY time_published DESC LIMIT ? OFFSET ?''', (number, offset)) for db_video in db_videos: yield { 'id': db_video[0], 'title': db_video[1], 'duration': db_video[2], 'author': db_video[3], } def _get_subscribed_channels(): with open_database() as connection: with connection as cursor: for item in cursor.execute('''SELECT channel_name, yt_channel_id FROM subscribed_channels ORDER BY channel_name'''): yield item def _add_tags(channel_ids, tags): with open_database() as connection: with connection as cursor: pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id) SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs) def _remove_tags(channel_ids, tags): with open_database() as connection: with connection as cursor: pairs = [(tag, yt_channel_id) for tag in tags for yt_channel_id in channel_ids] cursor.executemany('''DELETE FROM tag_associations WHERE tag = ? AND sql_channel_id = ( SELECT id FROM subscribed_channels WHERE yt_channel_id = ? )''', pairs) def _get_tags(channel_id): with open_database() as connection: with connection as cursor: return [row[0] for row in cursor.execute('''SELECT tag FROM tag_associations WHERE sql_channel_id = ( SELECT id FROM subscribed_channels WHERE yt_channel_id = ? )''', (channel_id,))] def _get_all_tags(): with open_database() as connection: with connection as cursor: return [row[0] for row in cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')] def _get_channel_names(channel_ids): ''' returns list of (channel_id, channel_name) ''' with open_database() as connection: with connection as cursor: result = [] for channel_id in channel_ids: row = cursor.execute('''SELECT channel_name FROM subscribed_channels WHERE yt_channel_id = ?''', (channel_id,)).fetchone() result.append( (channel_id, row[0]) ) return result units = { 'year': 31536000, # 365*24*3600 'month': 2592000, # 30*24*3600 'week': 604800, # 7*24*3600 'day': 86400, # 24*3600 'hour': 3600, 'minute': 60, 'second': 1, } def youtube_timestamp_to_posix(dumb_timestamp): ''' Given a dumbed down timestamp such as 1 year ago, 3 hours ago, approximates the unix time (seconds since 1/1/1970) ''' dumb_timestamp = dumb_timestamp.lower() now = time.time() if dumb_timestamp == "just now": return now split = dumb_timestamp.split(' ') number, unit = int(split[0]), split[1] if number > 1: unit = unit[:-1] # remove s from end return now - number*units[unit] # Use this to mark a thumbnail acceptable to be retrieved at the request of the browser downloading_thumbnails = set() def download_thumbnails(thumbnails_directory, thumbnails): try: g = gevent.spawn(util.download_thumbnails, thumbnails_directory, thumbnails) g.join() finally: downloading_thumbnails.difference_update(thumbnails) def _get_upstream_videos(channel_id): videos = [] json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) for i, json_video in enumerate(json_channel_videos): info = yt_data_extract.renderer_info(json_video['gridVideoRenderer']) if 'description' not in info: info['description'] = '' info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order videos.append(info) try: existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory)) except FileNotFoundError: existing_thumbnails = set() missing_thumbnails = set(video['id'] for video in videos) - existing_thumbnails downloading_thumbnails.update(missing_thumbnails) gevent.spawn(download_thumbnails, thumbnails_directory, missing_thumbnails) return videos def import_subscriptions(env, start_response): content_type = env['parameters']['subscriptions_file'][0] file = env['parameters']['subscriptions_file'][1] file = file.decode('utf-8') if content_type == 'application/json': try: file = json.loads(file) except json.decoder.JSONDecodeError: traceback.print_exc() start_response('400 Bad Request', () ) return b'400 Bad Request: Invalid json file' try: channels = ( (item['snippet']['resourceId']['channelId'], item['snippet']['title']) for item in file) except (KeyError, IndexError): traceback.print_exc() start_response('400 Bad Request', () ) return b'400 Bad Request: Unknown json structure' else: raise NotImplementedError() _subscribe(channels) start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscription_manager'),] ) return b'' sub_list_item_template = Template('''