aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/subscriptions.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/subscriptions.py')
-rw-r--r--youtube/subscriptions.py633
1 files changed, 621 insertions, 12 deletions
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 47f1ea3..fdba114 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -1,18 +1,627 @@
-import urllib
+from youtube import util, yt_data_extract, html_common, channel
+import settings
+from string import Template
+import sqlite3
+import os
+import time
+import gevent
+import html
+import json
+import traceback
+import contextlib
+import defusedxml.ElementTree
-with open("subscriptions.txt", 'r', encoding='utf-8') as file:
- subscriptions = file.read()
-
-# Line format: "channel_id channel_name"
-# Example:
-# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown
+# Page templates are read once, when this module is imported. Their $name
+# placeholders are filled in by the page handlers further down.
+with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f:
+ subscriptions_template = Template(f.read())
-subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines())
+with open('yt_subscription_manager_template.html', 'r', encoding='utf-8') as f:
+ subscription_manager_template = Template(f.read())
-def get_new_videos():
- for channel_id, channel_name in subscriptions:
-
+# Directory (inside settings.data_dir) that is listed for existing thumbnails
+# and passed to util.download_thumbnail by the thumbnail worker
+thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails")
+# https://stackabuse.com/a-sqlite-tutorial-with-python/
-def get_subscriptions_page():
+# Path of the SQLite file opened by open_database()
+database_path = os.path.join(settings.data_dir, "subscriptions.sqlite")
+
+def open_database():
+    ''' Opens the subscriptions database, creating the data directory and the
+    tables if they do not exist yet.
+
+    Returns the sqlite3 connection wrapped in contextlib.closing, so
+    "with open_database() as connection:" closes the connection on exit.
+    If setting up the tables fails, the connection is rolled back and closed
+    and the exception is re-raised. '''
+    # exist_ok avoids the race between checking for the directory and creating it
+    os.makedirs(settings.data_dir, exist_ok=True)
+    connection = sqlite3.connect(database_path, check_same_thread=False)
+
+    try:
+        # SQLite only enforces foreign keys when asked to, and the setting is
+        # per connection. Without it the ON DELETE CASCADE clauses below never
+        # fire, so unsubscribing would leave orphaned videos and tags behind.
+        connection.execute('PRAGMA foreign_keys = ON')
+
+        # Create tables if they don't exist
+        cursor = connection.cursor()
+        cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels (
+                              id integer PRIMARY KEY,
+                              yt_channel_id text UNIQUE NOT NULL,
+                              channel_name text NOT NULL,
+                              time_last_checked integer,
+                              muted integer DEFAULT 0,
+                              upload_frequency integer
+                          )''')
+        cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
+                              id integer PRIMARY KEY,
+                              sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE,
+                              video_id text UNIQUE NOT NULL,
+                              title text NOT NULL,
+                              duration text,
+                              time_published integer NOT NULL,
+                              description text
+                          )''')
+        cursor.execute('''CREATE TABLE IF NOT EXISTS tag_associations (
+                              id integer PRIMARY KEY,
+                              tag text NOT NULL,
+                              sql_channel_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE,
+                              UNIQUE(tag, sql_channel_id)
+                          )''')
+
+        connection.commit()
+    except BaseException:
+        # Clean up on any failure (including interrupts), then let it propagate
+        connection.rollback()
+        connection.close()
+        raise
+
+    # https://stackoverflow.com/questions/19522505/using-sqlite3-in-python-with-with-keyword
+    return contextlib.closing(connection)
+
+def with_open_db(function, *args, **kwargs):
+    ''' Opens the database, calls function(cursor, *args, **kwargs) and
+    returns whatever it returns. The connection is closed afterwards.
+
+    The object passed as "cursor" is the sqlite3 connection itself; used as a
+    context manager it commits on success and rolls back on an exception. '''
+    with open_database() as db:
+        with db as cursor:
+            result = function(cursor, *args, **kwargs)
+            return result
+
+def is_subscribed(channel_id):
+    ''' Returns True if a row for the given YouTube channel id exists in
+    subscribed_channels, False otherwise (including when the database file
+    does not exist). '''
+    # Without a database file there is nothing to look up
+    if not os.path.exists(database_path):
+        return False
+
+    query = '''SELECT EXISTS(
+                   SELECT 1
+                   FROM subscribed_channels
+                   WHERE yt_channel_id=?
+                   LIMIT 1
+               )'''
+    with open_database() as connection:
+        with connection as cursor:
+            found = cursor.execute(query, [channel_id]).fetchone()
+            return bool(found[0])
+
+
+def _subscribe(cursor, channels):
+    ''' channels is an iterable of (channel_id, channel_name).
+
+    Inserts one subscribed_channels row per pair; pairs that conflict with an
+    existing row are skipped (INSERT OR IGNORE). '''
+    insert_sql = '''INSERT OR IGNORE INTO subscribed_channels (yt_channel_id, channel_name, time_last_checked)
+                    VALUES (?, ?, ?)'''
+
+    # time_last_checked is set to 0 on every channel being subscribed to
+    rows = ((yt_id, name, 0) for yt_id, name in channels)
+    cursor.executemany(insert_sql, rows)
+
+# TODO: delete thumbnails
+def _unsubscribe(cursor, channel_ids):
+    ''' channel_ids is an iterable of YouTube channel ids.
+
+    Deletes the matching rows from subscribed_channels; ids without a row
+    are ignored. '''
+    delete_sql = "DELETE FROM subscribed_channels WHERE yt_channel_id=?"
+    parameters = ((yt_id, ) for yt_id in channel_ids)
+    cursor.executemany(delete_sql, parameters)
+
+def _get_videos(cursor, number, offset):
+    ''' Generator over stored videos, newest first (by time_published).
+
+    Yields at most `number` dicts, skipping the first `offset` rows. Each
+    dict has the keys 'id', 'title', 'duration' and 'author' (the name of
+    the channel the video belongs to). '''
+    rows = cursor.execute('''SELECT video_id, title, duration, channel_name
+                             FROM videos
+                             INNER JOIN subscribed_channels on videos.sql_channel_id = subscribed_channels.id
+                             ORDER BY time_published DESC
+                             LIMIT ? OFFSET ?''', (number, offset))
+
+    keys = ('id', 'title', 'duration', 'author')
+    for row in rows:
+        yield dict(zip(keys, row))
+
+def _get_subscribed_channels(cursor):
+    ''' Generator over (channel_name, yt_channel_id, muted) rows for every
+    subscribed channel, ordered by name ignoring case. '''
+    yield from cursor.execute('''SELECT channel_name, yt_channel_id, muted
+                                 FROM subscribed_channels
+                                 ORDER BY channel_name COLLATE NOCASE''')
+
+
+def _add_tags(cursor, channel_ids, tags):
+    ''' Associates every tag in tags with every channel in channel_ids
+    (YouTube channel ids). Existing associations are left alone, and ids
+    without a subscribed_channels row produce no association. '''
+    pairs = []
+    for tag in tags:
+        for yt_channel_id in channel_ids:
+            pairs.append((tag, yt_channel_id))
+
+    cursor.executemany('''INSERT OR IGNORE INTO tag_associations (tag, sql_channel_id)
+                          SELECT ?, id FROM subscribed_channels WHERE yt_channel_id = ? ''', pairs)
+
+
+def _remove_tags(cursor, channel_ids, tags):
+    ''' Removes the association between every tag in tags and every channel
+    in channel_ids (YouTube channel ids). '''
+    delete_sql = '''DELETE FROM tag_associations
+                    WHERE tag = ? AND sql_channel_id = (
+                        SELECT id FROM subscribed_channels WHERE yt_channel_id = ?
+                    )'''
+    # one (tag, channel) parameter pair per combination
+    combinations = [(tag, yt_id) for tag in tags for yt_id in channel_ids]
+    cursor.executemany(delete_sql, combinations)
+
+
+
+def _get_tags(cursor, channel_id):
+    ''' Returns the list of tags associated with the channel that has the
+    given YouTube channel id. '''
+    rows = cursor.execute('''SELECT tag
+                             FROM tag_associations
+                             WHERE sql_channel_id = (
+                                 SELECT id FROM subscribed_channels WHERE yt_channel_id = ?
+                             )''', (channel_id,))
+    return [tag for (tag,) in rows]
+
+def _get_all_tags(cursor):
+    ''' Returns the list of distinct tags present in tag_associations. '''
+    rows = cursor.execute('''SELECT DISTINCT tag FROM tag_associations''')
+    return [tag for (tag,) in rows]
+
+def _get_channel_names(cursor, channel_ids):
+    ''' returns list of (channel_id, channel_name), in the order of
+    channel_ids. Ids that have no row in subscribed_channels are left out. '''
+    result = []
+    for channel_id in channel_ids:
+        row = cursor.execute('''SELECT channel_name
+                                FROM subscribed_channels
+                                WHERE yt_channel_id = ?''', (channel_id,)).fetchone()
+        if row is None:
+            # fetchone() gives None for an unknown id; indexing it used to
+            # raise TypeError and abort the whole lookup
+            continue
+        result.append( (channel_id, row[0]) )
+    return result
+
+
+def _channels_with_tag(cursor, tag, order=False, exclude_muted=False, include_muted_status=False):
+    ''' returns list of (channel_id, channel_name) for the channels that
+    carry the given tag.
+
+    order: sort by channel name, ignoring case
+    exclude_muted: leave out channels whose muted column is 1
+    include_muted_status: rows become (channel_id, channel_name, muted) '''
+    # Optional pieces of the statement; each is empty when its flag is off
+    muted_column = ''', muted''' if include_muted_status else ''
+    muted_filter = '''AND muted != 1\n''' if exclude_muted else ''
+    ordering = '''ORDER BY channel_name COLLATE NOCASE''' if order else ''
+
+    statement = '''SELECT yt_channel_id, channel_name''' + muted_column + '''
+        FROM subscribed_channels
+        WHERE subscribed_channels.id IN (
+            SELECT tag_associations.sql_channel_id FROM tag_associations WHERE tag=?
+        )
+    ''' + muted_filter + ordering
+
+    return cursor.execute(statement, [tag]).fetchall()
+
+
+# Length of each time unit in seconds (months and years are approximations)
+units = {
+    'year': 31536000, # 365*24*3600
+    'month': 2592000, # 30*24*3600
+    'week': 604800, # 7*24*3600
+    'day': 86400, # 24*3600
+    'hour': 3600,
+    'minute': 60,
+    'second': 1,
+}
+def youtube_timestamp_to_posix(dumb_timestamp):
+    ''' Given a dumbed down timestamp such as 1 year ago, 3 hours ago,
+    approximates the unix time (seconds since 1/1/1970)
+
+    Words in front of the count (e.g. "Streamed 2 weeks ago") are skipped.
+    Raises ValueError if no "<number> <unit>" pair is found and KeyError if
+    the unit is not one of the keys of units. '''
+    dumb_timestamp = dumb_timestamp.lower().strip()
+    now = time.time()
+    if dumb_timestamp == "just now":
+        return now
+
+    words = dumb_timestamp.split()
+    # The count is the first purely numeric word; the unit is the word after it
+    for position, word in enumerate(words[:-1]):
+        if word.isdecimal():
+            number = int(word)
+            # No unit name ends in "s", so dropping trailing "s" turns plural
+            # into singular without depending on the value of the count
+            unit = words[position + 1].rstrip('s')
+            return now - number*units[unit]
+
+    raise ValueError('Unrecognized timestamp: ' + repr(dumb_timestamp))
+
+
+# Names (without file extension) of the files already present in the
+# thumbnails directory; the thumbnail worker adds video ids to this set
+try:
+ existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory))
+except FileNotFoundError:
+ existing_thumbnails = set()
+
+
+# Work queues consumed by download_thumbnail_worker and check_channel_worker
+thumbnails_queue = util.RateLimitedQueue()
+check_channels_queue = util.RateLimitedQueue()
+
+
+# Use this to mark a thumbnail acceptable to be retrieved at the request of the browser
+# can't simply check if it's in the queue because items are removed when the download starts, not when it finishes
+downloading_thumbnails = set()
+
+# Channel ids that have been queued for a check and are not finished yet
+checking_channels = set()
+
+# Just to use for printing channel checking status to console without opening database
+channel_names = dict()
+
+def download_thumbnail_worker():
+    ''' Endless loop: takes video ids off thumbnails_queue and downloads
+    their thumbnails into thumbnails_directory.
+
+    A failed download prints a traceback and the loop carries on. The id is
+    removed from downloading_thumbnails whether or not the download worked. '''
+    while True:
+        video_id = thumbnails_queue.get()
+        try:
+            downloaded = util.download_thumbnail(thumbnails_directory, video_id)
+            if not downloaded:
+                continue
+            existing_thumbnails.add(video_id)
+        except Exception:
+            traceback.print_exc()
+        finally:
+            # runs on every path, including the "continue" above
+            downloading_thumbnails.remove(video_id)
+
+def check_channel_worker():
+    ''' Endless loop: takes channel ids off check_channels_queue and fetches
+    their videos with _get_upstream_videos.
+
+    A failed check prints a traceback and the loop carries on, like
+    download_thumbnail_worker does. The id is removed from checking_channels
+    whether or not the check worked. '''
+    while True:
+        channel_id = check_channels_queue.get()
+        try:
+            _get_upstream_videos(channel_id)
+        except Exception:
+            # Without this, one failing channel would end this worker for
+            # good; after enough failures nothing would drain the queue.
+            traceback.print_exc()
+        finally:
+            checking_channels.remove(channel_id)
+
+# Started when the module is imported: five gevent greenlets for each of the
+# two worker loops
+for i in range(0,5):
+ gevent.spawn(download_thumbnail_worker)
+ gevent.spawn(check_channel_worker)
+
+
+
+
+
+
+def download_thumbnails_if_necessary(thumbnails):
+    ''' thumbnails is an iterable of video ids. Queues a download for every
+    id that is neither in existing_thumbnails nor already marked in
+    downloading_thumbnails. '''
+    for video_id in thumbnails:
+        if video_id in existing_thumbnails or video_id in downloading_thumbnails:
+            continue
+        # mark first, so the same id is not queued twice
+        downloading_thumbnails.add(video_id)
+        thumbnails_queue.put(video_id)
+
+def check_channels_if_necessary(channel_ids):
+    ''' Queues a check for every channel id in channel_ids that is not
+    already marked in checking_channels. '''
+    for channel_id in channel_ids:
+        if channel_id in checking_channels:
+            continue
+        # mark first, so the same channel is not queued twice
+        checking_channels.add(channel_id)
+        check_channels_queue.put(channel_id)
+
+
+
+def _get_upstream_videos(channel_id):
+    ''' Fetches the video grid of the given channel, stores videos that are
+    not in the database yet, queues thumbnail downloads for the recent ones
+    and updates the channel's time_last_checked.
+
+    Videos whose publish time is missing or cannot be parsed are printed and
+    skipped. '''
+    try:
+        print("Checking channel: " + channel_names[channel_id])
+    except KeyError:
+        print("Checking channel " + channel_id)
+
+    videos = []
+
+    json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response'])
+    for i, json_video in enumerate(json_channel_videos):
+        info = yt_data_extract.renderer_info(json_video['gridVideoRenderer'])
+        if 'description' not in info:
+            info['description'] = ''
+        try:
+            # subtract a few seconds off the videos so they will be in the right order
+            info['time_published'] = youtube_timestamp_to_posix(info['published']) - i
+        except (KeyError, ValueError, IndexError):
+            # No usable publish time. Skip this video: falling through would
+            # fail on the missing 'time_published' key and abort the whole
+            # channel. Since it is not stored, a later check can still add it.
+            print(info)
+            continue
+        videos.append((channel_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']))
+
+    now = time.time()
+    # Don't download thumbnails from videos older than a month
+    download_thumbnails_if_necessary(video[1] for video in videos if (now - video[4]) < 30*24*3600)
+
+    with open_database() as connection:
+        with connection as cursor:
+            # The first placeholder is the YouTube channel id, translated to the
+            # database row id by the subquery
+            cursor.executemany('''INSERT OR IGNORE INTO videos (sql_channel_id, video_id, title, duration, time_published, description)
+                                  VALUES ((SELECT id FROM subscribed_channels WHERE yt_channel_id=?), ?, ?, ?, ?, ?)''', videos)
+            cursor.execute('''UPDATE subscribed_channels
+                              SET time_last_checked = ?
+                              WHERE yt_channel_id=?''', [int(time.time()), channel_id])
+
+
+def check_all_channels():
+    ''' Queues a check of every subscribed channel whose muted column is
+    not 1, and records the channel names for console output. '''
+    query = '''SELECT yt_channel_id, channel_name
+               FROM subscribed_channels
+               WHERE muted != 1'''
+    with open_database() as connection:
+        with connection as cursor:
+            id_name_pairs = cursor.execute(query).fetchall()
+
+    channel_names.update(id_name_pairs)
+    check_channels_if_necessary([pair[0] for pair in id_name_pairs])
+
+
+def check_tags(tags):
+    ''' Queues a check of every channel (muted ones excluded) that carries
+    one of the given tags, and records the channel names for console
+    output. '''
+    id_name_pairs = []
+    with open_database() as connection:
+        with connection as cursor:
+            for tag in tags:
+                id_name_pairs.extend(_channels_with_tag(cursor, tag, exclude_muted=True))
+
+    channel_names.update(id_name_pairs)
+    check_channels_if_necessary([pair[0] for pair in id_name_pairs])
+
+
+def check_specific_channels(channel_ids):
+    ''' Queues a check of the given YouTube channel ids. Names of the ids
+    that are found in the database are recorded for console output. '''
+    query = '''SELECT yt_channel_id, channel_name
+               FROM subscribed_channels
+               WHERE yt_channel_id=?'''
+    id_name_pairs = []
+    with open_database() as connection:
+        with connection as cursor:
+            for channel_id in channel_ids:
+                id_name_pairs.extend(cursor.execute(query, [channel_id]).fetchall())
+
+    channel_names.update(id_name_pairs)
+    # every requested id is queued, whether or not a name was found for it
+    check_channels_if_necessary(channel_ids)
+
+
+
+
+def import_subscriptions(env, start_response):
+    ''' Request handler: subscribes to every channel listed in an uploaded
+    subscriptions file, then redirects to the subscription manager.
+
+    env['parameters']['subscriptions_file'] is (content_type, file_bytes).
+    JSON (list of items with snippet.resourceId.channelId and snippet.title)
+    and OPML XML (outline elements with an xmlUrl attribute containing
+    "channel_id=") are accepted. Anything that cannot be read is answered
+    with 400 Bad Request. '''
+    content_type = env['parameters']['subscriptions_file'][0]
+    file = env['parameters']['subscriptions_file'][1]
+
+    try:
+        file = file.decode('utf-8')
+    except UnicodeDecodeError:
+        start_response('400 Bad Request', () )
+        return b'400 Bad Request: File is not valid UTF-8'
+
+    if content_type == 'application/json':
+        try:
+            file = json.loads(file)
+        except json.decoder.JSONDecodeError:
+            traceback.print_exc()
+            start_response('400 Bad Request', () )
+            return b'400 Bad Request: Invalid json file'
+
+        try:
+            # Built as a list rather than a generator so that a malformed
+            # entry fails here, inside the try, instead of later during the
+            # database insert.
+            channels = [ (item['snippet']['resourceId']['channelId'], item['snippet']['title']) for item in file ]
+        except (KeyError, IndexError, TypeError):
+            traceback.print_exc()
+            start_response('400 Bad Request', () )
+            return b'400 Bad Request: Unknown json structure'
+    elif content_type in ('application/xml', 'text/xml', 'text/x-opml'):
+        marker = 'channel_id='
+        try:
+            root = defusedxml.ElementTree.fromstring(file)
+            # explicit check instead of assert, which is removed under python -O
+            if root.tag != 'opml':
+                raise ValueError('root element is not opml')
+            channels = []
+            for outline_element in root[0][0]:
+                if (outline_element.tag != 'outline') or ('xmlUrl' not in outline_element.attrib):
+                    continue
+
+                channel_rss_url = outline_element.attrib['xmlUrl']
+                marker_position = channel_rss_url.find(marker)
+                if marker_position == -1:
+                    # no channel id in this url; slicing from find()'s -1
+                    # would produce a garbage id
+                    continue
+                channel_id = channel_rss_url[marker_position + len(marker):].strip()
+                # fall back to the id when the outline has no text attribute
+                channel_name = outline_element.attrib.get('text', channel_id)
+                channels.append( (channel_id, channel_name) )
+
+        except (ValueError, IndexError, defusedxml.ElementTree.ParseError):
+            start_response('400 Bad Request', () )
+            return b'400 Bad Request: Unable to read opml xml file, or the file is not the expected format'
+    else:
+        start_response('400 Bad Request', () )
+        return b'400 Bad Request: Unsupported file format: ' + html.escape(content_type).encode('utf-8') + b'. Only subscription.json files (from Google Takeouts) and XML OPML files exported from Youtube\'s subscription manager page are supported'
+
+    with_open_db(_subscribe, channels)
+
+    start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscription_manager'),] )
+    return b''
+
+
+
+# One channel row of the subscription manager list. The checkbox belongs to
+# the form with id "subscription-manager-form" and submits $channel_id under
+# the name channel_ids.
+sub_list_item_template = Template('''
+<li class="sub-list-item $mute_class">
+ <input class="sub-list-checkbox" name="channel_ids" value="$channel_id" form="subscription-manager-form" type="checkbox">
+ <a href="$channel_url" class="sub-list-item-name" title="$channel_name">$channel_name</a>
+ <span class="tag-list">$tags</span>
+</li>''')
+
+# One group in the "group by tags" view: a heading with the tag and the list
+# of channel rows ($sub_list) under it
+tag_group_template = Template('''
+<li class="tag-group">
+ <h2 class="tag-group-name">$tag</h2>
+ <ol class="sub-list">
+$sub_list
+ </ol>
+</li>
+''')
+def get_subscription_manager_page(env, start_response):
+    ''' Request handler: renders the subscription manager page.
+
+    With the parameter group_by_tags=1 the channels are grouped under their
+    tags (untagged channels under "No tags"); otherwise a single list of all
+    channels is shown. Returns the page as utf-8 bytes.
+
+    Tags and channel ids are escaped before they are put into the page:
+    tags are typed in by the user and channel ids can come from imported
+    files. '''
+    with open_database() as connection:
+        with connection as cursor:
+            if env['parameters'].get('group_by_tags', '0')[0] == '1':
+
+                sort_name = "Don't group"
+                sort_link = util.URL_ORIGIN + '/subscription_manager'
+
+                main_list_html = '<ul class="tag-group-list">'
+                for tag in _get_all_tags(cursor):
+                    sub_list_html = ''
+                    for channel_id, channel_name, muted in _channels_with_tag(cursor, tag, order=True, include_muted_status=True):
+                        sub_list_html += sub_list_item_template.substitute(
+                            channel_url = util.URL_ORIGIN + '/channel/' + html.escape(channel_id),
+                            channel_name = html.escape(channel_name),
+                            channel_id = html.escape(channel_id),
+                            # the group's own tag is not repeated next to the channel
+                            tags = ', '.join(html.escape(t) for t in _get_tags(cursor, channel_id) if t != tag),
+                            mute_class = 'muted' if muted else '',
+                        )
+                    main_list_html += tag_group_template.substitute(
+                        tag = html.escape(tag),
+                        sub_list = sub_list_html,
+                    )
+
+                # Channels with no tags
+                channel_list = cursor.execute('''SELECT yt_channel_id, channel_name, muted
+                                                 FROM subscribed_channels
+                                                 WHERE id NOT IN (
+                                                     SELECT sql_channel_id FROM tag_associations
+                                                 )
+                                                 ORDER BY channel_name COLLATE NOCASE''').fetchall()
+                if channel_list:
+                    sub_list_html = ''
+                    for channel_id, channel_name, muted in channel_list:
+                        sub_list_html += sub_list_item_template.substitute(
+                            channel_url = util.URL_ORIGIN + '/channel/' + html.escape(channel_id),
+                            channel_name = html.escape(channel_name),
+                            channel_id = html.escape(channel_id),
+                            tags = '',
+                            mute_class = 'muted' if muted else '',
+                        )
+                    main_list_html += tag_group_template.substitute(
+                        tag = "No tags",
+                        sub_list = sub_list_html,
+                    )
+                main_list_html += '</ul>'
+
+            else:
+
+                sort_name = "Group by tags"
+                sort_link = util.URL_ORIGIN + '/subscription_manager?group_by_tags=1'
+
+                main_list_html = '<ol class="sub-list">'
+                for channel_name, channel_id, muted in _get_subscribed_channels(cursor):
+                    main_list_html += sub_list_item_template.substitute(
+                        channel_url = util.URL_ORIGIN + '/channel/' + html.escape(channel_id),
+                        channel_name = html.escape(channel_name),
+                        channel_id = html.escape(channel_id),
+                        tags = ', '.join(html.escape(t) for t in _get_tags(cursor, channel_id)),
+                        mute_class = 'muted' if muted else '',
+                    )
+                main_list_html += '</ol>'
+
+
+
+    start_response('200 OK', [('Content-type','text/html'),])
+    return subscription_manager_template.substitute(
+        header = html_common.get_header(),
+        main_list = main_list_html,
+        sort_name = sort_name,
+        sort_link = sort_link,
+        page_buttons = '',
+    ).encode('utf-8')
+
+def list_from_comma_separated_tags(string):
+    ''' Splits a comma separated string into a list of tags with the
+    surrounding whitespace removed; empty entries are dropped. '''
+    stripped = (piece.strip() for piece in string.split(','))
+    return [tag for tag in stripped if tag]
+
+
+# One channel line of the "are you sure" page shown before unsubscribing
+unsubscribe_list_item_template = Template('''
+<li><a href="$channel_url" title="$channel_name">$channel_name</a></li>''')
+def post_subscription_manager_page(env, start_response):
+    ''' Request handler for the subscription manager form.
+
+    The "action" parameter selects what is done with the channels listed in
+    "channel_ids": add_tags / remove_tags (tags taken from the comma
+    separated "tags" parameter, lowercased), unsubscribe, unsubscribe_verify
+    (answers with a confirmation page), mute or unmute. Any other or missing
+    action is answered with 400 Bad Request; everything except
+    unsubscribe_verify ends in a redirect back to the manager page. '''
+    params = env['parameters']
+    try:
+        action = params['action'][0]
+    except (KeyError, IndexError):
+        start_response('400 Bad Request', ())
+        return b'400 Bad Request'
+
+    # The parameter is absent when no channel was selected; treat that as an
+    # empty selection instead of failing with KeyError
+    channel_ids = params.get('channel_ids', [])
+
+    with open_database() as connection:
+        with connection as cursor:
+            if action == 'add_tags':
+                _add_tags(cursor, channel_ids, [tag.lower() for tag in list_from_comma_separated_tags(params.get('tags', [''])[0])])
+            elif action == 'remove_tags':
+                _remove_tags(cursor, channel_ids, [tag.lower() for tag in list_from_comma_separated_tags(params.get('tags', [''])[0])])
+            elif action == 'unsubscribe':
+                _unsubscribe(cursor, channel_ids)
+            elif action == 'unsubscribe_verify':
+                page = '''
+                <span>Are you sure you want to unsubscribe from these channels?</span>
+                <form class="subscriptions-import-form" action="/youtube.com/subscription_manager" method="POST">'''
+
+                for channel_id in channel_ids:
+                    # the id comes straight from the request, so it is escaped
+                    # before being echoed into the attribute
+                    page += '<input type="hidden" name="channel_ids" value="' + html.escape(channel_id) + '">\n'
+
+                page += '''
+                    <input type="hidden" name="action" value="unsubscribe">
+                    <input type="submit" value="Yes, unsubscribe">
+                </form>
+                <ul>'''
+                for channel_id, channel_name in _get_channel_names(cursor, channel_ids):
+                    page += unsubscribe_list_item_template.substitute(
+                        channel_url = util.URL_ORIGIN + '/channel/' + html.escape(channel_id),
+                        channel_name = html.escape(channel_name),
+                    )
+                page += '''</ul>'''
+
+                start_response('200 OK', [('Content-type','text/html'),])
+                return html_common.yt_basic_template.substitute(
+                    page_title = 'Unsubscribe?',
+                    style = '',
+                    header = html_common.get_header(),
+                    page = page,
+                ).encode('utf-8')
+            elif action == 'mute':
+                cursor.executemany('''UPDATE subscribed_channels
+                                      SET muted = 1
+                                      WHERE yt_channel_id = ?''', [(ci,) for ci in channel_ids])
+            elif action == 'unmute':
+                cursor.executemany('''UPDATE subscribed_channels
+                                      SET muted = 0
+                                      WHERE yt_channel_id = ?''', [(ci,) for ci in channel_ids])
+
+            else:
+                start_response('400 Bad Request', ())
+                return b'400 Bad Request'
+
+    start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscription_manager'),] )
+    return b''
+
+
+
+# Sidebar entry for a tag. Its "Check" button posts action=refresh, type=tag
+# and the tag name.
+sidebar_tag_item_template = Template('''
+<li class="sidebar-list-item">
+ <span class="sidebar-item-name">$tag_name</span>
+ <form method="POST" class="sidebar-item-refresh">
+ <input type="submit" value="Check">
+ <input type="hidden" name="action" value="refresh">
+ <input type="hidden" name="type" value="tag">
+ <input type="hidden" name="tag_name" value="$tag_name">
+ </form>
+</li>''')
+
+
+# Sidebar entry for a channel. Its "Check" button posts action=refresh,
+# type=channel and the channel id.
+sidebar_channel_item_template = Template('''
+<li class="sidebar-list-item $mute_class">
+ <a href="$channel_url" class="sidebar-item-name" title="$channel_name">$channel_name</a>
+ <form method="POST" class="sidebar-item-refresh">
+ <input type="submit" value="Check">
+ <input type="hidden" name="action" value="refresh">
+ <input type="hidden" name="type" value="channel">
+ <input type="hidden" name="channel_id" value="$channel_id">
+ </form>
+</li>''')
+
+def get_subscriptions_page(env, start_response):
+    ''' Request handler: renders the subscriptions page with the 60 newest
+    stored videos, a sidebar list of tags and a sidebar list of channels.
+    Returns the page as utf-8 bytes.
+
+    Tag names and channel ids are escaped before they are put into the page:
+    tags are typed in by the user and channel ids can come from imported
+    files. '''
+    with open_database() as connection:
+        with connection as cursor:
+            items_html = '''<nav class="item-grid">\n'''
+
+            for item in _get_videos(cursor, 60, 0):
+                if item['id'] in downloading_thumbnails:
+                    # the local copy is still being downloaded: use the url
+                    # from util.get_thumbnail_url for now
+                    item['thumbnail'] = util.get_thumbnail_url(item['id'])
+                else:
+                    item['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + item['id'] + '.jpg'
+                items_html += html_common.video_item_html(item, html_common.small_video_item_template)
+            items_html += '''\n</nav>'''
+
+
+            tag_list_html = ''
+            for tag_name in _get_all_tags(cursor):
+                tag_list_html += sidebar_tag_item_template.substitute(tag_name = html.escape(tag_name))
+
+
+            sub_list_html = ''
+            for channel_name, channel_id, muted in _get_subscribed_channels(cursor):
+                sub_list_html += sidebar_channel_item_template.substitute(
+                    channel_url = util.URL_ORIGIN + '/channel/' + html.escape(channel_id),
+                    channel_name = html.escape(channel_name),
+                    channel_id = html.escape(channel_id),
+                    mute_class = 'muted' if muted else '',
+                )
+
+
+
+    start_response('200 OK', [('Content-type','text/html'),])
+    return subscriptions_template.substitute(
+        header = html_common.get_header(),
+        items = items_html,
+        tags = tag_list_html,
+        sub_list = sub_list_html,
+        page_buttons = '',
+    ).encode('utf-8')
+
+def post_subscriptions_page(env, start_response):
+    ''' Request handler for POSTs to the subscriptions page.
+
+    action=subscribe: subscribes to the channels given by the parallel
+        channel_id / channel_name parameters
+    action=unsubscribe: unsubscribes from the channels in channel_id
+    action=refresh: queues channel checks; "type" is all, tag (with
+        tag_name) or channel (with channel_id)
+
+    Answers 204 No Content on success and 400 Bad Request for an unknown or
+    missing action or type, or when the subscribe lists differ in length. '''
+    params = env['parameters']
+    # Missing form fields are treated as empty rather than raising KeyError
+    action = params.get('action', [''])[0]
+    if action == 'subscribe':
+        ids = params.get('channel_id', [])
+        names = params.get('channel_name', [])
+        if len(ids) != len(names):
+            start_response('400 Bad Request', ())
+            return b'400 Bad Request, length of channel_id != length of channel_name'
+        with_open_db(_subscribe, zip(ids, names))
+
+    elif action == 'unsubscribe':
+        with_open_db(_unsubscribe, params.get('channel_id', []))
+
+    elif action == 'refresh':
+        # named refresh_type so the builtin type() is not shadowed
+        refresh_type = params.get('type', [''])[0]
+        if refresh_type == 'all':
+            check_all_channels()
+        elif refresh_type == 'tag':
+            check_tags(params.get('tag_name', []))
+        elif refresh_type == 'channel':
+            check_specific_channels(params.get('channel_id', []))
+        else:
+            start_response('400 Bad Request', ())
+            return b'400 Bad Request'
+
+    else:
+        start_response('400 Bad Request', ())
+        return b'400 Bad Request'
+
+    start_response('204 No Content', ())
+    return b''