From ae5fd9eb009c6522e7d7971e0bdea57faaaf2b3c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 5 Jun 2019 00:41:15 -0700 Subject: Make thumbnails work and other stuff --- youtube/local_playlist.py | 30 ++------------------------- youtube/subscriptions.py | 53 ++++++++++++++++++++++++++++++++++++++--------- youtube/util.py | 32 ++++++++++++++++++++++++++++ youtube/youtube.py | 2 +- 4 files changed, 78 insertions(+), 39 deletions(-) (limited to 'youtube') diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py index e354013..d083e33 100644 --- a/youtube/local_playlist.py +++ b/youtube/local_playlist.py @@ -33,33 +33,7 @@ def add_to_playlist(name, video_info_list): if id not in ids: file.write(info + "\n") missing_thumbnails.append(id) - gevent.spawn(download_thumbnails, name, missing_thumbnails) - -def download_thumbnail(playlist_name, video_id): - url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" - save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg") - try: - thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id) - except urllib.error.HTTPError as e: - print("Failed to download thumbnail for " + video_id + ": " + str(e)) - return - try: - f = open(save_location, 'wb') - except FileNotFoundError: - os.makedirs(os.path.join(thumbnails_directory, playlist_name)) - f = open(save_location, 'wb') - f.write(thumbnail) - f.close() - -def download_thumbnails(playlist_name, ids): - # only do 5 at a time - # do the n where n is divisible by 5 - i = -1 - for i in range(0, int(len(ids)/5) - 1 ): - gevent.joinall([gevent.spawn(download_thumbnail, playlist_name, ids[j]) for j in range(i*5, i*5 + 5)]) - # do the remainders (< 5) - gevent.joinall([gevent.spawn(download_thumbnail, playlist_name, ids[j]) for j in range(i*5 + 5, len(ids))]) - + gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) def get_local_playlist_page(name): @@ -84,7 +58,7 @@ def get_local_playlist_page(name): videos_html += html_common.video_item_html(info, html_common.small_video_item_template) except json.decoder.JSONDecodeError: pass - gevent.spawn(download_thumbnails, name, missing_thumbnails) + gevent.spawn(util.download_thumbnails, os.path.join(thumbnails_directory, name), missing_thumbnails) return local_playlist_template.substitute( page_title = name + ' - Local playlist', header = html_common.get_header(), diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index dc8412b..93d064d 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -3,16 +3,13 @@ import settings from string import Template import sqlite3 import os -import secrets -import datetime -import itertools import time -import urllib -import socks, sockshandler +import gevent with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f: subscriptions_template = Template(f.read()) +thumbnails_directory = os.path.join(settings.data_dir, "subscription_thumbnails") # https://stackabuse.com/a-sqlite-tutorial-with-python/ @@ -28,14 +25,14 @@ def open_database(): cursor = connection.cursor() cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels ( id integer PRIMARY KEY, - channel_id text NOT NULL, + channel_id text UNIQUE NOT NULL, channel_name text NOT NULL, time_last_checked integer )''') cursor.execute('''CREATE TABLE IF NOT EXISTS videos ( id integer PRIMARY KEY, uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE, - video_id text NOT NULL, + video_id text UNIQUE NOT NULL, title text NOT NULL, duration text, time_published integer NOT NULL, @@ -58,7 +55,7 @@ def _subscribe(channels): connection = open_database() try: cursor = connection.cursor() - cursor.executemany("INSERT INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) + cursor.executemany("INSERT OR IGNORE INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels) connection.commit() except: connection.rollback() @@ -104,6 +101,10 @@ def _get_videos(number, offset): + + + + units = { 'year': 31536000, # 365*24*3600 'month': 2592000, # 30*24*3600 @@ -126,6 +127,16 @@ def youtube_timestamp_to_posix(dumb_timestamp): unit = unit[:-1] # remove s from end return now - number*units[unit] +# Use this to mark a thumbnail acceptable to be retrieved at the request of the browser +downloading_thumbnails = set() +def download_thumbnails(thumbnails_directory, thumbnails): + try: + g = gevent.spawn(util.download_thumbnails, thumbnails_directory, thumbnails) + g.join() + finally: + downloading_thumbnails.difference_update(thumbnails) + + def _get_upstream_videos(channel_id): videos = [] @@ -136,12 +147,34 @@ def _get_upstream_videos(channel_id): info['description'] = '' info['time_published'] = youtube_timestamp_to_posix(info['published']) - i # subtract a few seconds off the videos so they will be in the right order videos.append(info) + + try: + existing_thumbnails = set(os.path.splitext(name)[0] for name in os.listdir(thumbnails_directory)) + except FileNotFoundError: + existing_thumbnails = set() + missing_thumbnails = set(video['id'] for video in videos) - existing_thumbnails + downloading_thumbnails.update(missing_thumbnails) + gevent.spawn(download_thumbnails, thumbnails_directory, missing_thumbnails) + return videos + + + + + + + + def get_subscriptions_page(env, start_response): items_html = '''''' @@ -168,9 +201,9 @@ def post_subscriptions_page(env, start_response): connection = open_database() try: cursor = connection.cursor() - for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels'''): + for uploader_id, channel_id in cursor.execute('''SELECT id, channel_id FROM subscribed_channels''').fetchall(): db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id) ) - cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) + cursor.executemany('''INSERT OR IGNORE INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos) cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) ) connection.commit() diff --git a/youtube/util.py b/youtube/util.py index 9950815..42d76a3 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -5,6 +5,8 @@ import brotli import urllib.parse import re import time +import os +import gevent # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates @@ -176,6 +178,36 @@ desktop_ua = (('User-Agent', desktop_user_agent),) +def download_thumbnail(save_directory, video_id): + url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + save_location = os.path.join(save_directory, video_id + ".jpg") + try: + thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) + except urllib.error.HTTPError as e: + print("Failed to download thumbnail for " + video_id + ": " + str(e)) + return + try: + f = open(save_location, 'wb') + except FileNotFoundError: + os.makedirs(save_directory) + f = open(save_location, 'wb') + f.write(thumbnail) + f.close() + +def download_thumbnails(save_directory, ids): + if not isinstance(ids, (list, tuple)): + ids = list(ids) + # only do 5 at a time + # do the n where n is divisible by 5 + i = -1 + for i in range(0, int(len(ids)/5) - 1 ): + gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)]) + # do the remainders (< 5) + gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))]) + + + + def dict_add(*dicts): diff --git a/youtube/youtube.py b/youtube/youtube.py index 4ec7962..c629bbb 100644 --- a/youtube/youtube.py +++ b/youtube/youtube.py @@ -61,7 +61,7 @@ def youtube(env, start_response): start_response('200 OK', (('Content-type',mime_type),) ) return f.read() - elif path.startswith("/data/playlist_thumbnails/"): + elif path.startswith('/data/playlist_thumbnails/') or path.startswith('/data/subscription_thumbnails/'): with open(os.path.join(settings.data_dir, os.path.normpath(path[6:])), 'rb') as f: start_response('200 OK', (('Content-type', "image/jpeg"),) ) return f.read() -- cgit v1.2.3