From ae5fd9eb009c6522e7d7971e0bdea57faaaf2b3c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 5 Jun 2019 00:41:15 -0700 Subject: Make thumbnails work and other stuff --- youtube/util.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'youtube/util.py') diff --git a/youtube/util.py b/youtube/util.py index 9950815..42d76a3 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -5,6 +5,8 @@ import brotli import urllib.parse import re import time +import os +import gevent # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates @@ -176,6 +178,36 @@ desktop_ua = (('User-Agent', desktop_user_agent),) +def download_thumbnail(save_directory, video_id): + url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg" + save_location = os.path.join(save_directory, video_id + ".jpg") + try: + thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) + except urllib.error.HTTPError as e: + print("Failed to download thumbnail for " + video_id + ": " + str(e)) + return + try: + f = open(save_location, 'wb') + except FileNotFoundError: + os.makedirs(save_directory) + f = open(save_location, 'wb') + f.write(thumbnail) + f.close() + +def download_thumbnails(save_directory, ids): + if not isinstance(ids, (list, tuple)): + ids = list(ids) + # only do 5 at a time + # do the n where n is divisible by 5 + i = -1 + for i in range(0, int(len(ids)/5) - 1 ): + gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5, i*5 + 5)]) + # do the remainders (< 5) + gevent.joinall([gevent.spawn(download_thumbnail, save_directory, ids[j]) for j in range(i*5 + 5, len(ids))]) + + + + def dict_add(*dicts): -- cgit v1.2.3 From 27ee2990e97b1e73df41c1512332a1facd56f759 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 9 Jun 2019 16:03:20 -0700 Subject: Overhaul refresh system, make it asynchronous --- youtube/util.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) (limited to 'youtube/util.py') diff --git a/youtube/util.py b/youtube/util.py index 42d76a3..c4e1aff 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -7,6 +7,8 @@ import re import time import os import gevent +import gevent.queue +import gevent.lock # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates @@ -176,6 +178,53 @@ desktop_ua = (('User-Agent', desktop_user_agent),) +class RateLimitedQueue(gevent.queue.Queue): + ''' Does initial_burst (def. 30) at first, then alternates between waiting waiting_period (def. 5) seconds and doing subsequent_bursts (def. 10) queries. After 5 seconds with nothing left in the queue, resets rate limiting. ''' + + def __init__(self, initial_burst=30, waiting_period=5, subsequent_bursts=10): + self.initial_burst = initial_burst + self.waiting_period = waiting_period + self.subsequent_bursts = subsequent_bursts + + self.count_since_last_wait = 0 + self.surpassed_initial = False + + self.lock = gevent.lock.BoundedSemaphore(1) + self.currently_empty = False + self.empty_start = 0 + gevent.queue.Queue.__init__(self) + + + def get(self): + self.lock.acquire() # blocks if another greenlet currently has the lock + if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial: + gevent.sleep(self.waiting_period) + self.count_since_last_wait = 0 + + elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial: + self.surpassed_initial = True + gevent.sleep(self.waiting_period) + self.count_since_last_wait = 0 + + self.count_since_last_wait += 1 + + if not self.currently_empty and self.empty(): + self.currently_empty = True + self.empty_start = time.monotonic() + + item = gevent.queue.Queue.get(self) # blocks when nothing left + + if self.currently_empty: + if time.monotonic() - self.empty_start >= self.waiting_period: + self.count_since_last_wait = 0 + self.surpassed_initial = False + + self.currently_empty = False + + self.lock.release() + + return item + def download_thumbnail(save_directory, video_id): @@ -185,14 +234,15 @@ def download_thumbnail(save_directory, video_id): thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) except urllib.error.HTTPError as e: print("Failed to download thumbnail for " + video_id + ": " + str(e)) - return + return False try: f = open(save_location, 'wb') except FileNotFoundError: - os.makedirs(save_directory) + os.makedirs(save_directory, exist_ok = True) f = open(save_location, 'wb') f.write(thumbnail) f.close() + return True def download_thumbnails(save_directory, ids): if not isinstance(ids, (list, tuple)): -- cgit v1.2.3