diff options
Diffstat (limited to 'youtube/util.py')
| -rw-r--r-- | youtube/util.py | 69 |
1 file changed, 48 insertions, 21 deletions
diff --git a/youtube/util.py b/youtube/util.py index 3a8fd01..5e60d1c 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -1,5 +1,6 @@ from datetime import datetime import logging +import random import settings import socks import sockshandler @@ -19,11 +20,11 @@ import gevent.queue import gevent.lock import collections import stem - -logger = logging.getLogger(__name__) import stem.control import traceback +logger = logging.getLogger(__name__) + # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates # configured by the user will not work. Some draconian networks block TLS unless a corporate @@ -54,8 +55,8 @@ import traceback # https://github.com/kennethreitz/requests/issues/2966 # Until then, I will use a mix of urllib3 and urllib. -import urllib3 -import urllib3.contrib.socks +import urllib3 # noqa: E402 (imported here intentionally after the long note above) +import urllib3.contrib.socks # noqa: E402 URL_ORIGIN = "/https://www.youtube.com" @@ -177,7 +178,6 @@ def get_pool(use_tor): class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): '''Separate cookiejars for receiving and sending''' def __init__(self, cookiejar_send=None, cookiejar_receive=None): - import http.cookiejar self.cookiejar_send = cookiejar_send self.cookiejar_receive = cookiejar_receive @@ -208,6 +208,16 @@ class FetchError(Exception): self.error_message = error_message +def _noop_cleanup(response): + '''No-op cleanup used when the urllib opener owns the response.''' + return None + + +def _release_conn_cleanup(response): + '''Release the urllib3 pooled connection back to the pool.''' + response.release_conn() + + def decode_content(content, encoding_header): encodings = encoding_header.replace(' ', '').split(',') for encoding in reversed(encodings): @@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None, opener = 
urllib.request.build_opener(cookie_processor) response = opener.open(req, timeout=timeout) - cleanup_func = (lambda r: None) + cleanup_func = _noop_cleanup else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. # default: Retry.DEFAULT = Retry(3) @@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None, error_message=msg) else: raise - cleanup_func = (lambda r: r.release_conn()) + cleanup_func = _release_conn_cleanup return response, cleanup_func @@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, Max retries: 5 attempts with exponential backoff """ - import random - max_retries = 5 base_delay = 1.0 # Base delay in seconds @@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, logger.error(f'Server error {response.status} after {max_retries} retries') raise FetchError(str(response.status), reason=response.reason, ip=None) - # Exponential backoff for server errors + # Exponential backoff for server errors. Non-crypto jitter. delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') time.sleep(delay) @@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, else: raise - # Wait and retry + # Wait and retry. Non-crypto jitter. delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) logger.warning(f'Connection error. 
Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') time.sleep(delay) @@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue): def download_thumbnail(save_directory, video_id): - save_location = os.path.join(save_directory, video_id + ".jpg") + save_location = os.path.join(save_directory, video_id + '.jpg') for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'): - url = f"https://i.ytimg.com/vi/{video_id}/{quality}" + url = f'https://i.ytimg.com/vi/{video_id}/{quality}' try: - thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) + thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id) except FetchError as e: if '404' in str(e): continue - print("Failed to download thumbnail for " + video_id + ": " + str(e)) + print('Failed to download thumbnail for ' + video_id + ': ' + str(e)) return False except urllib.error.HTTPError as e: if e.code == 404: continue - print("Failed to download thumbnail for " + video_id + ": " + str(e)) + print('Failed to download thumbnail for ' + video_id + ': ' + str(e)) return False try: - f = open(save_location, 'wb') + with open(save_location, 'wb') as f: + f.write(thumbnail) except FileNotFoundError: os.makedirs(save_directory, exist_ok=True) - f = open(save_location, 'wb') - f.write(thumbnail) - f.close() + with open(save_location, 'wb') as f: + f.write(thumbnail) return True - print("No thumbnail available for " + video_id) + print('No thumbnail available for ' + video_id) return False @@ -899,6 +907,25 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, 'REQUIRE_JS_PLAYER': False, }, + + 'ios_vr': { + 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', + 'INNERTUBE_CONTEXT': { + 'client': { + 'hl': 'en', + 'gl': 'US', + 'clientName': 'IOS_VR', + 'clientVersion': '1.0', + 'deviceMake': 'Apple', + 'deviceModel': 'iPhone16,2', + 'osName': 'iPhone', + 'osVersion': '18.7.2.22H124', + 'userAgent': 'com.google.ios.youtube/1.0 (iPhone16,2; U; CPU iOS 18_7_2 
like Mac OS X)' + } + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'REQUIRE_JS_PLAYER': False + }, } def get_visitor_data(): |
