about summary refs log tree commit diff stats
path: root/youtube/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/util.py')
-rw-r--r--youtube/util.py69
1 file changed, 48 insertions, 21 deletions
diff --git a/youtube/util.py b/youtube/util.py
index 3a8fd01..5e60d1c 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,5 +1,6 @@
from datetime import datetime
import logging
+import random
import settings
import socks
import sockshandler
@@ -19,11 +20,11 @@ import gevent.queue
import gevent.lock
import collections
import stem
-
-logger = logging.getLogger(__name__)
import stem.control
import traceback
+logger = logging.getLogger(__name__)
+
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
# configured by the user will not work. Some draconian networks block TLS unless a corporate
@@ -54,8 +55,8 @@ import traceback
# https://github.com/kennethreitz/requests/issues/2966
# Until then, I will use a mix of urllib3 and urllib.
-import urllib3
-import urllib3.contrib.socks
+import urllib3 # noqa: E402 (imported here intentionally after the long note above)
+import urllib3.contrib.socks # noqa: E402
URL_ORIGIN = "/https://www.youtube.com"
@@ -177,7 +178,6 @@ def get_pool(use_tor):
class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
'''Separate cookiejars for receiving and sending'''
def __init__(self, cookiejar_send=None, cookiejar_receive=None):
- import http.cookiejar
self.cookiejar_send = cookiejar_send
self.cookiejar_receive = cookiejar_receive
@@ -208,6 +208,16 @@ class FetchError(Exception):
self.error_message = error_message
+def _noop_cleanup(response):
+ '''No-op cleanup used when the urllib opener owns the response.'''
+ return None
+
+
+def _release_conn_cleanup(response):
+ '''Release the urllib3 pooled connection back to the pool.'''
+ response.release_conn()
+
+
def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
for encoding in reversed(encodings):
@@ -263,7 +273,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
opener = urllib.request.build_opener(cookie_processor)
response = opener.open(req, timeout=timeout)
- cleanup_func = (lambda r: None)
+ cleanup_func = _noop_cleanup
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
# default: Retry.DEFAULT = Retry(3)
@@ -297,7 +307,7 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
error_message=msg)
else:
raise
- cleanup_func = (lambda r: r.release_conn())
+ cleanup_func = _release_conn_cleanup
return response, cleanup_func
@@ -315,8 +325,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
Max retries: 5 attempts with exponential backoff
"""
- import random
-
max_retries = 5
base_delay = 1.0 # Base delay in seconds
@@ -401,7 +409,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
logger.error(f'Server error {response.status} after {max_retries} retries')
raise FetchError(str(response.status), reason=response.reason, ip=None)
- # Exponential backoff for server errors
+ # Exponential backoff for server errors. Non-crypto jitter.
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f'Server error ({response.status}). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
@@ -432,7 +440,7 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
else:
raise
- # Wait and retry
+ # Wait and retry. Non-crypto jitter.
delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
logger.warning(f'Connection error. Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...')
time.sleep(delay)
@@ -532,30 +540,30 @@ class RateLimitedQueue(gevent.queue.Queue):
def download_thumbnail(save_directory, video_id):
- save_location = os.path.join(save_directory, video_id + ".jpg")
+ save_location = os.path.join(save_directory, video_id + '.jpg')
for quality in ('hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'):
- url = f"https://i.ytimg.com/vi/{video_id}/{quality}"
+ url = f'https://i.ytimg.com/vi/{video_id}/{quality}'
try:
- thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
+ thumbnail = fetch_url(url, report_text='Saved thumbnail: ' + video_id)
except FetchError as e:
if '404' in str(e):
continue
- print("Failed to download thumbnail for " + video_id + ": " + str(e))
+ print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
return False
except urllib.error.HTTPError as e:
if e.code == 404:
continue
- print("Failed to download thumbnail for " + video_id + ": " + str(e))
+ print('Failed to download thumbnail for ' + video_id + ': ' + str(e))
return False
try:
- f = open(save_location, 'wb')
+ with open(save_location, 'wb') as f:
+ f.write(thumbnail)
except FileNotFoundError:
os.makedirs(save_directory, exist_ok=True)
- f = open(save_location, 'wb')
- f.write(thumbnail)
- f.close()
+ with open(save_location, 'wb') as f:
+ f.write(thumbnail)
return True
- print("No thumbnail available for " + video_id)
+ print('No thumbnail available for ' + video_id)
return False
@@ -899,6 +907,25 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
'REQUIRE_JS_PLAYER': False,
},
+
+ 'ios_vr': {
+ 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'hl': 'en',
+ 'gl': 'US',
+ 'clientName': 'IOS_VR',
+ 'clientVersion': '1.0',
+ 'deviceMake': 'Apple',
+ 'deviceModel': 'iPhone16,2',
+ 'osName': 'iPhone',
+ 'osVersion': '18.7.2.22H124',
+ 'userAgent': 'com.google.ios.youtube/1.0 (iPhone16,2; U; CPU iOS 18_7_2 like Mac OS X)'
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
+ 'REQUIRE_JS_PLAYER': False
+ },
}
def get_visitor_data():