about | summary | refs | log | tree | commit | diff | stats
path: root/youtube/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/util.py')
-rw-r--r-- youtube/util.py 55
1 files changed, 37 insertions, 18 deletions
diff --git a/youtube/util.py b/youtube/util.py
index b19f91b..3c32ddb 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -54,7 +54,7 @@ URL_ORIGIN = "/https://www.youtube.com"
connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
old_tor_connection_pool = None
-tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED')
+tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED')
tor_pool_refresh_time = time.monotonic() # prevent problems due to clock changes
@@ -74,7 +74,7 @@ def get_pool(use_tor):
# Keep a reference for 5 min to avoid it getting garbage collected while sockets still in use
old_tor_connection_pool = tor_connection_pool
- tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED')
+ tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED')
tor_pool_refresh_time = current_time
return tor_connection_pool
@@ -119,8 +119,11 @@ def decode_content(content, encoding_header):
content = gzip.decompress(content)
return content
-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
+def fetch_url_response(url, headers=(), timeout=15, data=None,
+ cookiejar_send=None, cookiejar_receive=None,
+ use_tor=True, max_redirects=None):
'''
+ returns response, cleanup_function
When cookiejar_send is set to a CookieJar object,
those cookies will be sent in the request (but cookies in response will not be merged into it)
When cookiejar_receive is set to a CookieJar object,
@@ -147,32 +150,51 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
elif not isinstance(data, bytes):
data = urllib.parse.urlencode(data).encode('ascii')
- start_time = time.time()
-
if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
req = urllib.request.Request(url, data=data, headers=headers)
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
if use_tor and settings.route_tor:
- opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
+ opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", settings.tor_port), cookie_processor)
else:
opener = urllib.request.build_opener(cookie_processor)
response = opener.open(req, timeout=timeout)
- response_time = time.time()
-
-
- content = response.read()
+ cleanup_func = (lambda r: None)
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
+ # default: Retry.DEFAULT = Retry(3)
+ # (in connectionpool.py in urllib3)
+ # According to the documentation for urlopen, a redirect counts as a
+ # retry. So there are 3 redirects max by default.
+ if max_redirects:
+ retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+ else:
+ retries = urllib3.Retry(3)
pool = get_pool(use_tor and settings.route_tor)
+ response = pool.request(method, url, headers=headers,
+ timeout=timeout, preload_content=False,
+ decode_content=False, retries=retries)
+ cleanup_func = (lambda r: r.release_conn())
- response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False)
- response_time = time.time()
+ return response, cleanup_func
- content = response.read()
- response.release_conn()
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
+ cookiejar_send=None, cookiejar_receive=None, use_tor=True,
+ debug_name=None):
+ start_time = time.time()
+
+ response, cleanup_func = fetch_url_response(
+ url, headers, timeout=timeout,
+ cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
+ use_tor=use_tor)
+ response_time = time.time()
+
+ content = response.read()
+ read_finish = time.time()
+
+ cleanup_func(response) # release_connection for urllib3
if (response.status == 429
and content.startswith(b'<!DOCTYPE')
@@ -185,7 +207,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
elif response.status >= 400:
raise FetchError(str(response.status), reason=response.reason, ip=None)
- read_finish = time.time()
if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
@@ -198,8 +219,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
with open(os.path.join(save_dir, debug_name), 'wb') as f:
f.write(content)
- if return_response:
- return content, response
return content
def head(url, use_tor=False, report_text=None, max_redirects=10):
@@ -209,7 +228,7 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
# default: Retry.DEFAULT = Retry(3)
# (in connectionpool.py in urllib3)
# According to the documentation for urlopen, a redirect counts as a retry
- # by default. So there are 3 redirects max by default. Let's change that
+ # So there are 3 redirects max by default. Let's change that
# to 10 since googlevideo redirects a lot.
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
raise_on_redirect=False)