diff options
Diffstat (limited to 'youtube/util.py')
-rw-r--r-- | youtube/util.py | 55 |
1 files changed, 37 insertions, 18 deletions
diff --git a/youtube/util.py b/youtube/util.py index b19f91b..3c32ddb 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -54,7 +54,7 @@ URL_ORIGIN = "/https://www.youtube.com" connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED') old_tor_connection_pool = None -tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED') +tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED') tor_pool_refresh_time = time.monotonic() # prevent problems due to clock changes @@ -74,7 +74,7 @@ def get_pool(use_tor): # Keep a reference for 5 min to avoid it getting garbage collected while sockets still in use old_tor_connection_pool = tor_connection_pool - tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED') + tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED') tor_pool_refresh_time = current_time return tor_connection_pool @@ -119,8 +119,11 @@ def decode_content(content, encoding_header): content = gzip.decompress(content) return content -def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None): +def fetch_url_response(url, headers=(), timeout=15, data=None, + cookiejar_send=None, cookiejar_receive=None, + use_tor=True, max_redirects=None): ''' + returns response, cleanup_function When cookiejar_send is set to a CookieJar object, those cookies will be sent in the request (but cookies in response will not be merged into it) When cookiejar_receive is set to a CookieJar object, @@ -147,32 +150,51 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja elif not isinstance(data, bytes): data = urllib.parse.urlencode(data).encode('ascii') - start_time = time.time() - if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib req = urllib.request.Request(url, data=data, headers=headers) cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive) if use_tor and settings.route_tor: - opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor) + opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", settings.tor_port), cookie_processor) else: opener = urllib.request.build_opener(cookie_processor) response = opener.open(req, timeout=timeout) - response_time = time.time() - - - content = response.read() + cleanup_func = (lambda r: None) else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. + # default: Retry.DEFAULT = Retry(3) + # (in connectionpool.py in urllib3) + # According to the documentation for urlopen, a redirect counts as a + # retry. So there are 3 redirects max by default. + if max_redirects: + retries = urllib3.Retry(3+max_redirects, redirect=max_redirects) + else: + retries = urllib3.Retry(3) pool = get_pool(use_tor and settings.route_tor) + response = pool.request(method, url, headers=headers, + timeout=timeout, preload_content=False, + decode_content=False, retries=retries) + cleanup_func = (lambda r: r.release_conn()) - response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False) - response_time = time.time() + return response, cleanup_func - content = response.read() - response.release_conn() +def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, + cookiejar_send=None, cookiejar_receive=None, use_tor=True, + debug_name=None): + start_time = time.time() + + response, cleanup_func = fetch_url_response( + url, headers, timeout=timeout, + cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, + use_tor=use_tor) + response_time = time.time() + + content = response.read() + read_finish = time.time() + + cleanup_func(response) # release_connection for urllib3 if (response.status == 429 and content.startswith(b'<!DOCTYPE') @@ -185,7 +207,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja elif response.status >= 400: raise FetchError(str(response.status), reason=response.reason, ip=None) - read_finish = time.time() if report_text: print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) content = decode_content(content, response.getheader('Content-Encoding', default='identity')) @@ -198,8 +219,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja with open(os.path.join(save_dir, debug_name), 'wb') as f: f.write(content) - if return_response: - return content, response return content def head(url, use_tor=False, report_text=None, max_redirects=10): @@ -209,7 +228,7 @@ def head(url, use_tor=False, report_text=None, max_redirects=10): # default: Retry.DEFAULT = Retry(3) # (in connectionpool.py in urllib3) # According to the documentation for urlopen, a redirect counts as a retry - # by default. So there are 3 redirects max by default. Let's change that + # So there are 3 redirects max by default. Let's change that # to 10 since googlevideo redirects a lot. retries = urllib3.Retry(3+max_redirects, redirect=max_redirects, raise_on_redirect=False) |