diff options
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/__init__.py | 4 | ||||
-rw-r--r-- | youtube/templates/shared.css | 1 | ||||
-rw-r--r-- | youtube/util.py | 162 |
3 files changed, 121 insertions, 46 deletions
diff --git a/youtube/__init__.py b/youtube/__init__.py index 6d79e44..a8ca227 100644 --- a/youtube/__init__.py +++ b/youtube/__init__.py @@ -68,8 +68,10 @@ def error_page(e): error_message = ('Error: Youtube blocked the request because the Tor' ' exit node is overutilized. Try getting a new exit node by' ' using the New Identity button in the Tor Browser.') + if exc_info()[1].error_message: + error_message += '\n\n' + exc_info()[1].error_message if exc_info()[1].ip: - error_message += ' Exit node IP address: ' + exc_info()[1].ip + error_message += '\n\nExit node IP address: ' + exc_info()[1].ip return flask.render_template('error.html', error_message=error_message, slim=slim), 502 return flask.render_template('error.html', traceback=traceback.format_exc(), slim=slim), 500 diff --git a/youtube/templates/shared.css b/youtube/templates/shared.css index 2288a34..3dd48ac 100644 --- a/youtube/templates/shared.css +++ b/youtube/templates/shared.css @@ -341,6 +341,7 @@ h1{ font-weight: normal; } #error-box, #error-message{ + white-space: pre-wrap; background-color: var(--interface-color); width: 80%; margin: auto; diff --git a/youtube/util.py b/youtube/util.py index 8d0f8ca..ccdcbc1 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -16,6 +16,9 @@ import gevent import gevent.queue import gevent.lock import collections +import stem +import stem.control +import traceback # The trouble with the requests library: It ships its own certificate bundle via certifi # instead of using the system certificate store, meaning self-signed certificates @@ -54,32 +57,81 @@ URL_ORIGIN = "/https://www.youtube.com" connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED') -old_tor_connection_pool = None -tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED') +class TorManager: + def __init__(self): + self.old_tor_connection_pool = None + self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager( + 'socks5://127.0.0.1:' + str(settings.tor_port) + '/', + cert_reqs = 'CERT_REQUIRED') + self.tor_pool_refresh_time = time.monotonic() -tor_pool_refresh_time = time.monotonic() # prevent problems due to clock changes + self.new_identity_lock = gevent.lock.BoundedSemaphore(1) + self.last_new_identity_time = time.monotonic() - 20 -def get_pool(use_tor): - global old_tor_connection_pool - global tor_connection_pool - global tor_pool_refresh_time + def refresh_tor_connection_pool(self): + self.tor_connection_pool.clear() - if not use_tor: - return connection_pool + # Keep a reference for 5 min to avoid it getting garbage collected + # while sockets still in use + self.old_tor_connection_pool = self.tor_connection_pool - # Tor changes circuits after 10 minutes: https://tor.stackexchange.com/questions/262/for-how-long-does-a-circuit-stay-alive - current_time = time.monotonic() - if current_time - tor_pool_refresh_time > 300: # close pool after 5 minutes - tor_connection_pool.clear() + self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager( + 'socks5://127.0.0.1:' + str(settings.tor_port) + '/', + cert_reqs = 'CERT_REQUIRED') + self.tor_pool_refresh_time = time.monotonic() - # Keep a reference for 5 min to avoid it getting garbage collected while sockets still in use - old_tor_connection_pool = tor_connection_pool + def get_tor_connection_pool(self): + # Tor changes circuits after 10 minutes: + # https://tor.stackexchange.com/questions/262/for-how-long-does-a-circuit-stay-alive + current_time = time.monotonic() - tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED') - tor_pool_refresh_time = current_time + # close pool after 5 minutes + if current_time - self.tor_pool_refresh_time > 300: + self.refresh_tor_connection_pool() - return tor_connection_pool + return self.tor_connection_pool + def new_identity(self, time_failed_request_started): + '''return error, or None if no error and the identity is fresh''' + print('new_identity: new_identity called') + # blocks if another greenlet currently has the lock + self.new_identity_lock.acquire() + print('new_identity: New identity lock acquired') + + try: + # This was caused by a request that failed within a previous, + # stale identity + if time_failed_request_started <= self.last_new_identity_time: + print('new_identity: Cancelling; request was from stale identity') + return None + + delta = time.monotonic() - self.last_new_identity_time + if delta < 20: + print('new_identity: Retried already within last 20 seconds') + return 'Retried with new circuit once (max) within last 20 seconds.' + try: + port = settings.tor_control_port + with stem.control.Controller.from_port(port=port) as controller: + controller.authenticate() + print('new_identity: Getting new identity') + controller.signal(stem.Signal.NEWNYM) + print('new_identity: NEWNYM signal sent') + self.last_new_identity_time = time.monotonic() + self.refresh_tor_connection_pool() + return None + except stem.SocketError: + traceback.print_exc() + return 'Failed to connect to Tor control port.' + finally: + self.new_identity_lock.release() + +tor_manager = TorManager() + + +def get_pool(use_tor): + if not use_tor: + return connection_pool + return tor_manager.get_tor_connection_pool() class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): @@ -103,11 +155,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): https_response = http_response class FetchError(Exception): - def __init__(self, code, reason='', ip=None): + def __init__(self, code, reason='', ip=None, error_message=None): Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason) self.code = code self.reason = reason self.ip = ip + self.error_message = error_message def decode_content(content, encoding_header): encodings = encoding_header.replace(' ', '').split(',') @@ -184,32 +237,51 @@ def fetch_url_response(url, headers=(), timeout=15, data=None, def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, debug_name=None): - start_time = time.time() - - response, cleanup_func = fetch_url_response( - url, headers, timeout=timeout, - cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, - use_tor=use_tor) - response_time = time.time() - - content = response.read() - read_finish = time.time() - - cleanup_func(response) # release_connection for urllib3 - content = decode_content( - content, - response.getheader('Content-Encoding', default='identity')) - - if (response.status == 429 - and content.startswith(b'<!DOCTYPE') - and b'Our systems have detected unusual traffic' in content): - ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', - content) - ip = ip.group(1).decode('ascii') if ip else None - raise FetchError('429', reason=response.reason, ip=ip) - - elif response.status >= 400: - raise FetchError(str(response.status), reason=response.reason, ip=None) + while True: + start_time = time.time() + + response, cleanup_func = fetch_url_response( + url, headers, timeout=timeout, + cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, + use_tor=use_tor) + response_time = time.time() + + content = response.read() + + read_finish = time.time() + + cleanup_func(response) # release_connection for urllib3 + content = decode_content( + content, + response.getheader('Content-Encoding', default='identity')) + + if (response.status == 429 + and content.startswith(b'<!DOCTYPE') + and b'Our systems have detected unusual traffic' in content): + ip = re.search( + br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', + content) + ip = ip.group(1).decode('ascii') if ip else None + + # don't get new identity if we're not using Tor + if not use_tor: + raise FetchError('429', reason=response.reason, ip=ip) + + print('Error: Youtube blocked the request because the Tor exit node is overutilized. Exit node IP address: %s' % ip) + + # get new identity + error = tor_manager.new_identity(start_time) + if error: + raise FetchError( + '429', reason=response.reason, ip=ip, + error_message='Automatic circuit change: ' + error) + else: + continue # retry now that we have new identity + + elif response.status >= 400: + raise FetchError(str(response.status), reason=response.reason, + ip=None) + break if report_text: print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) |