diff options
author | James Taylor <user234683@users.noreply.github.com> | 2020-12-21 11:59:35 -0800 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2020-12-21 18:23:09 -0500 |
commit | b11120d000970304b01287a28d6494e4844cfced (patch) | |
tree | df8076e1b2dbe8187f4317cd54a9c92e939d6626 | |
parent | 574cb2dae8534a8abba5710217e4f6c8655ff854 (diff) | |
download | yt-local-b11120d000970304b01287a28d6494e4844cfced.tar.lz yt-local-b11120d000970304b01287a28d6494e4844cfced.tar.xz yt-local-b11120d000970304b01287a28d6494e4844cfced.zip |
Exit node retrying: Retry 3 times. Also add tests for it.
Closes #20
Signed-off-by: Jesús <heckyel@hyperbola.info>
-rw-r--r-- | pytest.ini | 4 | ||||
-rw-r--r-- | requirements-dev.txt | 1 | ||||
-rw-r--r-- | tests/conftest.py | 14 | ||||
-rw-r--r-- | tests/test_responses/429.html | 28 | ||||
-rw-r--r-- | tests/test_util.py | 76 | ||||
-rw-r--r-- | youtube/util.py | 21 |
6 files changed, 140 insertions, 4 deletions
diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..fb67425 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,4 @@ +# pytest.ini +[pytest] +testpaths = + tests diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..559a4fc --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1 @@ +pytest>=6.2.1 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..2694317 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,14 @@ +import pytest +import urllib3 +import urllib +import urllib.request +import socket + +# https://realpython.com/pytest-python-testing/ +@pytest.fixture(autouse=True) +def disable_network_calls(monkeypatch): + def stunted_get(*args, **kwargs): + raise RuntimeError('Network access not allowed during testing!') + monkeypatch.setattr(urllib.request, 'Request', stunted_get) + monkeypatch.setattr(urllib3.PoolManager, 'request', stunted_get) + monkeypatch.setattr(socket, 'socket', stunted_get) diff --git a/tests/test_responses/429.html b/tests/test_responses/429.html new file mode 100644 index 0000000..9bde0f9 --- /dev/null +++ b/tests/test_responses/429.html @@ -0,0 +1,28 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> +<html> +<head><meta http-equiv="content-type" content="text/html; charset=utf-8"><meta name="viewport" content="initial-scale=1"><title>https://m.youtube.com/watch?v=aaaaaaaaaaa&pbj=1&bpctr=9999999999</title></head> +<body style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;" onload="e=document.getElementById('captcha');if(e){e.focus();}"> +<div style="max-width:400px;"> +<hr noshade size="1" style="color:#ccc; background-color:#ccc;"><br> +<form id="captcha-form" action="index" method="post"> +<script src="https://www.google.com/recaptcha/api.js" async defer></script> +<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script> +<div id="recaptcha" class="g-recaptcha" data-sitekey="6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b" data-callback="submitCallback" data-s="vJ20x5QPFGCo8r3XkMznOwMTCK8wPW_bLLhPDgo_I1cwF6xLuYZlq2G2wZPaSJiE8zx5YnaxJzFQGsyhY6NHQKMAaUTtSP6GAbPtueM35Jq3Hmk-gEAozXvvF0HIjK5oONT7F-06MwXDxA4HOqZyOEbsUG_8JjFcCklQjUNUVVItgyLpIbZ1dQ-IEtCXY5E3KDcgHGznfAyMGk_bby9uCpfxNTQwljGippKv1PIU7dI4d5LLpgBPWF0"></div> +<input type='hidden' name='q' value='EhAgAUug_-oCrgAAAAAAAAoQGPe-9u8FIhkA8aeDS_-EXvhS86PaeaDvps8cqCssFqOzMgFy'><input type="hidden" name="continue" value="https://m.youtube.com/watch?v=aaaaaaaaaaa&pbj=1&bpctr=9999999999"> +</form> +<hr noshade size="1" style="color:#ccc; background-color:#ccc;"> + +<div style="font-size:13px;"> +<b>About this page</b><br><br> + +Our systems have detected unusual traffic from your computer network. This page checks to see if it's really you sending the requests, and not a robot. <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br> + +<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;"> +This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop. In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests. If you share your network connection, ask your administrator for help — a different computer using the same IP address may be responsible. <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly. +</div> + +IP address: 2001:4ba0:ffea:2ae::a10<br>Time: 2019-12-21T04:28:41Z<br>URL: https://m.youtube.com/watch?v=aaaaaaaaaaa&pbj=1&bpctr=9999999999<br> +</div> +</div> +</body> +</html> diff --git a/tests/test_util.py b/tests/test_util.py new file mode 100644 index 0000000..bc10de4 --- /dev/null +++ b/tests/test_util.py @@ -0,0 +1,76 @@ +from youtube import util +import settings +import pytest # overview: https://realpython.com/pytest-python-testing/ +import urllib3 +import io +import os +import stem + + +def load_test_page(name): + with open(os.path.join('./tests/test_responses', name), 'rb') as f: + return f.read() + + +html429 = load_test_page('429.html') + + +class MockResponse(urllib3.response.HTTPResponse): + def __init__(self, body='success', headers=None, status=200, reason=''): + print(body[0:10]) + headers = headers or {} + if isinstance(body, str): + body = body.encode('utf-8') + self.body_io = io.BytesIO(body) + self.read = self.body_io.read + urllib3.response.HTTPResponse.__init__( + self, body=body, headers=headers, status=status, + preload_content=False, decode_content=False, reason=reason + ) + + +class NewIdentityState(): + MAX_TRIES = util.TorManager.MAX_TRIES + def __init__(self, new_identities_till_success): + self.new_identities_till_success = new_identities_till_success + + def new_identity(self, *args, **kwargs): + print('newidentity') + self.new_identities_till_success -= 1 + + def fetch_url_response(self, *args, **kwargs): + cleanup_func = (lambda r: None) + if self.new_identities_till_success == 0: + return MockResponse(), cleanup_func + return MockResponse(body=html429, status=429), cleanup_func + + +class MockController(): + def authenticate(self, *args, **kwargs): + pass + @classmethod + def from_port(cls, *args, **kwargs): + return cls() + def __enter__(self, *args, **kwargs): + return self + def __exit__(self, *args, **kwargs): + pass + + +@pytest.mark.parametrize('new_identities_till_success', + [i for i in range(0, NewIdentityState.MAX_TRIES+2)]) +def test_exit_node_retry(monkeypatch, new_identities_till_success): + new_identity_state = NewIdentityState(new_identities_till_success) + # https://docs.pytest.org/en/stable/monkeypatch.html + monkeypatch.setattr(settings, 'route_tor', 1) + monkeypatch.setattr(util, 'tor_manager', util.TorManager()) # fresh one + MockController.signal = new_identity_state.new_identity + monkeypatch.setattr(stem.control, 'Controller', MockController) + monkeypatch.setattr(util, 'fetch_url_response', + new_identity_state.fetch_url_response) + if new_identities_till_success <= NewIdentityState.MAX_TRIES: + assert util.fetch_url('url') == b'success' + else: + with pytest.raises(util.FetchError) as excinfo: + util.fetch_url('url') + assert int(excinfo.value.code) == 429 diff --git a/youtube/util.py b/youtube/util.py index 355d8c7..1544c94 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -60,6 +60,9 @@ connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED') class TorManager: + MAX_TRIES = 3 + COOLDOWN_TIME = 5 + def __init__(self): self.old_tor_connection_pool = None self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager( @@ -69,6 +72,7 @@ class TorManager: self.new_identity_lock = gevent.lock.BoundedSemaphore(1) self.last_new_identity_time = time.monotonic() - 20 + self.try_num = 1 def refresh_tor_connection_pool(self): self.tor_connection_pool.clear() @@ -108,9 +112,14 @@ class TorManager: return None delta = time.monotonic() - self.last_new_identity_time - if delta < 20: - print('new_identity: Retried already within last 20 seconds') - return 'Retried with new circuit once (max) within last 20 seconds.' + if delta < self.COOLDOWN_TIME and self.try_num == 1: + err = ('Retried with new circuit %d times (max) within last ' + '%d seconds.' % (self.MAX_TRIES, self.COOLDOWN_TIME)) + print('new_identity:', err) + return err + elif delta >= self.COOLDOWN_TIME: + self.try_num = 1 + try: port = settings.tor_control_port with stem.control.Controller.from_port(port=port) as controller: @@ -120,10 +129,14 @@ class TorManager: print('new_identity: NEWNYM signal sent') self.last_new_identity_time = time.monotonic() self.refresh_tor_connection_pool() - return None except stem.SocketError: traceback.print_exc() return 'Failed to connect to Tor control port.' + finally: + self.try_num += 1 + if self.try_num > self.MAX_TRIES: + self.try_num = 1 + return None finally: self.new_identity_lock.release() |