about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author James Taylor <user234683@users.noreply.github.com> 2020-12-21 11:59:35 -0800
committer Jesús <heckyel@hyperbola.info> 2020-12-21 18:23:09 -0500
commit b11120d000970304b01287a28d6494e4844cfced (patch)
tree df8076e1b2dbe8187f4317cd54a9c92e939d6626
parent 574cb2dae8534a8abba5710217e4f6c8655ff854 (diff)
download yt-local-b11120d000970304b01287a28d6494e4844cfced.tar.lz
yt-local-b11120d000970304b01287a28d6494e4844cfced.tar.xz
yt-local-b11120d000970304b01287a28d6494e4844cfced.zip
Exit node retrying: Retry 3 times. Also add tests for it.
Closes #20
Signed-off-by: Jesús <heckyel@hyperbola.info>
-rw-r--r-- pytest.ini 4
-rw-r--r-- requirements-dev.txt 1
-rw-r--r-- tests/conftest.py 14
-rw-r--r-- tests/test_responses/429.html 28
-rw-r--r-- tests/test_util.py 76
-rw-r--r-- youtube/util.py 21
6 files changed, 140 insertions, 4 deletions
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..fb67425
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+# pytest.ini
+[pytest]
+testpaths =
+ tests
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..559a4fc
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
+pytest>=6.2.1
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..2694317
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,14 @@
+import pytest
+import urllib3
+import urllib
+import urllib.request
+import socket
+
+# https://realpython.com/pytest-python-testing/
+@pytest.fixture(autouse=True)
+def disable_network_calls(monkeypatch):
+    """Make any attempt to reach the network during a test fail loudly.
+
+    The fixture is autouse, so it applies to every test without being
+    requested. Each patched entry point raises RuntimeError when called.
+    """
+    def refuse_network(*args, **kwargs):
+        raise RuntimeError('Network access not allowed during testing!')
+
+    # (owner, attribute) pairs, patched in this order: urllib request
+    # construction, urllib3 pooled requests, raw socket creation.
+    patch_targets = (
+        (urllib.request, 'Request'),
+        (urllib3.PoolManager, 'request'),
+        (socket, 'socket'),
+    )
+    for owner, attribute in patch_targets:
+        monkeypatch.setattr(owner, attribute, refuse_network)
diff --git a/tests/test_responses/429.html b/tests/test_responses/429.html
new file mode 100644
index 0000000..9bde0f9
--- /dev/null
+++ b/tests/test_responses/429.html
@@ -0,0 +1,28 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+<head><meta http-equiv="content-type" content="text/html; charset=utf-8"><meta name="viewport" content="initial-scale=1"><title>https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999</title></head>
+<body style="font-family: arial, sans-serif; background-color: #fff; color: #000; padding:20px; font-size:18px;" onload="e=document.getElementById('captcha');if(e){e.focus();}">
+<div style="max-width:400px;">
+<hr noshade size="1" style="color:#ccc; background-color:#ccc;"><br>
+<form id="captcha-form" action="index" method="post">
+<script src="https://www.google.com/recaptcha/api.js" async defer></script>
+<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script>
+<div id="recaptcha" class="g-recaptcha" data-sitekey="6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b" data-callback="submitCallback" data-s="vJ20x5QPFGCo8r3XkMznOwMTCK8wPW_bLLhPDgo_I1cwF6xLuYZlq2G2wZPaSJiE8zx5YnaxJzFQGsyhY6NHQKMAaUTtSP6GAbPtueM35Jq3Hmk-gEAozXvvF0HIjK5oONT7F-06MwXDxA4HOqZyOEbsUG_8JjFcCklQjUNUVVItgyLpIbZ1dQ-IEtCXY5E3KDcgHGznfAyMGk_bby9uCpfxNTQwljGippKv1PIU7dI4d5LLpgBPWF0"></div>
+<input type='hidden' name='q' value='EhAgAUug_-oCrgAAAAAAAAoQGPe-9u8FIhkA8aeDS_-EXvhS86PaeaDvps8cqCssFqOzMgFy'><input type="hidden" name="continue" value="https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999">
+</form>
+<hr noshade size="1" style="color:#ccc; background-color:#ccc;">
+
+<div style="font-size:13px;">
+<b>About this page</b><br><br>
+
+Our systems have detected unusual traffic from your computer network. This page checks to see if it&#39;s really you sending the requests, and not a robot. <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br>
+
+<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;">
+This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop. In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests. If you share your network connection, ask your administrator for help &mdash; a different computer using the same IP address may be responsible. <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
+</div>
+
+IP address: 2001:4ba0:ffea:2ae::a10<br>Time: 2019-12-21T04:28:41Z<br>URL: https://m.youtube.com/watch?v=aaaaaaaaaaa&amp;pbj=1&amp;bpctr=9999999999<br>
+</div>
+</div>
+</body>
+</html>
diff --git a/tests/test_util.py b/tests/test_util.py
new file mode 100644
index 0000000..bc10de4
--- /dev/null
+++ b/tests/test_util.py
@@ -0,0 +1,76 @@
+from youtube import util
+import settings
+import pytest # overview: https://realpython.com/pytest-python-testing/
+import urllib3
+import io
+import os
+import stem
+
+
+def load_test_page(name):
+    """Return the raw bytes of a canned response file.
+
+    name is a file name inside the ``test_responses`` directory that sits
+    next to this test module. The directory is located relative to this
+    file rather than the current working directory, so the tests also work
+    when pytest is started from somewhere other than the repository root.
+    """
+    responses_dir = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), 'test_responses')
+    with open(os.path.join(responses_dir, name), 'rb') as f:
+        return f.read()
+
+
+# Raw bytes of the canned captcha page; used below as the body of mocked
+# HTTP 429 responses.
+html429 = load_test_page('429.html')
+
+
+class MockResponse(urllib3.response.HTTPResponse):
+    """urllib3 response stand-in whose body is served from memory."""
+
+    def __init__(self, body='success', headers=None, status=200, reason=''):
+        """Build a canned response.
+
+        body may be str (encoded as UTF-8) or bytes; headers defaults to an
+        empty dict when omitted or empty.
+        """
+        # Diagnostic output: the start of the body as it was passed in.
+        print(body[:10])
+        if not headers:
+            headers = {}
+        payload = body.encode('utf-8') if isinstance(body, str) else body
+        # read() is redirected to an in-memory buffer holding the payload.
+        self.body_io = io.BytesIO(payload)
+        self.read = self.body_io.read
+        super().__init__(
+            body=payload,
+            headers=headers,
+            status=status,
+            reason=reason,
+            preload_content=False,
+            decode_content=False,
+        )
+
+
+class NewIdentityState:
+    """Simulated Tor state that returns 429 until enough new identities.
+
+    new_identities_till_success counts how many identity changes are still
+    needed before a fetch succeeds. While it is non-zero every fetch gets
+    the 429 captcha page; once it reaches exactly zero, fetches succeed.
+    """
+    MAX_TRIES = util.TorManager.MAX_TRIES
+
+    def __init__(self, new_identities_till_success):
+        self.new_identities_till_success = new_identities_till_success
+
+    def new_identity(self, *args, **kwargs):
+        """Record one identity change; all arguments are ignored."""
+        print('newidentity')
+        self.new_identities_till_success = (
+            self.new_identities_till_success - 1)
+
+    def fetch_url_response(self, *args, **kwargs):
+        """Return a (response, cleanup_func) pair for the current state."""
+        def no_cleanup(r):
+            return None
+
+        if self.new_identities_till_success != 0:
+            return MockResponse(body=html429, status=429), no_cleanup
+        return MockResponse(), no_cleanup
+
+
+class MockController:
+    """Do-nothing controller object for tests.
+
+    Provides the from_port() constructor, context-manager support and a
+    no-op authenticate(); every method accepts and ignores any arguments.
+    """
+
+    @classmethod
+    def from_port(cls, *args, **kwargs):
+        """Return a new instance without connecting to anything."""
+        return cls()
+
+    def __enter__(self, *args, **kwargs):
+        return self
+
+    def __exit__(self, *args, **kwargs):
+        # Returning None lets any exception from the with-body propagate.
+        return None
+
+    def authenticate(self, *args, **kwargs):
+        """Accept any credentials and do nothing."""
+        return None
+
+
+@pytest.mark.parametrize('new_identities_till_success',
+                         range(NewIdentityState.MAX_TRIES + 2))
+def test_exit_node_retry(monkeypatch, new_identities_till_success):
+    """Check how fetch_url copes with exit nodes that keep returning 429.
+
+    new_identities_till_success is the number of identity changes needed
+    before the mocked fetch stops returning 429. Up to and including
+    MAX_TRIES changes, fetch_url must return the successful body; beyond
+    that it must raise FetchError carrying code 429.
+    """
+    new_identity_state = NewIdentityState(new_identities_till_success)
+    # https://docs.pytest.org/en/stable/monkeypatch.html
+    monkeypatch.setattr(settings, 'route_tor', 1)
+    monkeypatch.setattr(util, 'tor_manager', util.TorManager())  # fresh one
+    # Patch through monkeypatch so the attribute is removed again after the
+    # test instead of leaking this run's state object into later tests.
+    # raising=False is needed because MockController defines no 'signal'.
+    monkeypatch.setattr(MockController, 'signal',
+                        new_identity_state.new_identity, raising=False)
+    monkeypatch.setattr(stem.control, 'Controller', MockController)
+    monkeypatch.setattr(util, 'fetch_url_response',
+                        new_identity_state.fetch_url_response)
+    if new_identities_till_success <= NewIdentityState.MAX_TRIES:
+        assert util.fetch_url('url') == b'success'
+    else:
+        with pytest.raises(util.FetchError) as excinfo:
+            util.fetch_url('url')
+        assert int(excinfo.value.code) == 429
diff --git a/youtube/util.py b/youtube/util.py
index 355d8c7..1544c94 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -60,6 +60,9 @@ connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
class TorManager:
+ MAX_TRIES = 3
+ COOLDOWN_TIME = 5
+
def __init__(self):
self.old_tor_connection_pool = None
self.tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager(
@@ -69,6 +72,7 @@ class TorManager:
self.new_identity_lock = gevent.lock.BoundedSemaphore(1)
self.last_new_identity_time = time.monotonic() - 20
+ self.try_num = 1
def refresh_tor_connection_pool(self):
self.tor_connection_pool.clear()
@@ -108,9 +112,14 @@ class TorManager:
return None
delta = time.monotonic() - self.last_new_identity_time
- if delta < 20:
- print('new_identity: Retried already within last 20 seconds')
- return 'Retried with new circuit once (max) within last 20 seconds.'
+ if delta < self.COOLDOWN_TIME and self.try_num == 1:
+ err = ('Retried with new circuit %d times (max) within last '
+ '%d seconds.' % (self.MAX_TRIES, self.COOLDOWN_TIME))
+ print('new_identity:', err)
+ return err
+ elif delta >= self.COOLDOWN_TIME:
+ self.try_num = 1
+
try:
port = settings.tor_control_port
with stem.control.Controller.from_port(port=port) as controller:
@@ -120,10 +129,14 @@ class TorManager:
print('new_identity: NEWNYM signal sent')
self.last_new_identity_time = time.monotonic()
self.refresh_tor_connection_pool()
- return None
except stem.SocketError:
traceback.print_exc()
return 'Failed to connect to Tor control port.'
+ finally:
+ self.try_num += 1
+ if self.try_num > self.MAX_TRIES:
+ self.try_num = 1
+ return None
finally:
self.new_identity_lock.release()