aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2020-12-24 21:15:18 -0800
committer: Jesús <heckyel@hyperbola.info> 2020-12-25 22:56:19 -0500
commit: b567a34ecd0c9c627d22faab97393107eccc314c (patch)
tree: 69f528fed664c05d92af87d708aa5a9488dd1a88
parent: 6145b66d7468661dc5d68853a5a5a1b53d697a90 (diff)
downloadyt-local-b567a34ecd0c9c627d22faab97393107eccc314c.tar.lz
yt-local-b567a34ecd0c9c627d22faab97393107eccc314c.tar.xz
yt-local-b567a34ecd0c9c627d22faab97393107eccc314c.zip
TorManager: Add delay before subsequent new identity retries
The request can be retried immediately after the first new identity, but if we request further new identities, we have to wait at least 6 seconds before doing the request; otherwise, based on my experiments, it won't be made from a new IP.
Potential issue: if, after getting the third new identity, the request takes > 12 seconds (the timeout is 15) and returns 429, then the Tor Manager will allow a 4th try instead of giving up (meaning the request takes forever from the user's perspective). This should be a very rare occurrence, however.
Signed-off-by: Jesús <heckyel@hyperbola.info>
-rw-r--r-- youtube/util.py 19
1 files changed, 18 insertions, 1 deletions
diff --git a/youtube/util.py b/youtube/util.py
index 1544c94..1d7eb59 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -61,7 +61,9 @@ connection_pool = urllib3.PoolManager(cert_reqs='CERT_REQUIRED')
class TorManager:
MAX_TRIES = 3
- COOLDOWN_TIME = 5
+ # Remember the 6-sec wait times, so make cooldown be two of those
+ # (otherwise it will retry forever if 429s never end)
+ COOLDOWN_TIME = 12
def __init__(self):
self.old_tor_connection_pool = None
@@ -99,6 +101,12 @@ class TorManager:
def new_identity(self, time_failed_request_started):
'''return error, or None if no error and the identity is fresh'''
+
+ # The overall pattern at maximum (always returning 429) will be
+ # R N (0) R N (6) R N (6) R | (12) R N (0) R N (6) ...
+ # where R is a request, N is a new identity, (x) is a wait time of
+ # x sec, and | is where we give up and display an error to the user.
+
print('new_identity: new_identity called')
# blocks if another greenlet currently has the lock
self.new_identity_lock.acquire()
@@ -133,9 +141,18 @@ class TorManager:
traceback.print_exc()
return 'Failed to connect to Tor control port.'
finally:
+ original_try_num = self.try_num
self.try_num += 1
if self.try_num > self.MAX_TRIES:
self.try_num = 1
+
+ # If we do the request right after second new identity it won't
+ # be a new IP, based on experiments.
+ # Not necessary after first new identity
+ if original_try_num > 1:
+ print('Sleeping for 6 seconds before retrying request')
+ time.sleep(6) # experimentally determined minimum
+
return None
finally:
self.new_identity_lock.release()