Fix MaxRetryErrors due to Tor exit node blockage

Sometimes YouTube redirects to a google.com/sorry page, seemingly setting up redirect loops. Other times the url redirects to itself. Signed-off-by: Jesús <heckyel@hyperbola.info>
author: James Taylor <user234683@users.noreply.github.com> 2022-02-15 00:24:48 -0800
committer: Jesús <heckyel@hyperbola.info> 2022-02-15 21:30:47 -0500
commit: 5260716d14dea32f67ac805ca187e97723c3d6ce (patch)
tree: d045377dae7777bf3def8acbd8fc1693e865ddf7 /youtube
parent: 32d30bde9c1fb006348ce99f239f9f74f35607e4 (diff)
download: yt-local-5260716d14dea32f67ac805ca187e97723c3d6ce.tar.lz
yt-local-5260716d14dea32f67ac805ca187e97723c3d6ce.tar.xz
yt-local-5260716d14dea32f67ac805ca187e97723c3d6ce.zip
1 files changed, 16 insertions, 3 deletions
diff --git a/youtube/util.py b/youtube/util.py
index 4070f12..05f78b9 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -268,14 +268,15 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
         # According to the documentation for urlopen, a redirect counts as a
         # retry. So there are 3 redirects max by default.
         if max_redirects:
-            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects, raise_on_redirect=False)
         else:
-            retries = urllib3.Retry(3)
+            retries = urllib3.Retry(3, raise_on_redirect=False)
         pool = get_pool(use_tor and settings.route_tor)
         try:
             response = pool.request(method, url, headers=headers, body=data,
                                     timeout=timeout, preload_content=False,
                                     decode_content=False, retries=retries)
+            response.retries = retries
         except urllib3.exceptions.MaxRetryError as e:
             exception_cause = e.__context__.__context__
             if (isinstance(exception_cause, socks.ProxyConnectionError)
@@ -328,11 +329,23 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
             with open(os.path.join(save_dir, debug_name), 'wb') as f:
                 f.write(content)
 
-        if response.status == 429:
+        if response.status == 429 or (
+            response.status == 302 and (response.getheader('Location') == url
+                or response.getheader('Location').startswith(
+                       'https://www.google.com/sorry/index'
+                   )
+            )
+        ):
+            print(response.status, response.reason, response.retries.history,
+                  response.getheaders())
             ip = re.search(
                 br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                 content)
             ip = ip.group(1).decode('ascii') if ip else None
+            if not ip:
+                ip = re.search(r'IP=((?:\d+\.)+\d+)',
+                               response.getheader('Set-Cookie'))
+                ip = ip.group(1) if ip else None
 
             # don't get new identity if we're not using Tor
             if not use_tor:
author	James Taylor <user234683@users.noreply.github.com>	2022-02-15 00:24:48 -0800
committer	Jesús <heckyel@hyperbola.info>	2022-02-15 21:30:47 -0500
commit	5260716d14dea32f67ac805ca187e97723c3d6ce (patch)
tree	d045377dae7777bf3def8acbd8fc1693e865ddf7 /youtube
parent	32d30bde9c1fb006348ce99f239f9f74f35607e4 (diff)
download	yt-local-5260716d14dea32f67ac805ca187e97723c3d6ce.tar.lz yt-local-5260716d14dea32f67ac805ca187e97723c3d6ce.tar.xz yt-local-5260716d14dea32f67ac805ca187e97723c3d6ce.zip