From f25324794916dc51367e399b923e367854e5b5ee Mon Sep 17 00:00:00 2001
From: James Taylor
Date: Tue, 18 Feb 2020 11:47:23 -0800
Subject: Fix MaxRetryError when checking for video URL access

The default urllib3 max redirect amount was set to 3. Change it to 10
and do not fail if there is a problem with checking for URL access.
Just print the error to the console and proceed.

Also add an unrelated remark about the bpctr=9999999999 parameter in
watch.py
---
 youtube/util.py  | 12 ++++++++++--
 youtube/watch.py | 14 ++++++++++++--
 2 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'youtube')

diff --git a/youtube/util.py b/youtube/util.py
index a5bd874..c7168a8 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -195,10 +195,18 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         return content, response
     return content
 
-def head(url, use_tor=False, report_text=None):
+def head(url, use_tor=False, report_text=None, max_redirects=10):
     pool = get_pool(use_tor and settings.route_tor)
     start_time = time.time()
-    response = pool.request('HEAD', url)
+
+    # default: Retry.DEFAULT = Retry(3)
+    # (in connectionpool.py in urllib3)
+    # According to the documentation for urlopen, a redirect counts as a retry
+    # by default. So there are 3 redirects max by default. Let's change that
+    # to 10 since googlevideo redirects a lot.
+    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
+        raise_on_redirect=False)
+    response = pool.request('HEAD', url, retries=retries)
     if report_text:
         print(report_text, ' Latency:', round(time.time() - start_time,3))
     return response
diff --git a/youtube/watch.py b/youtube/watch.py
index 5b36462..f80229b 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -208,6 +208,8 @@ headers = (
 ) + util.mobile_ua
 
 def extract_info(video_id):
+    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
+    # videos
     polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999', headers=headers, debug_name='watch').decode('utf-8')
     # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
     try:
@@ -237,8 +239,14 @@ def extract_info(video_id):
     # check for 403
     info['invidious_used'] = False
     if settings.route_tor and info['formats'] and info['formats'][0]['url']:
-        response = util.head(info['formats'][0]['url'],
-            report_text='Checked for URL access')
+        try:
+            response = util.head(info['formats'][0]['url'],
+                report_text='Checked for URL access')
+        except urllib3.exceptions.HTTPError:
+            print('Error while checking for URL access:\n')
+            traceback.print_exc()
+            return info
+
         if response.status == 403:
             print(('Access denied (403) for video urls.'
                 ' Retrieving urls from Invidious...'))
@@ -277,6 +285,8 @@ def extract_info(video_id):
                        + itag + ' not found in invidious urls'))
                     continue
                 fmt['url'] = itag_to_url[itag]
+        elif 300 <= response.status < 400:
+            print('Error: exceeded max redirects while checking video URL')
     return info
 
 def video_quality_string(format):
--
cgit v1.2.3
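
For reference, a minimal standalone sketch of the retry/redirect behavior this patch relies on, outside the youtube-local codebase. Only urllib3 itself is assumed; the example URL and the MAX_REDIRECTS constant are placeholders, not part of the patch.

import traceback
import urllib3

MAX_REDIRECTS = 10  # googlevideo URLs tend to redirect several times

# Retry.DEFAULT is Retry(3), and a redirect counts against the retry total,
# so raise the total alongside the redirect limit. raise_on_redirect=False
# makes urllib3 return the final 3xx response instead of raising
# MaxRetryError once the redirect budget is exhausted.
retries = urllib3.Retry(3 + MAX_REDIRECTS, redirect=MAX_REDIRECTS,
                        raise_on_redirect=False)

pool = urllib3.PoolManager()
try:
    response = pool.request('HEAD', 'https://example.com/videoplayback',
                            retries=retries)
except urllib3.exceptions.HTTPError:
    # Connection-level failures (including MaxRetryError) are reported
    # rather than propagated, mirroring the watch.py change above.
    traceback.print_exc()
else:
    if response.status == 403:
        print('Access denied (403) for video urls.')
    elif 300 <= response.status < 400:
        print('Error: exceeded max redirects while checking video URL')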