From f25324794916dc51367e399b923e367854e5b5ee Mon Sep 17 00:00:00 2001
From: James Taylor
Date: Tue, 18 Feb 2020 11:47:23 -0800
Subject: Fix MaxRetryError when checking for video URL access

The default urllib3 max redirect amount was set to 3. Change it to 10
and do not fail if there is a problem with checking for URL access.
Just print the error to the console and proceed.

Also add an unrelated remark about the bpctr=9999999999 parameter in
watch.py
---
 youtube/util.py  | 12 ++++++++++--
 youtube/watch.py | 14 ++++++++++++--
 2 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'youtube')

diff --git a/youtube/util.py b/youtube/util.py
index a5bd874..c7168a8 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -195,10 +195,18 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         return content, response
     return content
 
-def head(url, use_tor=False, report_text=None):
+def head(url, use_tor=False, report_text=None, max_redirects=10):
     pool = get_pool(use_tor and settings.route_tor)
     start_time = time.time()
-    response = pool.request('HEAD', url)
+
+    # default: Retry.DEFAULT = Retry(3)
+    # (in connectionpool.py in urllib3)
+    # According to the documentation for urlopen, a redirect counts as a retry
+    # by default. So there are 3 redirects max by default. Let's change that
+    # to 10 since googlevideo redirects a lot.
+    retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
+        raise_on_redirect=False)
+    response = pool.request('HEAD', url, retries=retries)
     if report_text:
         print(report_text, ' Latency:', round(time.time() - start_time,3))
     return response
diff --git a/youtube/watch.py b/youtube/watch.py
index 5b36462..f80229b 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -208,6 +208,8 @@ headers = (
 ) + util.mobile_ua
 
 def extract_info(video_id):
+    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
+    # videos
     polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999', headers=headers, debug_name='watch').decode('utf-8')
     # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
     try:
@@ -237,8 +239,14 @@ def extract_info(video_id):
     # check for 403
     info['invidious_used'] = False
     if settings.route_tor and info['formats'] and info['formats'][0]['url']:
-        response = util.head(info['formats'][0]['url'],
-            report_text='Checked for URL access')
+        try:
+            response = util.head(info['formats'][0]['url'],
+                report_text='Checked for URL access')
+        except urllib3.exceptions.HTTPError:
+            print('Error while checking for URL access:\n')
+            traceback.print_exc()
+            return info
+
         if response.status == 403:
             print(('Access denied (403) for video urls.'
                 ' Retrieving urls from Invidious...'))
@@ -277,6 +285,8 @@ def extract_info(video_id):
                        + itag + ' not found in invidious urls'))
                     continue
                 fmt['url'] = itag_to_url[itag]
+        elif 300 <= response.status < 400:
+            print('Error: exceeded max redirects while checking video URL')
     return info
 
 def video_quality_string(format):
--
cgit v1.2.3
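
For reference, a minimal standalone sketch of the retry/redirect behavior this patch relies on, outside the youtube-local codebase. Only urllib3 itself is assumed; the example URL and the MAX_REDIRECTS constant are placeholders, not part of the patch.

import traceback
import urllib3

MAX_REDIRECTS = 10  # googlevideo URLs tend to redirect several times

# Retry.DEFAULT is Retry(3), and a redirect counts against the retry total,
# so raise the total alongside the redirect limit. raise_on_redirect=False
# makes urllib3 return the final 3xx response instead of raising
# MaxRetryError once the redirect budget is exhausted.
retries = urllib3.Retry(3 + MAX_REDIRECTS, redirect=MAX_REDIRECTS,
                        raise_on_redirect=False)

pool = urllib3.PoolManager()
try:
    response = pool.request('HEAD', 'https://example.com/videoplayback',
                            retries=retries)
except urllib3.exceptions.HTTPError:
    # Connection-level failures (including MaxRetryError) are reported
    # rather than propagated, mirroring the watch.py change above.
    traceback.print_exc()
else:
    if response.status == 403:
        print('Access denied (403) for video urls.')
    elif 300 <= response.status < 400:
        print('Error: exceeded max redirects while checking video URL')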