From 7c2736aa26cb1700297c9fcd6e6ffa5516243a92 Mon Sep 17 00:00:00 2001
From: James Taylor
Date: Sat, 1 Feb 2020 15:09:37 -0800
Subject: Check for 403 errors and fallback on Invidious

403 errors on the video urls happen typically when a video has
copyrighted content or was livestreamed originally. They appear to not
happen (or at least happen less frequently) if the Tor exit node used
ipv6, however.
---
Review notes (not part of the commit message; text between the "---"
marker and the first "diff --git" line is ignored by git am):

* Line structure and indentation were reconstructed from a
  whitespace-collapsed copy of this patch. Indentation follows the Python
  block structure and the line counts in the hunk headers; runs of spaces
  inside string literals (e.g. ' Latency:') could not be recovered and are
  reproduced as found.
* util.head(): `use_tor` defaults to False and the new call site in
  watch.py does not pass it, so get_pool() receives a falsy value for the
  403 check even when settings.route_tor is set. Presumably intentional
  (probing access without Tor) -- confirm.
* util.head(): latency is measured with time.time(); time.monotonic() is
  the clock intended for measuring intervals.
* watch.py: json.loads() and the ['adaptiveFormats'] / ['formatStreams']
  lookups run outside the try block, and only urllib3.exceptions.HTTPError
  is caught, so a malformed or partial Invidious response would raise out
  of extract_info. int(fmt['resolution'].rstrip('p')) likewise assumes the
  value is digits followed by 'p' -- confirm against the Invidious API.

 youtube/util.py                     |  8 ++++++
 youtube/watch.py                    | 53 +++++++++++++++++++++++++++++++++++++
 youtube/yt_data_extract/__init__.py |  3 ++-
 3 files changed, 63 insertions(+), 1 deletion(-)

(limited to 'youtube')

diff --git a/youtube/util.py b/youtube/util.py
index f209060..a5bd874 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -195,6 +195,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         return content, response
     return content
 
+def head(url, use_tor=False, report_text=None):
+    pool = get_pool(use_tor and settings.route_tor)
+    start_time = time.time()
+    response = pool.request('HEAD', url)
+    if report_text:
+        print(report_text, ' Latency:', round(time.time() - start_time,3))
+    return response
+
 mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
 mobile_ua = (('User-Agent', mobile_user_agent),)
 desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
diff --git a/youtube/watch.py b/youtube/watch.py
index 388a8e1..ceca4cd 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -12,6 +12,8 @@ import os
 import math
 import traceback
 import urllib
+import re
+import urllib3.exceptions
 
 try:
     with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
@@ -232,6 +234,57 @@ def extract_info(video_id):
         decryption_error = 'Error decrypting url signatures: ' + decryption_error
         info['playability_error'] = decryption_error
 
+    # check for 403
+    if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+        response = util.head(info['formats'][0]['url'],
+            report_text='Checked for URL access')
+        if response.status == 403:
+            print(('Access denied (403) for video urls.'
+                ' Retrieving urls from Invidious...'))
+            try:
+                video_info = util.fetch_url(
+                    'https://invidio.us/api/v1/videos/'
+                    + video_id
+                    + '?fields=adaptiveFormats,formatStreams',
+                    report_text='Retrieved urls from Invidious',
+                    debug_name='invidious_urls')
+            except (urllib3.exceptions.HTTPError) as e:
+                traceback.print_exc()
+                playability_error = ('Access denied (403) for video urls.'
+                    + ' Failed to use Invidious to get the urls: '
+                    + str(e))
+                if info['playability_error']:
+                    info['playability_error'] += '\n' + playability_error
+                else:
+                    info['playability_error'] = playability_error
+
+                return info
+
+            video_info = json.loads(video_info.decode('utf-8'))
+            info['formats'] = []
+            for fmt in (video_info['adaptiveFormats']
+                    + video_info['formatStreams']):
+                # adjust keys to match our conventions
+                fmt['file_size'] = fmt.get('clen')
+                fmt['ext'] = fmt.get('container')
+                if 'resolution' in fmt:
+                    fmt['height'] = int(fmt['resolution'].rstrip('p'))
+
+                # update with information from _formats table such as ext
+                itag = fmt.get('itag')
+                fmt.update(yt_data_extract._formats.get(itag, {}))
+
+                # extract acodec, vcodec, and ext
+                # (need for 'ext' because 'container' not always present)
+                yt_data_extract.update_format_with_type_info(fmt, fmt)
+
+                # ensure keys are present
+                for key in ('ext', 'audio_bitrate', 'acodec', 'vcodec',
+                        'width', 'height', 'audio_sample_rate', 'fps'):
+                    if key not in fmt:
+                        fmt[key] = None
+
+                info['formats'].append(fmt)
     return info
 
 def video_quality_string(format):
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index 898141e..3378b8d 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -8,4 +8,5 @@ from .everything_else import (extract_channel_info, extract_search_info,
 
 from .watch_extraction import (extract_watch_info, get_caption_url,
     update_with_age_restricted_info, requires_decryption,
-    extract_decryption_function, decrypt_signatures)
+    extract_decryption_function, decrypt_signatures, _formats,
+    update_format_with_type_info)
-- 
cgit v1.2.3