diff options
author | James Taylor <user234683@users.noreply.github.com> | 2020-02-01 15:09:37 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2020-02-01 15:09:37 -0800 |
commit | 7c2736aa26cb1700297c9fcd6e6ffa5516243a92 (patch) | |
tree | fd7fe697484ade8a86eeeecdec04a282d3e77aaa /youtube | |
parent | e364927f8374577c3ecaf7ccb365382aa525f913 (diff) | |
download | yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.tar.lz yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.tar.xz yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.zip |
Check for 403 errors and fall back on Invidious
403 errors on the video URLs typically happen when a video contains copyrighted content or was originally livestreamed. However, they appear not to happen (or at least happen less frequently) if the Tor exit node uses IPv6.
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/util.py | 8 | ||||
-rw-r--r-- | youtube/watch.py | 53 | ||||
-rw-r--r-- | youtube/yt_data_extract/__init__.py | 3 |
3 files changed, 63 insertions, 1 deletions
diff --git a/youtube/util.py b/youtube/util.py index f209060..a5bd874 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -195,6 +195,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja return content, response return content +def head(url, use_tor=False, report_text=None): + pool = get_pool(use_tor and settings.route_tor) + start_time = time.time() + response = pool.request('HEAD', url) + if report_text: + print(report_text, ' Latency:', round(time.time() - start_time,3)) + return response + mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36' mobile_ua = (('User-Agent', mobile_user_agent),) desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0' diff --git a/youtube/watch.py b/youtube/watch.py index 388a8e1..ceca4cd 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -12,6 +12,8 @@ import os import math import traceback import urllib +import re +import urllib3.exceptions try: with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f: @@ -232,6 +234,57 @@ def extract_info(video_id): decryption_error = 'Error decrypting url signatures: ' + decryption_error info['playability_error'] = decryption_error + # check for 403 + if settings.route_tor and info['formats'] and info['formats'][0]['url']: + response = util.head(info['formats'][0]['url'], + report_text='Checked for URL access') + if response.status == 403: + print(('Access denied (403) for video urls.' + ' Retrieving urls from Invidious...')) + try: + video_info = util.fetch_url( + 'https://invidio.us/api/v1/videos/' + + video_id + + '?fields=adaptiveFormats,formatStreams', + report_text='Retrieved urls from Invidious', + debug_name='invidious_urls') + except (urllib3.exceptions.HTTPError) as e: + traceback.print_exc() + playability_error = ('Access denied (403) for video urls.' 
+ + ' Failed to use Invidious to get the urls: ' + + str(e)) + if info['playability_error']: + info['playability_error'] += '\n' + playability_error + else: + info['playability_error'] = playability_error + + return info + + video_info = json.loads(video_info.decode('utf-8')) + info['formats'] = [] + for fmt in (video_info['adaptiveFormats'] + + video_info['formatStreams']): + # adjust keys to match our conventions + fmt['file_size'] = fmt.get('clen') + fmt['ext'] = fmt.get('container') + if 'resolution' in fmt: + fmt['height'] = int(fmt['resolution'].rstrip('p')) + + # update with information from _formats table such as ext + itag = fmt.get('itag') + fmt.update(yt_data_extract._formats.get(itag, {})) + + # extract acodec, vcodec, and ext + # (need for 'ext' because 'container' not always present) + yt_data_extract.update_format_with_type_info(fmt, fmt) + + # ensure keys are present + for key in ('ext', 'audio_bitrate', 'acodec', 'vcodec', + 'width', 'height', 'audio_sample_rate', 'fps'): + if key not in fmt: + fmt[key] = None + + info['formats'].append(fmt) return info def video_quality_string(format): diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py index 898141e..3378b8d 100644 --- a/youtube/yt_data_extract/__init__.py +++ b/youtube/yt_data_extract/__init__.py @@ -8,4 +8,5 @@ from .everything_else import (extract_channel_info, extract_search_info, from .watch_extraction import (extract_watch_info, get_caption_url, update_with_age_restricted_info, requires_decryption, - extract_decryption_function, decrypt_signatures) + extract_decryption_function, decrypt_signatures, _formats, + update_format_with_type_info) |