aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2020-02-01 15:09:37 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2020-02-01 15:09:37 -0800
commit: 7c2736aa26cb1700297c9fcd6e6ffa5516243a92 (patch)
tree: fd7fe697484ade8a86eeeecdec04a282d3e77aaa
parent: e364927f8374577c3ecaf7ccb365382aa525f913 (diff)
download: yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.tar.lz
yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.tar.xz
yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.zip
Check for 403 errors and fall back on Invidious
403 errors on the video URLs typically happen when a video has copyrighted content or was originally livestreamed. However, they appear not to happen (or at least to happen less frequently) if the Tor exit node uses IPv6.
-rw-r--r-- youtube/util.py 8
-rw-r--r-- youtube/watch.py 53
-rw-r--r-- youtube/yt_data_extract/__init__.py 3
3 files changed, 63 insertions, 1 deletions
diff --git a/youtube/util.py b/youtube/util.py
index f209060..a5bd874 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -195,6 +195,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
return content, response
return content
+def head(url, use_tor=False, report_text=None):
+    """Send an HTTP HEAD request to url and return the response object.
+
+    url: address to request.
+    use_tor: combined with settings.route_tor; get_pool receives
+        `use_tor and settings.route_tor`, so the value passed is truthy
+        only when both are set.
+    report_text: if truthy, it is printed together with the elapsed time
+        of the request in seconds, rounded to 3 decimal places.
+
+    Returns whatever pool.request returns. The status code is not examined
+    here; that is left to the caller.
+    """
+    pool = get_pool(use_tor and settings.route_tor)
+    # Use the monotonic clock for the elapsed-time measurement: unlike
+    # time.time(), it cannot jump (or run backwards) if the system wall
+    # clock is adjusted while the request is in flight.
+    start_time = time.monotonic()
+    response = pool.request('HEAD', url)
+    if report_text:
+        print(report_text, ' Latency:', round(time.monotonic() - start_time,3))
+    return response
+
mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
diff --git a/youtube/watch.py b/youtube/watch.py
index 388a8e1..ceca4cd 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -12,6 +12,8 @@ import os
import math
import traceback
import urllib
+import re
+import urllib3.exceptions
try:
with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
@@ -232,6 +234,57 @@ def extract_info(video_id):
decryption_error = 'Error decrypting url signatures: ' + decryption_error
info['playability_error'] = decryption_error
+ # check for 403
+ if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+ response = util.head(info['formats'][0]['url'],
+ report_text='Checked for URL access')
+ if response.status == 403:
+ print(('Access denied (403) for video urls.'
+ ' Retrieving urls from Invidious...'))
+ try:
+ video_info = util.fetch_url(
+ 'https://invidio.us/api/v1/videos/'
+ + video_id
+ + '?fields=adaptiveFormats,formatStreams',
+ report_text='Retrieved urls from Invidious',
+ debug_name='invidious_urls')
+ except (urllib3.exceptions.HTTPError) as e:
+ traceback.print_exc()
+ playability_error = ('Access denied (403) for video urls.'
+ + ' Failed to use Invidious to get the urls: '
+ + str(e))
+ if info['playability_error']:
+ info['playability_error'] += '\n' + playability_error
+ else:
+ info['playability_error'] = playability_error
+
+ return info
+
+ video_info = json.loads(video_info.decode('utf-8'))
+ info['formats'] = []
+ for fmt in (video_info['adaptiveFormats']
+ + video_info['formatStreams']):
+ # adjust keys to match our conventions
+ fmt['file_size'] = fmt.get('clen')
+ fmt['ext'] = fmt.get('container')
+ if 'resolution' in fmt:
+ fmt['height'] = int(fmt['resolution'].rstrip('p'))
+
+ # update with information from _formats table such as ext
+ itag = fmt.get('itag')
+ fmt.update(yt_data_extract._formats.get(itag, {}))
+
+ # extract acodec, vcodec, and ext
+ # (need for 'ext' because 'container' not always present)
+ yt_data_extract.update_format_with_type_info(fmt, fmt)
+
+ # ensure keys are present
+ for key in ('ext', 'audio_bitrate', 'acodec', 'vcodec',
+ 'width', 'height', 'audio_sample_rate', 'fps'):
+ if key not in fmt:
+ fmt[key] = None
+
+ info['formats'].append(fmt)
return info
def video_quality_string(format):
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index 898141e..3378b8d 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -8,4 +8,5 @@ from .everything_else import (extract_channel_info, extract_search_info,
from .watch_extraction import (extract_watch_info, get_caption_url,
update_with_age_restricted_info, requires_decryption,
- extract_decryption_function, decrypt_signatures)
+ extract_decryption_function, decrypt_signatures, _formats,
+ update_format_with_type_info)