aboutsummaryrefslogtreecommitdiffstats
path: root/youtube
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2020-02-01 15:09:37 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2020-02-01 15:09:37 -0800
commit: 7c2736aa26cb1700297c9fcd6e6ffa5516243a92 (patch)
tree: fd7fe697484ade8a86eeeecdec04a282d3e77aaa /youtube
parent: e364927f8374577c3ecaf7ccb365382aa525f913 (diff)
download: yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.tar.lz
yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.tar.xz
yt-local-7c2736aa26cb1700297c9fcd6e6ffa5516243a92.zip
Check for 403 errors and fall back to Invidious
403 errors on the video URLs typically happen when a video has copyrighted content or was originally livestreamed. However, they appear not to happen (or at least to happen less frequently) if the Tor exit node uses IPv6.
Diffstat (limited to 'youtube')
-rw-r--r-- youtube/util.py 8
-rw-r--r-- youtube/watch.py 53
-rw-r--r-- youtube/yt_data_extract/__init__.py 3
3 files changed, 63 insertions, 1 deletions
diff --git a/youtube/util.py b/youtube/util.py
index f209060..a5bd874 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -195,6 +195,14 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
return content, response
return content
+def head(url, use_tor=False, report_text=None):
+ pool = get_pool(use_tor and settings.route_tor)
+ start_time = time.time()
+ response = pool.request('HEAD', url)
+ if report_text:
+ print(report_text, ' Latency:', round(time.time() - start_time,3))
+ return response
+
mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
mobile_ua = (('User-Agent', mobile_user_agent),)
desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
diff --git a/youtube/watch.py b/youtube/watch.py
index 388a8e1..ceca4cd 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -12,6 +12,8 @@ import os
import math
import traceback
import urllib
+import re
+import urllib3.exceptions
try:
with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
@@ -232,6 +234,57 @@ def extract_info(video_id):
decryption_error = 'Error decrypting url signatures: ' + decryption_error
info['playability_error'] = decryption_error
+ # check for 403
+ if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+ response = util.head(info['formats'][0]['url'],
+ report_text='Checked for URL access')
+ if response.status == 403:
+ print(('Access denied (403) for video urls.'
+ ' Retrieving urls from Invidious...'))
+ try:
+ video_info = util.fetch_url(
+ 'https://invidio.us/api/v1/videos/'
+ + video_id
+ + '?fields=adaptiveFormats,formatStreams',
+ report_text='Retrieved urls from Invidious',
+ debug_name='invidious_urls')
+ except (urllib3.exceptions.HTTPError) as e:
+ traceback.print_exc()
+ playability_error = ('Access denied (403) for video urls.'
+ + ' Failed to use Invidious to get the urls: '
+ + str(e))
+ if info['playability_error']:
+ info['playability_error'] += '\n' + playability_error
+ else:
+ info['playability_error'] = playability_error
+
+ return info
+
+ video_info = json.loads(video_info.decode('utf-8'))
+ info['formats'] = []
+ for fmt in (video_info['adaptiveFormats']
+ + video_info['formatStreams']):
+ # adjust keys to match our conventions
+ fmt['file_size'] = fmt.get('clen')
+ fmt['ext'] = fmt.get('container')
+ if 'resolution' in fmt:
+ fmt['height'] = int(fmt['resolution'].rstrip('p'))
+
+ # update with information from _formats table such as ext
+ itag = fmt.get('itag')
+ fmt.update(yt_data_extract._formats.get(itag, {}))
+
+ # extract acodec, vcodec, and ext
+ # (need for 'ext' because 'container' not always present)
+ yt_data_extract.update_format_with_type_info(fmt, fmt)
+
+ # ensure keys are present
+ for key in ('ext', 'audio_bitrate', 'acodec', 'vcodec',
+ 'width', 'height', 'audio_sample_rate', 'fps'):
+ if key not in fmt:
+ fmt[key] = None
+
+ info['formats'].append(fmt)
return info
def video_quality_string(format):
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index 898141e..3378b8d 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -8,4 +8,5 @@ from .everything_else import (extract_channel_info, extract_search_info,
from .watch_extraction import (extract_watch_info, get_caption_url,
update_with_age_restricted_info, requires_decryption,
- extract_decryption_function, decrypt_signatures)
+ extract_decryption_function, decrypt_signatures, _formats,
+ update_format_with_type_info)