about summary refs log tree commit diff stats
path: root/youtube
diff options
context:
space:
mode:
author James Taylor <user234683@users.noreply.github.com> 2020-01-31 20:06:15 -0800
committer James Taylor <user234683@users.noreply.github.com> 2020-01-31 20:06:15 -0800
commit f787e4e2027583476ca34bd01c8462f6459369bb (patch)
tree 3ac533d55d3f524a49abbd83957b9c87d65f357a /youtube
parent cd4a2fb0ebb63600d9e66fa695c6517a968a78f8 (diff)
download yt-local-f787e4e2027583476ca34bd01c8462f6459369bb.tar.lz
yt-local-f787e4e2027583476ca34bd01c8462f6459369bb.tar.xz
yt-local-f787e4e2027583476ca34bd01c8462f6459369bb.zip
Give a proper error message for 429 errors
These occur when too many requests are coming from a Tor exit node. Before, there would be an error page with an exception instructing users to report the issue. But this is an expected and persistent issue.
Diffstat (limited to 'youtube')
-rw-r--r-- youtube/__init__.py 12
-rw-r--r-- youtube/channel.py 2
-rw-r--r-- youtube/playlist.py 1
-rw-r--r-- youtube/subscriptions.py 13
-rw-r--r-- youtube/util.py 23
-rw-r--r-- youtube/watch.py 1
6 files changed, 49 insertions, 3 deletions
diff --git a/youtube/__init__.py b/youtube/__init__.py
index d8171c0..9e95256 100644
--- a/youtube/__init__.py
+++ b/youtube/__init__.py
@@ -1,6 +1,8 @@
+from youtube import util
import flask
import settings
import traceback
+from sys import exc_info
yt_app = flask.Flask(__name__)
yt_app.url_map.strict_slashes = False
@@ -34,4 +36,14 @@ def commatize(num):
@yt_app.errorhandler(500)
def error_page(e):
+ if (exc_info()[0] == util.FetchError
+ and exc_info()[1].code == '429'
+ and settings.route_tor
+ ):
+ error_message = ('Error: Youtube blocked the request because the Tor'
+ ' exit node is overcrowded. Try getting a new exit node by'
+ ' restarting the Tor Browser.')
+ if exc_info()[1].ip:
+ error_message += ' Exit node IP address: ' + exc_info()[1].ip
+ return flask.render_template('error.html', error_message=error_message), 502
return flask.render_template('error.html', traceback=traceback.format_exc()), 500
diff --git a/youtube/channel.py b/youtube/channel.py
index 4df82e5..c897a87 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -179,6 +179,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
elif tab == 'videos':
tasks = (
@@ -186,6 +187,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
elif tab == 'about':
polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 3ca235a..91c8d1d 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -88,6 +88,7 @@ def get_playlist_page():
gevent.spawn(get_videos, playlist_id, page)
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
first_page_json, this_page_json = tasks[0].value, tasks[1].value
info = yt_data_extract.extract_playlist_info(this_page_json)
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 76130f3..c26c79d 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -405,7 +405,14 @@ def check_channels_if_necessary(channel_ids):
checking_channels.add(channel_id)
check_channels_queue.put(channel_id)
-
+def _get_atoma_feed(channel_id):
+ url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
+ try:
+ return util.fetch_url(url).decode('utf-8')
+ except util.FetchError as e:
+ if e.code == '404': # 404 is expected for terminated channels
+ return ''
+ raise
def _get_upstream_videos(channel_id):
try:
@@ -417,7 +424,7 @@ def _get_upstream_videos(channel_id):
tasks = (
gevent.spawn(channel.get_channel_tab, channel_id, print_status=False), # channel page, need for video duration
- gevent.spawn(util.fetch_url, 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id) # atoma feed, need for exact published time
+ gevent.spawn(_get_atoma_feed, channel_id) # need atoma feed for exact published time
)
gevent.joinall(tasks)
@@ -438,7 +445,7 @@ def _get_upstream_videos(channel_id):
return element
return None
- root = defusedxml.ElementTree.fromstring(feed.decode('utf-8'))
+ root = defusedxml.ElementTree.fromstring(feed)
assert remove_bullshit(root.tag) == 'feed'
for entry in root:
if (remove_bullshit(entry.tag) != 'entry'):
diff --git a/youtube/util.py b/youtube/util.py
index feeec8c..f209060 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -97,6 +97,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
https_request = http_request
https_response = http_response
+class FetchError(Exception):
+ def __init__(self, code, reason='', ip=None):
+ Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
+ self.code = code
+ self.reason = reason
+ self.ip = ip
def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
@@ -161,6 +167,17 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
content = response.read()
response.release_conn()
+ if (response.status == 429
+ and content.startswith(b'<!DOCTYPE')
+ and b'Our systems have detected unusual traffic' in content):
+ ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
+ content)
+ ip = ip.group(1).decode('ascii') if ip else None
+ raise FetchError('429', reason=response.reason, ip=ip)
+
+ elif response.status >= 400:
+ raise FetchError(str(response.status), reason=response.reason, ip=None)
+
read_finish = time.time()
if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
@@ -359,3 +376,9 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
add_extra_html_info(item)
return item
+
+def check_gevent_exceptions(*tasks):
+ for task in tasks:
+ if task.exception:
+ raise task.exception
+
diff --git a/youtube/watch.py b/youtube/watch.py
index 7106345..388a8e1 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -287,6 +287,7 @@ def get_watch_page(video_id=None):
gevent.spawn(extract_info, video_id)
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(tasks[1])
comments_info, info = tasks[0].value, tasks[1].value
if info['error']: