author     James Taylor <user234683@users.noreply.github.com>  2020-01-31 20:06:15 -0800
committer  James Taylor <user234683@users.noreply.github.com>  2020-01-31 20:06:15 -0800
commit     f787e4e2027583476ca34bd01c8462f6459369bb (patch)
tree       3ac533d55d3f524a49abbd83957b9c87d65f357a /youtube
parent     cd4a2fb0ebb63600d9e66fa695c6517a968a78f8 (diff)
Give a proper error message for 429 errors
These occur when too many requests are coming from a Tor exit node.
Previously, these produced an error page with an exception traceback instructing users to report the issue. But this is an expected and persistent condition, not a bug worth reporting.
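
For context, the mechanism is a check in util.fetch_url (see the diff below) that recognizes Youtube's rate-limit page and raises a structured FetchError, which the Flask 500 handler then matches on. The following is a minimal standalone sketch of that detection step; check_response is a hypothetical helper name used here for illustration (in the patch the logic is inline in fetch_url, using urllib3-style response.status and response.reason):

    import re

    class FetchError(Exception):
        # Carries the HTTP status code plus, for 429s, the blocked IP
        # that Youtube's error page names (the Tor exit node's address).
        def __init__(self, code, reason='', ip=None):
            Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
            self.code = code
            self.reason = reason
            self.ip = ip

    def check_response(status, reason, content):
        # Youtube's rate-limit page is an HTML document containing a fixed
        # phrase; the regex extracts the IPv6 or IPv4 address it reports.
        if (status == 429
                and content.startswith(b'<!DOCTYPE')
                and b'Our systems have detected unusual traffic' in content):
            match = re.search(
                br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', content)
            ip = match.group(1).decode('ascii') if match else None
            raise FetchError('429', reason=reason, ip=ip)
        elif status >= 400:
            raise FetchError(str(status), reason=reason, ip=None)

    if __name__ == '__main__':
        page = (b'<!DOCTYPE html><html>Our systems have detected unusual '
                b'traffic from your computer network. IP address: 1.2.3.4</html>')
        try:
            check_response(429, 'Too Many Requests', page)
        except FetchError as e:
            print(e.code, e.ip)  # 429 1.2.3.4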
Diffstat (limited to 'youtube')
-rw-r--r--  youtube/__init__.py       | 12 ++++++++++++
-rw-r--r--  youtube/channel.py        |  2 ++
-rw-r--r--  youtube/playlist.py       |  1 +
-rw-r--r--  youtube/subscriptions.py  | 13 ++++++++++---
-rw-r--r--  youtube/util.py           | 23 +++++++++++++++++++++++
-rw-r--r--  youtube/watch.py          |  1 +
6 files changed, 49 insertions(+), 3 deletions(-)
diff --git a/youtube/__init__.py b/youtube/__init__.py
index d8171c0..9e95256 100644
--- a/youtube/__init__.py
+++ b/youtube/__init__.py
@@ -1,6 +1,8 @@
+from youtube import util
 import flask
 import settings
 import traceback
+from sys import exc_info
 
 yt_app = flask.Flask(__name__)
 yt_app.url_map.strict_slashes = False
@@ -34,4 +36,14 @@ def commatize(num):
 
 @yt_app.errorhandler(500)
 def error_page(e):
+    if (exc_info()[0] == util.FetchError
+        and exc_info()[1].code == '429'
+        and settings.route_tor
+    ):
+        error_message = ('Error: Youtube blocked the request because the Tor'
+            ' exit node is overcrowded. Try getting a new exit node by'
+            ' restarting the Tor Browser.')
+        if exc_info()[1].ip:
+            error_message += ' Exit node IP address: ' + exc_info()[1].ip
+        return flask.render_template('error.html', error_message=error_message), 502
     return flask.render_template('error.html', traceback=traceback.format_exc()), 500
diff --git a/youtube/channel.py b/youtube/channel.py
index 4df82e5..c897a87 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -179,6 +179,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
             gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
         )
         gevent.joinall(tasks)
+        util.check_gevent_exceptions(*tasks)
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'videos':
         tasks = (
@@ -186,6 +187,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
             gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
         )
         gevent.joinall(tasks)
+        util.check_gevent_exceptions(*tasks)
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'about':
         polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 3ca235a..91c8d1d 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -88,6 +88,7 @@ def get_playlist_page():
         gevent.spawn(get_videos, playlist_id, page)
     )
     gevent.joinall(tasks)
+    util.check_gevent_exceptions(*tasks)
     first_page_json, this_page_json = tasks[0].value, tasks[1].value
 
     info = yt_data_extract.extract_playlist_info(this_page_json)
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 76130f3..c26c79d 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -405,7 +405,14 @@ def check_channels_if_necessary(channel_ids):
             checking_channels.add(channel_id)
             check_channels_queue.put(channel_id)
 
-
+def _get_atoma_feed(channel_id):
+    url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
+    try:
+        return util.fetch_url(url).decode('utf-8')
+    except util.FetchError as e:
+        if e.code == '404': # 404 is expected for terminated channels
+            return ''
+        raise
 
 def _get_upstream_videos(channel_id):
     try:
@@ -417,7 +424,7 @@ def _get_upstream_videos(channel_id):
 
     tasks = (
         gevent.spawn(channel.get_channel_tab, channel_id, print_status=False), # channel page, need for video duration
-        gevent.spawn(util.fetch_url, 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id) # atoma feed, need for exact published time
+        gevent.spawn(_get_atoma_feed, channel_id) # need atoma feed for exact published time
     )
     gevent.joinall(tasks)
 
@@ -438,7 +445,7 @@ def _get_upstream_videos(channel_id):
                 return element
         return None
 
-    root = defusedxml.ElementTree.fromstring(feed.decode('utf-8'))
+    root = defusedxml.ElementTree.fromstring(feed)
     assert remove_bullshit(root.tag) == 'feed'
     for entry in root:
         if (remove_bullshit(entry.tag) != 'entry'):
diff --git a/youtube/util.py b/youtube/util.py
index feeec8c..f209060 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -97,6 +97,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     https_request = http_request
     https_response = http_response
 
+class FetchError(Exception):
+    def __init__(self, code, reason='', ip=None):
+        Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
+        self.code = code
+        self.reason = reason
+        self.ip = ip
 
 def decode_content(content, encoding_header):
     encodings = encoding_header.replace(' ', '').split(',')
@@ -161,6 +167,17 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
     content = response.read()
     response.release_conn()
 
+    if (response.status == 429
+            and content.startswith(b'<!DOCTYPE')
+            and b'Our systems have detected unusual traffic' in content):
+        ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
+            content)
+        ip = ip.group(1).decode('ascii') if ip else None
+        raise FetchError('429', reason=response.reason, ip=ip)
+
+    elif response.status >= 400:
+        raise FetchError(str(response.status), reason=response.reason, ip=None)
+
     read_finish = time.time()
     if report_text:
         print(report_text, '    Latency:', round(response_time - start_time,3), '    Read time:', round(read_finish - response_time,3))
@@ -359,3 +376,9 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
     add_extra_html_info(item)
 
     return item
+
+def check_gevent_exceptions(*tasks):
+    for task in tasks:
+        if task.exception:
+            raise task.exception
+
diff --git a/youtube/watch.py b/youtube/watch.py
index 7106345..388a8e1 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -287,6 +287,7 @@ def get_watch_page(video_id=None):
         gevent.spawn(extract_info, video_id)
     )
     gevent.joinall(tasks)
+    util.check_gevent_exceptions(tasks[1])
     comments_info, info = tasks[0].value, tasks[1].value
 
     if info['error']:
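
A note on why check_gevent_exceptions is needed: gevent.joinall() does not re-raise exceptions from failed greenlets by default; it stores them on the greenlet and leaves task.value as None, so before this commit a failed fetch surfaced later as a confusing secondary error when that None was used. A minimal sketch demonstrating the behavior (fetch_page is a made-up stand-in for util.fetch_url):

    import gevent

    def check_gevent_exceptions(*tasks):
        # Re-raise the first stored exception so the caller fails at the
        # join point instead of when it later touches a None .value.
        for task in tasks:
            if task.exception:
                raise task.exception

    def fetch_page(url):
        # Made-up stand-in for util.fetch_url that always fails.
        raise ValueError('simulated 429 from ' + url)

    tasks = (
        gevent.spawn(fetch_page, 'https://example.com/a'),
        gevent.spawn(fetch_page, 'https://example.com/b'),
    )
    gevent.joinall(tasks)             # returns normally; nothing raised here
    print([t.value for t in tasks])   # [None, None] -- real errors are hidden
    check_gevent_exceptions(*tasks)   # raises the stored ValueError

In the patch, this helper is called right after each gevent.joinall() in channel.py, playlist.py, and watch.py.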