From f787e4e2027583476ca34bd01c8462f6459369bb Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 31 Jan 2020 20:06:15 -0800 Subject: Give a proper error message for 429 errors These occur when too many requests are coming from a Tor exit node. Before, there would be an error page with an exception instructing users to report the issue. But this is an expected and persistent issue. --- youtube/__init__.py | 12 ++++++++++++ youtube/channel.py | 2 ++ youtube/playlist.py | 1 + youtube/subscriptions.py | 13 ++++++++++--- youtube/util.py | 23 +++++++++++++++++++++++ youtube/watch.py | 1 + 6 files changed, 49 insertions(+), 3 deletions(-) diff --git a/youtube/__init__.py b/youtube/__init__.py index d8171c0..9e95256 100644 --- a/youtube/__init__.py +++ b/youtube/__init__.py @@ -1,6 +1,8 @@ +from youtube import util import flask import settings import traceback +from sys import exc_info yt_app = flask.Flask(__name__) yt_app.url_map.strict_slashes = False @@ -34,4 +36,14 @@ def commatize(num): @yt_app.errorhandler(500) def error_page(e): + if (exc_info()[0] == util.FetchError + and exc_info()[1].code == '429' + and settings.route_tor + ): + error_message = ('Error: Youtube blocked the request because the Tor' + ' exit node is overcrowded. 
Try getting a new exit node by' + ' restarting the Tor Browser.') + if exc_info()[1].ip: + error_message += ' Exit node IP address: ' + exc_info()[1].ip + return flask.render_template('error.html', error_message=error_message), 502 return flask.render_template('error.html', traceback=traceback.format_exc()), 500 diff --git a/youtube/channel.py b/youtube/channel.py index 4df82e5..c897a87 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -179,6 +179,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None): gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view) ) gevent.joinall(tasks) + util.check_gevent_exceptions(*tasks) number_of_videos, polymer_json = tasks[0].value, tasks[1].value elif tab == 'videos': tasks = ( @@ -186,6 +187,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None): gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos') ) gevent.joinall(tasks) + util.check_gevent_exceptions(*tasks) number_of_videos, polymer_json = tasks[0].value, tasks[1].value elif tab == 'about': polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about') diff --git a/youtube/playlist.py b/youtube/playlist.py index 3ca235a..91c8d1d 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -88,6 +88,7 @@ def get_playlist_page(): gevent.spawn(get_videos, playlist_id, page) ) gevent.joinall(tasks) + util.check_gevent_exceptions(*tasks) first_page_json, this_page_json = tasks[0].value, tasks[1].value info = yt_data_extract.extract_playlist_info(this_page_json) diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 76130f3..c26c79d 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -405,7 +405,14 @@ def check_channels_if_necessary(channel_ids): checking_channels.add(channel_id) check_channels_queue.put(channel_id) - +def 
_get_atoma_feed(channel_id): + url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id + try: + return util.fetch_url(url).decode('utf-8') + except util.FetchError as e: + if e.code == '404': # 404 is expected for terminated channels + return '' + raise def _get_upstream_videos(channel_id): try: @@ -417,7 +424,7 @@ def _get_upstream_videos(channel_id): tasks = ( gevent.spawn(channel.get_channel_tab, channel_id, print_status=False), # channel page, need for video duration - gevent.spawn(util.fetch_url, 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id) # atoma feed, need for exact published time + gevent.spawn(_get_atoma_feed, channel_id) # need atoma feed for exact published time ) gevent.joinall(tasks) @@ -438,7 +445,7 @@ def _get_upstream_videos(channel_id): return element return None - root = defusedxml.ElementTree.fromstring(feed.decode('utf-8')) + root = defusedxml.ElementTree.fromstring(feed) assert remove_bullshit(root.tag) == 'feed' for entry in root: if (remove_bullshit(entry.tag) != 'entry'): diff --git a/youtube/util.py b/youtube/util.py index feeec8c..f209060 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -97,6 +97,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler): https_request = http_request https_response = http_response +class FetchError(Exception): + def __init__(self, code, reason='', ip=None): + Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason) + self.code = code + self.reason = reason + self.ip = ip def decode_content(content, encoding_header): encodings = encoding_header.replace(' ', '').split(',') @@ -161,6 +167,17 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja content = response.read() response.release_conn() + if (response.status == 429 + and content.startswith(b'<!DOCTYPE') + and b'Our systems have detected unusual traffic' in content): + ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', + content) + ip = ip.group(1).decode('ascii') if ip else None + raise FetchError('429', reason=response.reason, ip=ip) + + elif response.status >= 400: + raise FetchError(str(response.status), reason=response.reason, ip=None) + read_finish = time.time() if report_text: print(report_text, ' 
Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) @@ -359,3 +376,9 @@ def parse_info_prepare_for_html(renderer, additional_info={}): add_extra_html_info(item) return item + +def check_gevent_exceptions(*tasks): + for task in tasks: + if task.exception: + raise task.exception + diff --git a/youtube/watch.py b/youtube/watch.py index 7106345..388a8e1 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -287,6 +287,7 @@ def get_watch_page(video_id=None): gevent.spawn(extract_info, video_id) ) gevent.joinall(tasks) + util.check_gevent_exceptions(tasks[1]) comments_info, info = tasks[0].value, tasks[1].value if info['error']: -- cgit v1.2.3