From f787e4e2027583476ca34bd01c8462f6459369bb Mon Sep 17 00:00:00 2001
From: James Taylor <user234683@users.noreply.github.com>
Date: Fri, 31 Jan 2020 20:06:15 -0800
Subject: Give a proper error message for 429 errors

These occur when too many requests are coming from a Tor exit node. Before,
there would be an error page with an exception instructing users to report
the issue. But this is an expected and persistent issue.

---
 youtube/__init__.py      | 12 ++++++++++++
 youtube/channel.py       |  2 ++
 youtube/playlist.py      |  1 +
 youtube/subscriptions.py | 13 ++++++++++---
 youtube/util.py          | 23 +++++++++++++++++++++++
 youtube/watch.py         |  1 +
 6 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/youtube/__init__.py b/youtube/__init__.py
index d8171c0..9e95256 100644
--- a/youtube/__init__.py
+++ b/youtube/__init__.py
@@ -1,6 +1,8 @@
+from youtube import util
 import flask
 import settings
 import traceback
+from sys import exc_info
 yt_app = flask.Flask(__name__)
 yt_app.url_map.strict_slashes = False
 
@@ -34,4 +36,14 @@ def commatize(num):
 
 @yt_app.errorhandler(500)
 def error_page(e):
+    if (exc_info()[0] == util.FetchError
+        and exc_info()[1].code == '429'
+        and settings.route_tor
+    ):
+        error_message = ('Error: Youtube blocked the request because the Tor'
+            ' exit node is overcrowded. Try getting a new exit node by'
+            ' restarting the Tor Browser.')
+        if exc_info()[1].ip:
+            error_message += ' Exit node IP address: ' + exc_info()[1].ip
+        return flask.render_template('error.html', error_message=error_message), 502
     return flask.render_template('error.html', traceback=traceback.format_exc()), 500
diff --git a/youtube/channel.py b/youtube/channel.py
index 4df82e5..c897a87 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -179,6 +179,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
             gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
         )
         gevent.joinall(tasks)
+        util.check_gevent_exceptions(*tasks)
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'videos':
         tasks = (
@@ -186,6 +187,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
             gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
         )
         gevent.joinall(tasks)
+        util.check_gevent_exceptions(*tasks)
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
     elif tab == 'about':
         polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 3ca235a..91c8d1d 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -88,6 +88,7 @@ def get_playlist_page():
             gevent.spawn(get_videos, playlist_id, page)
         )
         gevent.joinall(tasks)
+        util.check_gevent_exceptions(*tasks)
         first_page_json, this_page_json = tasks[0].value, tasks[1].value
 
     info = yt_data_extract.extract_playlist_info(this_page_json)
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 76130f3..c26c79d 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -405,7 +405,14 @@ def check_channels_if_necessary(channel_ids):
             checking_channels.add(channel_id)
             check_channels_queue.put(channel_id)
 
-
+def _get_atoma_feed(channel_id):
+    url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
+    try:
+        return util.fetch_url(url).decode('utf-8')
+    except util.FetchError as e:
+        if e.code == '404': # 404 is expected for terminated channels
+            return ''
+        raise
 
 def _get_upstream_videos(channel_id):
     try:
@@ -417,7 +424,7 @@ def _get_upstream_videos(channel_id):
 
     tasks = (
         gevent.spawn(channel.get_channel_tab, channel_id, print_status=False), # channel page, need for video duration
-        gevent.spawn(util.fetch_url, 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id) # atoma feed, need for exact published time
+        gevent.spawn(_get_atoma_feed, channel_id) # need atoma feed for exact published time
     )
     gevent.joinall(tasks)
 
@@ -438,7 +445,7 @@ def _get_upstream_videos(channel_id):
                     return element
             return None
 
-        root = defusedxml.ElementTree.fromstring(feed.decode('utf-8'))
+        root = defusedxml.ElementTree.fromstring(feed)
         assert remove_bullshit(root.tag) == 'feed'
         for entry in root:
             if (remove_bullshit(entry.tag) != 'entry'):
diff --git a/youtube/util.py b/youtube/util.py
index feeec8c..f209060 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -97,6 +97,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
     https_request = http_request
     https_response = http_response
 
+class FetchError(Exception):
+    def __init__(self, code, reason='', ip=None):
+        Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
+        self.code = code
+        self.reason = reason
+        self.ip = ip
 
 def decode_content(content, encoding_header):
     encodings = encoding_header.replace(' ', '').split(',')
@@ -161,6 +167,17 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         content = response.read()
         response.release_conn()
 
+    if (response.status == 429
+            and content.startswith(b'<!DOCTYPE')
+            and b'Our systems have detected unusual traffic' in content):
+        ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
+            content)
+        ip = ip.group(1).decode('ascii') if ip else None
+        raise FetchError('429', reason=response.reason, ip=ip)
+
+    elif response.status >= 400:
+        raise FetchError(str(response.status), reason=response.reason, ip=None)
+
     read_finish = time.time()
     if report_text:
         print(report_text, '    Latency:', round(response_time - start_time,3), '    Read time:', round(read_finish - response_time,3))
@@ -359,3 +376,9 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
     add_extra_html_info(item)
 
     return item
+
+def check_gevent_exceptions(*tasks):
+    for task in tasks:
+        if task.exception:
+            raise task.exception
+
diff --git a/youtube/watch.py b/youtube/watch.py
index 7106345..388a8e1 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -287,6 +287,7 @@ def get_watch_page(video_id=None):
         gevent.spawn(extract_info, video_id)
     )
     gevent.joinall(tasks)
+    util.check_gevent_exceptions(tasks[1])
     comments_info, info = tasks[0].value, tasks[1].value
 
     if info['error']:
-- 
cgit v1.2.3