about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- youtube/__init__.py 12
-rw-r--r-- youtube/channel.py 2
-rw-r--r-- youtube/playlist.py 1
-rw-r--r-- youtube/subscriptions.py 13
-rw-r--r-- youtube/util.py 23
-rw-r--r-- youtube/watch.py 1
6 files changed, 49 insertions, 3 deletions
diff --git a/youtube/__init__.py b/youtube/__init__.py
index d8171c0..9e95256 100644
--- a/youtube/__init__.py
+++ b/youtube/__init__.py
@@ -1,6 +1,8 @@
+from youtube import util
import flask
import settings
import traceback
+from sys import exc_info
yt_app = flask.Flask(__name__)
yt_app.url_map.strict_slashes = False
@@ -34,4 +36,14 @@ def commatize(num):
@yt_app.errorhandler(500)
def error_page(e):
+ if (exc_info()[0] == util.FetchError
+ and exc_info()[1].code == '429'
+ and settings.route_tor
+ ):
+ error_message = ('Error: Youtube blocked the request because the Tor'
+ ' exit node is overcrowded. Try getting a new exit node by'
+ ' restarting the Tor Browser.')
+ if exc_info()[1].ip:
+ error_message += ' Exit node IP address: ' + exc_info()[1].ip
+ return flask.render_template('error.html', error_message=error_message), 502
return flask.render_template('error.html', traceback=traceback.format_exc()), 500
diff --git a/youtube/channel.py b/youtube/channel.py
index 4df82e5..c897a87 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -179,6 +179,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
elif tab == 'videos':
tasks = (
@@ -186,6 +187,7 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
number_of_videos, polymer_json = tasks[0].value, tasks[1].value
elif tab == 'about':
polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 3ca235a..91c8d1d 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -88,6 +88,7 @@ def get_playlist_page():
gevent.spawn(get_videos, playlist_id, page)
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
first_page_json, this_page_json = tasks[0].value, tasks[1].value
info = yt_data_extract.extract_playlist_info(this_page_json)
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 76130f3..c26c79d 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -405,7 +405,14 @@ def check_channels_if_necessary(channel_ids):
checking_channels.add(channel_id)
check_channels_queue.put(channel_id)
-
+def _get_atoma_feed(channel_id):
+ url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
+ try:
+ return util.fetch_url(url).decode('utf-8')
+ except util.FetchError as e:
+ if e.code == '404': # 404 is expected for terminated channels
+ return ''
+ raise
def _get_upstream_videos(channel_id):
try:
@@ -417,7 +424,7 @@ def _get_upstream_videos(channel_id):
tasks = (
gevent.spawn(channel.get_channel_tab, channel_id, print_status=False), # channel page, need for video duration
- gevent.spawn(util.fetch_url, 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id) # atoma feed, need for exact published time
+ gevent.spawn(_get_atoma_feed, channel_id) # need atoma feed for exact published time
)
gevent.joinall(tasks)
@@ -438,7 +445,7 @@ def _get_upstream_videos(channel_id):
return element
return None
- root = defusedxml.ElementTree.fromstring(feed.decode('utf-8'))
+ root = defusedxml.ElementTree.fromstring(feed)
assert remove_bullshit(root.tag) == 'feed'
for entry in root:
if (remove_bullshit(entry.tag) != 'entry'):
diff --git a/youtube/util.py b/youtube/util.py
index feeec8c..f209060 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -97,6 +97,12 @@ class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
https_request = http_request
https_response = http_response
+class FetchError(Exception):
+ def __init__(self, code, reason='', ip=None):
+ Exception.__init__(self, 'HTTP error during request: ' + code + ' ' + reason)
+ self.code = code
+ self.reason = reason
+ self.ip = ip
def decode_content(content, encoding_header):
encodings = encoding_header.replace(' ', '').split(',')
@@ -161,6 +167,17 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
content = response.read()
response.release_conn()
+ if (response.status == 429
+ and content.startswith(b'<!DOCTYPE')
+ and b'Our systems have detected unusual traffic' in content):
+ ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
+ content)
+ ip = ip.group(1).decode('ascii') if ip else None
+ raise FetchError('429', reason=response.reason, ip=ip)
+
+ elif response.status >= 400:
+ raise FetchError(str(response.status), reason=response.reason, ip=None)
+
read_finish = time.time()
if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
@@ -359,3 +376,9 @@ def parse_info_prepare_for_html(renderer, additional_info={}):
add_extra_html_info(item)
return item
+
+def check_gevent_exceptions(*tasks):
+ for task in tasks:
+ if task.exception:
+ raise task.exception
+
diff --git a/youtube/watch.py b/youtube/watch.py
index 7106345..388a8e1 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -287,6 +287,7 @@ def get_watch_page(video_id=None):
gevent.spawn(extract_info, video_id)
)
gevent.joinall(tasks)
+ util.check_gevent_exceptions(tasks[1])
comments_info, info = tasks[0].value, tasks[1].value
if info['error']: