about summary refs log tree commit diff stats
path: root/youtube
diff options
context:
space:
mode:
Diffstat (limited to 'youtube')
-rw-r--r-- youtube/__init__.py 17
-rw-r--r-- youtube/channel.py 42
-rw-r--r-- youtube/templates/comments.html 10
-rw-r--r-- youtube/templates/settings.html 2
-rw-r--r-- youtube/templates/watch.html 2
-rw-r--r-- youtube/util.py 55
-rw-r--r-- youtube/watch.py 14
7 files changed, 105 insertions, 37 deletions
diff --git a/youtube/__init__.py b/youtube/__init__.py
index 8675c4b..6c2ec48 100644
--- a/youtube/__init__.py
+++ b/youtube/__init__.py
@@ -2,6 +2,7 @@ from youtube import util
import flask
import settings
import traceback
+import re
from sys import exc_info
yt_app = flask.Flask(__name__)
yt_app.url_map.strict_slashes = False
@@ -34,6 +35,22 @@ def commatize(num):
num = int(num)
return '{:,}'.format(num)
+def timestamp_replacement(match):
+ time_seconds = 0
+ for part in match.group(0).split(':'):
+ time_seconds = 60*time_seconds + int(part)
+ return (
+ '<a href="#" onclick="document.querySelector(\'video\').currentTime='
+ + str(time_seconds)
+ + '">' + match.group(0)
+ + '</a>'
+ )
+
+TIMESTAMP_RE = re.compile(r'\b(\d?\d:)?\d?\d:\d\d\b')
+@yt_app.template_filter('timestamps')
+def timestamps(text):
+ return TIMESTAMP_RE.sub(timestamp_replacement, text)
+
@yt_app.errorhandler(500)
def error_page(e):
if (exc_info()[0] == util.FetchError
diff --git a/youtube/channel.py b/youtube/channel.py
index ba1a4d2..ad6db5b 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -44,7 +44,28 @@ generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
# view:
# grid: 0 or 1
# list: 2
-def channel_ctoken_desktop(channel_id, page, sort, tab, view=1):
+def channel_ctoken_v3(channel_id, page, sort, tab, view=1):
+ # page > 1 doesn't work when sorting by oldest
+ offset = 30*(int(page) - 1)
+ page_token = proto.string(61, proto.unpadded_b64encode(
+ proto.string(1, proto.unpadded_b64encode(proto.uint(1,offset)))
+ ))
+
+ tab = proto.string(2, tab )
+ sort = proto.uint(3, int(sort))
+
+ shelf_view = proto.uint(4, 0)
+ view = proto.uint(6, int(view))
+ continuation_info = proto.string(3,
+ proto.percent_b64encode(tab + sort + shelf_view + view + page_token)
+ )
+
+ channel_id = proto.string(2, channel_id )
+ pointless_nest = proto.string(80226972, channel_id + continuation_info)
+
+ return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
+
+def channel_ctoken_v2(channel_id, page, sort, tab, view=1):
# see https://github.com/iv-org/invidious/issues/1319#issuecomment-671732646
# page > 1 doesn't work when sorting by oldest
offset = 30*(int(page) - 1)
@@ -74,14 +95,14 @@ def channel_ctoken_desktop(channel_id, page, sort, tab, view=1):
return base64.urlsafe_b64encode(pointless_nest).decode('ascii')
-def channel_ctoken_mobile(channel_id, page, sort, tab, view=1):
+def channel_ctoken_v1(channel_id, page, sort, tab, view=1):
tab = proto.string(2, tab )
sort = proto.uint(3, int(sort))
page = proto.string(15, str(page) )
# example with shelves in videos tab: https://www.youtube.com/channel/UCNL1ZadSjHpjm4q9j2sVtOA/videos
shelf_view = proto.uint(4, 0)
view = proto.uint(6, int(view))
- continuation_info = proto.string( 3, proto.percent_b64encode(tab + view + sort + shelf_view + page) )
+ continuation_info = proto.string(3, proto.percent_b64encode(tab + view + sort + shelf_view + page + proto.uint(23, 0)) )
channel_id = proto.string(2, channel_id )
pointless_nest = proto.string(80226972, channel_id + continuation_info)
@@ -91,15 +112,16 @@ def channel_ctoken_mobile(channel_id, page, sort, tab, view=1):
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1, print_status=True):
message = 'Got channel tab' if print_status else None
- if int(sort) == 2 and int(page) > 1: # use mobile endpoint
- ctoken = channel_ctoken_mobile(channel_id, page, sort, tab, view)
+ if int(sort) == 2 and int(page) > 1:
+ ctoken = channel_ctoken_v1(channel_id, page, sort, tab, view)
ctoken = ctoken.replace('=', '%3D')
- url = ('https://m.youtube.com/channel/' + channel_id + '/' + tab
- + '?ctoken=' + ctoken + '&pbj=1')
- content = util.fetch_url(url, headers_mobile + real_cookie,
+ url = ('https://www.youtube.com/channel/' + channel_id + '/' + tab
+ + '?action_continuation=1&continuation=' + ctoken
+ + '&pbj=1')
+ content = util.fetch_url(url, headers_desktop + real_cookie,
debug_name='channel_tab', report_text=message)
- else: # use desktop endpoint
- ctoken = channel_ctoken_desktop(channel_id, page, sort, tab, view)
+ else:
+ ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
ctoken = ctoken.replace('=', '%3D')
url = 'https://www.youtube.com/browse_ajax?ctoken=' + ctoken
content = util.fetch_url(url,
diff --git a/youtube/templates/comments.html b/youtube/templates/comments.html
index 396852a..f2cdf65 100644
--- a/youtube/templates/comments.html
+++ b/youtube/templates/comments.html
@@ -1,6 +1,6 @@
{% import "common_elements.html" as common_elements %}
-{% macro render_comment(comment, include_avatar) %}
+{% macro render_comment(comment, include_avatar, timestamp_links=False) %}
<div class="comment-container">
<div class="comment">
<a class="author-avatar" href="{{ comment['author_url'] }}" title="{{ comment['author'] }}">
@@ -14,7 +14,11 @@
<a class="permalink" href="{{ comment['permalink'] }}" title="permalink">
<time datetime="">{{ comment['time_published'] }}</time>
</a>
- <span class="text">{{ common_elements.text_runs(comment['text']) }}</span>
+ {% if timestamp_links %}
+ <span class="text">{{ common_elements.text_runs(comment['text'])|timestamps|safe }}</span>
+ {% else %}
+ <span class="text">{{ common_elements.text_runs(comment['text']) }}</span>
+ {% endif %}
<span class="likes">{{ comment['likes_text'] if comment['like_count'] else ''}}</span>
<div class="bottom-row">
@@ -36,7 +40,7 @@
</div>
<div class="comments">
{% for comment in comments_info['comments'] %}
- {{ render_comment(comment, comments_info['include_avatars']) }}
+ {{ render_comment(comment, comments_info['include_avatars'], True) }}
{% endfor %}
</div>
{% if 'more_comments_url' is in comments_info %}
diff --git a/youtube/templates/settings.html b/youtube/templates/settings.html
index 19a2461..5d1df5f 100644
--- a/youtube/templates/settings.html
+++ b/youtube/templates/settings.html
@@ -4,7 +4,7 @@
{% block style %}
.settings-form {
margin: auto;
- width: 500px;
+ width: 600px;
margin-top:10px;
padding: 10px;
display: block;
diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html
index 8264eb8..e3c6fa0 100644
--- a/youtube/templates/watch.html
+++ b/youtube/templates/watch.html
@@ -413,7 +413,7 @@ Reload without invidious (for usage of new identity button).</a>
<input class="checkbox" name="video_info_list" value="{{ video_info }}" form="playlist-edit" type="checkbox">
- <span class="description">{{ common_elements.text_runs(description)|urlize }}</span>
+ <span class="description">{{ common_elements.text_runs(description)|escape|urlize|timestamps|safe }}</span>
<div class="music-list">
{% if music_list.__len__() != 0 %}
<hr>
diff --git a/youtube/util.py b/youtube/util.py
index b19f91b..3c32ddb 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -54,7 +54,7 @@ URL_ORIGIN = "/https://www.youtube.com"
connection_pool = urllib3.PoolManager(cert_reqs = 'CERT_REQUIRED')
old_tor_connection_pool = None
-tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED')
+tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED')
tor_pool_refresh_time = time.monotonic() # prevent problems due to clock changes
@@ -74,7 +74,7 @@ def get_pool(use_tor):
# Keep a reference for 5 min to avoid it getting garbage collected while sockets still in use
old_tor_connection_pool = tor_connection_pool
- tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:9150/', cert_reqs = 'CERT_REQUIRED')
+ tor_connection_pool = urllib3.contrib.socks.SOCKSProxyManager('socks5://127.0.0.1:' + str(settings.tor_port) + '/', cert_reqs = 'CERT_REQUIRED')
tor_pool_refresh_time = current_time
return tor_connection_pool
@@ -119,8 +119,11 @@ def decode_content(content, encoding_header):
content = gzip.decompress(content)
return content
-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
+def fetch_url_response(url, headers=(), timeout=15, data=None,
+ cookiejar_send=None, cookiejar_receive=None,
+ use_tor=True, max_redirects=None):
'''
+ returns response, cleanup_function
When cookiejar_send is set to a CookieJar object,
those cookies will be sent in the request (but cookies in response will not be merged into it)
When cookiejar_receive is set to a CookieJar object,
@@ -147,32 +150,51 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
elif not isinstance(data, bytes):
data = urllib.parse.urlencode(data).encode('ascii')
- start_time = time.time()
-
if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib
req = urllib.request.Request(url, data=data, headers=headers)
cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)
if use_tor and settings.route_tor:
- opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
+ opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", settings.tor_port), cookie_processor)
else:
opener = urllib.request.build_opener(cookie_processor)
response = opener.open(req, timeout=timeout)
- response_time = time.time()
-
-
- content = response.read()
+ cleanup_func = (lambda r: None)
else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
+ # default: Retry.DEFAULT = Retry(3)
+ # (in connectionpool.py in urllib3)
+ # According to the documentation for urlopen, a redirect counts as a
+ # retry. So there are 3 redirects max by default.
+ if max_redirects:
+ retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
+ else:
+ retries = urllib3.Retry(3)
pool = get_pool(use_tor and settings.route_tor)
+ response = pool.request(method, url, headers=headers,
+ timeout=timeout, preload_content=False,
+ decode_content=False, retries=retries)
+ cleanup_func = (lambda r: r.release_conn())
- response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False)
- response_time = time.time()
+ return response, cleanup_func
- content = response.read()
- response.release_conn()
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
+ cookiejar_send=None, cookiejar_receive=None, use_tor=True,
+ debug_name=None):
+ start_time = time.time()
+
+ response, cleanup_func = fetch_url_response(
+ url, headers, timeout=timeout,
+ cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
+ use_tor=use_tor)
+ response_time = time.time()
+
+ content = response.read()
+ read_finish = time.time()
+
+ cleanup_func(response) # release_connection for urllib3
if (response.status == 429
and content.startswith(b'<!DOCTYPE')
@@ -185,7 +207,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
elif response.status >= 400:
raise FetchError(str(response.status), reason=response.reason, ip=None)
- read_finish = time.time()
if report_text:
print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3))
content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
@@ -198,8 +219,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
with open(os.path.join(save_dir, debug_name), 'wb') as f:
f.write(content)
- if return_response:
- return content, response
return content
def head(url, use_tor=False, report_text=None, max_redirects=10):
@@ -209,7 +228,7 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
# default: Retry.DEFAULT = Retry(3)
# (in connectionpool.py in urllib3)
# According to the documentation for urlopen, a redirect counts as a retry
- # by default. So there are 3 redirects max by default. Let's change that
+ # So there are 3 redirects max by default. Let's change that
# to 10 since googlevideo redirects a lot.
retries = urllib3.Retry(3+max_redirects, redirect=max_redirects,
raise_on_redirect=False)
diff --git a/youtube/watch.py b/youtube/watch.py
index c1f5e1e..cedf632 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -24,7 +24,7 @@ except FileNotFoundError:
def get_video_sources(info):
video_sources = []
- if not settings.theater_mode:
+ if (not settings.theater_mode) or settings.route_tor == 2:
max_resolution = 360
else:
max_resolution = settings.default_resolution
@@ -270,10 +270,11 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
else:
info['hls_formats'] = []
- # check for 403
+ # check for 403. Unnecessary for tor video routing b/c ip address is same
info['invidious_used'] = False
info['invidious_reload_button'] = False
- if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+ if (settings.route_tor == 1
+ and info['formats'] and info['formats'][0]['url']):
try:
response = util.head(info['formats'][0]['url'],
report_text='Checked for URL access')
@@ -408,10 +409,10 @@ def get_watch_page(video_id=None):
"author": info['author'],
}
+ # prefix urls, and other post-processing not handled by yt_data_extract
for item in info['related_videos']:
util.prefix_urls(item)
util.add_extra_html_info(item)
-
if info['playlist']:
playlist_id = info['playlist']['id']
for item in info['playlist']['items']:
@@ -423,6 +424,11 @@ def get_watch_page(video_id=None):
item['url'] += '&index=' + str(item['index'])
info['playlist']['author_url'] = util.prefix_url(
info['playlist']['author_url'])
+ # Don't prefix hls_formats for now because the urls inside the manifest
+ # would need to be prefixed as well.
+ for fmt in info['formats']:
+ fmt['url'] = util.prefix_url(fmt['url'])
+
if settings.gather_googlevideo_domains:
with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: