Diffstat (limited to 'youtube')
-rw-r--r--   youtube/channel.py         4
-rw-r--r--   youtube/subscriptions.py   256
-rw-r--r--   youtube/youtube.py         6
3 files changed, 254 insertions(+), 12 deletions(-)
diff --git a/youtube/channel.py b/youtube/channel.py
index e9f315b..55316e2 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -248,6 +248,7 @@ def channel_videos_html(polymer_json, current_page=1, current_sort=3, number_of_
return yt_channel_items_template.substitute(
header = html_common.get_header(),
channel_title = microformat['title'],
+ channel_id = channel_id,
channel_tabs = channel_tabs_html(channel_id, 'Videos'),
sort_buttons = channel_sort_buttons_html(channel_id, 'videos', current_sort),
avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
@@ -269,6 +270,7 @@ def channel_playlists_html(polymer_json, current_sort=3):
return yt_channel_items_template.substitute(
header = html_common.get_header(),
channel_title = microformat['title'],
+ channel_id = channel_id,
channel_tabs = channel_tabs_html(channel_id, 'Playlists'),
sort_buttons = channel_sort_buttons_html(channel_id, 'playlists', current_sort),
avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
@@ -333,6 +335,7 @@ def channel_about_page(polymer_json):
description = description,
links = channel_links,
stats = stats,
+ channel_id = channel_metadata['channelId'],
channel_tabs = channel_tabs_html(channel_metadata['channelId'], 'About'),
)
@@ -353,6 +356,7 @@ def channel_search_page(polymer_json, query, current_page=1, number_of_videos =
return yt_channel_items_template.substitute(
header = html_common.get_header(),
channel_title = html.escape(microformat['title']),
+ channel_id = channel_id,
channel_tabs = channel_tabs_html(channel_id, '', query),
avatar = '/' + microformat['thumbnail']['thumbnails'][0]['url'],
page_title = html.escape(query + ' - Channel search'),
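
The four hunks above all do the same thing: pass channel_id through to yt_channel_items_template.substitute(). string.Template ignores keywords that have no matching placeholder, so this is harmless on its own; it only becomes useful once yt_channel_items_template.html (not part of this diff) gains a $channel_id placeholder, presumably for the subscribe form that post_subscriptions_page below consumes. A minimal sketch of that behaviour, with a made-up stand-in for the real template:

# Illustration only: the markup and placeholder names here are assumptions,
# not the project's actual yt_channel_items_template.html.
from string import Template

yt_channel_items_template = Template(
    '<h2>$channel_title</h2>\n'
    '<form method="post">\n'
    '    <input type="hidden" name="channel_id" value="$channel_id">\n'
    '    <input type="hidden" name="channel_name" value="$channel_title">\n'
    '    <input type="hidden" name="action" value="subscribe">\n'
    '    <input type="submit" value="Subscribe">\n'
    '</form>\n'
)

print(yt_channel_items_template.substitute(
    channel_title = '3Blue1Brown',
    channel_id = 'UCYO_jab_esuFRV4b17AJtAw',   # the keyword added by this commit
))
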
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 47f1ea3..0c7e8a5 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -1,18 +1,252 @@
+from youtube import util, yt_data_extract, html_common, channel
+import settings
+from string import Template
+import sqlite3
+import os
+import secrets
+import datetime
+import itertools
+import time
import urllib
+import socks, sockshandler
-with open("subscriptions.txt", 'r', encoding='utf-8') as file:
- subscriptions = file.read()
-
-# Line format: "channel_id channel_name"
-# Example:
-# UCYO_jab_esuFRV4b17AJtAw 3Blue1Brown
+# Import conditionally so as not to completely break for people who have updated but don't know about the new dependency
+try:
+ import atoma
+except ModuleNotFoundError:
+ print('Error: atoma not installed, subscriptions will not work')
-subscriptions = ((line[0:24], line[25: ]) for line in subscriptions.splitlines())
+with open('yt_subscriptions_template.html', 'r', encoding='utf-8') as f:
+ subscriptions_template = Template(f.read())
-def get_new_videos():
- for channel_id, channel_name in subscriptions:
-
+# https://stackabuse.com/a-sqlite-tutorial-with-python/
+database_path = os.path.join(settings.data_dir, "subscriptions.sqlite")
-def get_subscriptions_page():
+def open_database():
+ if not os.path.exists(settings.data_dir):
+ os.makedirs(settings.data_dir)
+ connection = sqlite3.connect(database_path)
+
+ # Create tables if they don't exist
+ try:
+ cursor = connection.cursor()
+ cursor.execute('''CREATE TABLE IF NOT EXISTS subscribed_channels (
+ id integer PRIMARY KEY,
+ channel_id text NOT NULL,
+ channel_name text NOT NULL,
+ time_last_checked integer
+ )''')
+ cursor.execute('''CREATE TABLE IF NOT EXISTS videos (
+ id integer PRIMARY KEY,
+ uploader_id integer NOT NULL REFERENCES subscribed_channels(id) ON UPDATE CASCADE ON DELETE CASCADE,
+ video_id text NOT NULL,
+ title text NOT NULL,
+ duration text,
+ time_published integer NOT NULL,
+ description text
+ )''')
+ connection.commit()
+ except:
+ connection.rollback()
+ connection.close()
+ raise
+
+ return connection
+
+def _subscribe(channels):
+ ''' channels is a list of (channel_id, channel_name) '''
+
+ # set time_last_checked to 0 on all channels being subscribed to
+ channels = ( (channel_id, channel_name, 0) for channel_id, channel_name in channels)
+
+ connection = open_database()
+ try:
+ cursor = connection.cursor()
+ cursor.executemany("INSERT INTO subscribed_channels (channel_id, channel_name, time_last_checked) VALUES (?, ?, ?)", channels)
+ connection.commit()
+ except:
+ connection.rollback()
+ raise
+ finally:
+ connection.close()
+
+def _unsubscribe(channel_ids):
+ ''' channel_ids is a list of channel_ids '''
+ connection = open_database()
+ try:
+ cursor = connection.cursor()
+ cursor.executemany("DELETE FROM subscribed_channels WHERE channel_id=?", ((channel_id, ) for channel_id in channel_ids))
+ connection.commit()
+ except:
+ connection.rollback()
+ raise
+ finally:
+ connection.close()
+
+def _get_videos(number, offset):
+ connection = open_database()
+ try:
+ cursor = connection.cursor()
+ db_videos = cursor.execute('''SELECT video_id, title, duration, channel_name
+ FROM videos
+ INNER JOIN subscribed_channels on videos.uploader_id = subscribed_channels.id
+ ORDER BY time_published DESC
+ LIMIT ? OFFSET ?''', (number, offset))
+
+ for db_video in db_videos:
+ yield {
+ 'id': db_video[0],
+ 'title': db_video[1],
+ 'duration': db_video[2],
+ 'author': db_video[3],
+ }
+ except:
+ connection.rollback()
+ raise
+ finally:
+ connection.close()
+
+
+
+units = {
+ 'year': 31536000, # 365*24*3600
+ 'month': 2592000, # 30*24*3600
+ 'week': 604800, # 7*24*3600
+ 'day': 86400, # 24*3600
+ 'hour': 3600,
+ 'minute': 60,
+ 'second': 1,
+}
+def youtube_timestamp_to_posix(dumb_timestamp):
+    ''' Given a dumbed-down timestamp such as "1 year ago" or "3 hours ago",
+    approximates the Unix time (seconds since 1970-01-01) '''
+ dumb_timestamp = dumb_timestamp.lower()
+ now = time.time()
+ if dumb_timestamp == "just now":
+ return now
+ split = dumb_timestamp.split(' ')
+ number, unit = int(split[0]), split[1]
+ if number > 1:
+ unit = unit[:-1] # remove s from end
+ return now - number*units[unit]
+
+
+weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
+months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
+def _get_upstream_videos(channel_id, time_last_checked):
+ feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id
+ headers = {}
+
+ # randomly change time_last_checked up to one day earlier to make tracking harder
+ time_last_checked = time_last_checked - secrets.randbelow(24*3600)
+ if time_last_checked < 0: # happens when time_last_checked is initialized to 0 when checking for first time
+ time_last_checked = 0
+
+ # If-Modified-Since header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since
+ struct_time = time.gmtime(time_last_checked)
+    weekday = weekdays[struct_time.tm_wday] # the header needs English day/month names, but strftime's %a/%b are locale-dependent
+ month = months[struct_time.tm_mon - 1]
+ headers['If-Modified-Since'] = time.strftime(weekday + ', %d ' + month + ' %Y %H:%M:%S GMT', struct_time)
+ print(headers['If-Modified-Since'])
+
+
+ headers['User-Agent'] = 'Python-urllib' # Don't leak python version
+ headers['Accept-Encoding'] = 'gzip, br'
+ req = urllib.request.Request(feed_url, headers=headers)
+ if settings.route_tor:
+ opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150))
+ else:
+ opener = urllib.request.build_opener()
+ response = opener.open(req, timeout=15)
+
+
+    if response.getcode() == 304:  # getcode() returns the status as an int
+ print('No new videos for ' + channel_id)
+ return []
+
+
+ content = response.read()
+ print('Retrieved videos for ' + channel_id)
+ content = util.decode_content(content, response.getheader('Content-Encoding', default='identity'))
+
+
+ feed = atoma.parse_atom_bytes(content)
+ atom_videos = {}
+ for entry in feed.entries:
+ video_id = entry.id_[9:] # example of id_: yt:video:q6EoRBvdVPQ
+
+ atom_videos[video_id] = {
+ 'title': entry.title.value,
+ #'description': '', # Not supported by atoma
+ #'duration': '', # Youtube's atom feeds don't provide it.. very frustrating
+ 'time_published': int(entry.published.timestamp()),
+ }
+
+
+ # final list
+ videos = []
+
+ # Now check channel page to retrieve missing information for videos
+ json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response'])
+ for json_video in json_channel_videos:
+ info = yt_data_extract.renderer_info(json_video['gridVideoRenderer'])
+ if 'description' not in info:
+ info['description'] = ''
+ if info['id'] in atom_videos:
+ info.update(atom_videos[info['id']])
+ else:
+ info['time_published'] = youtube_timestamp_to_posix(info['published'])
+ videos.append(info)
+ return videos
+
+def get_subscriptions_page(env, start_response):
+ items_html = '''<nav class="item-grid">\n'''
+
+ for item in _get_videos(30, 0):
+ items_html += html_common.video_item_html(item, html_common.small_video_item_template)
+ items_html += '''\n</nav>'''
+
+ start_response('200 OK', [('Content-type','text/html'),])
+ return subscriptions_template.substitute(
+ header = html_common.get_header(),
+ items = items_html,
+ page_buttons = '',
+ ).encode('utf-8')
+
+def post_subscriptions_page(env, start_response):
+ params = env['parameters']
+ action = params['action'][0]
+ if action == 'subscribe':
+ if len(params['channel_id']) != len(params['channel_name']):
+ start_response('400 Bad Request', ())
+ return b'400 Bad Request, length of channel_id != length of channel_name'
+ _subscribe(zip(params['channel_id'], params['channel_name']))
+
+ elif action == 'unsubscribe':
+ _unsubscribe(params['channel_id'])
+
+ elif action == 'refresh':
+ connection = open_database()
+ try:
+ cursor = connection.cursor()
+ for uploader_id, channel_id, time_last_checked in cursor.execute('''SELECT id, channel_id, time_last_checked FROM subscribed_channels'''):
+ db_videos = ( (uploader_id, info['id'], info['title'], info['duration'], info['time_published'], info['description']) for info in _get_upstream_videos(channel_id, time_last_checked) )
+ cursor.executemany('''INSERT INTO videos (uploader_id, video_id, title, duration, time_published, description) VALUES (?, ?, ?, ?, ?, ?)''', db_videos)
+
+ cursor.execute('''UPDATE subscribed_channels SET time_last_checked = ?''', ( int(time.time()), ) )
+ connection.commit()
+ except:
+ connection.rollback()
+ raise
+ finally:
+ connection.close()
+
+ start_response('303 See Other', [('Location', util.URL_ORIGIN + '/subscriptions'),] )
+ return b''
+ else:
+ start_response('400 Bad Request', ())
+ return b'400 Bad Request'
+ start_response('204 No Content', ())
+ return b''
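
The POST handler above indexes env['parameters'] as a dict of lists of strings (that shape is implied by params['action'][0] and by zipping channel_id with channel_name). A rough sketch of what a subscribe request boils down to; the parameter parsing itself happens outside this file and is assumed here:

# Hypothetical form data for a subscribe POST; the dict-of-lists shape is an
# assumption based on how post_subscriptions_page indexes env['parameters'].
params = {
    'action': ['subscribe'],
    'channel_id': ['UCYO_jab_esuFRV4b17AJtAw'],
    'channel_name': ['3Blue1Brown'],
}

# post_subscriptions_page pairs the two lists positionally...
channels = zip(params['channel_id'], params['channel_name'])

# ...and _subscribe widens each pair with time_last_checked = 0 before the
# INSERT, so every newly added channel is picked up by the next 'refresh'.
rows = [(channel_id, channel_name, 0) for channel_id, channel_name in channels]
print(rows)   # [('UCYO_jab_esuFRV4b17AJtAw', '3Blue1Brown', 0)]
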
diff --git a/youtube/youtube.py b/youtube/youtube.py
index a6a216e..4ec7962 100644
--- a/youtube/youtube.py
+++ b/youtube/youtube.py
@@ -1,7 +1,7 @@
import mimetypes
import urllib.parse
import os
-from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util
+from youtube import local_playlist, watch, search, playlist, channel, comments, post_comment, accounts, util, subscriptions
import settings
YOUTUBE_FILES = (
"/shared.css",
@@ -24,6 +24,8 @@ get_handlers = {
'post_comment': post_comment.get_post_comment_page,
'delete_comment': post_comment.get_delete_comment_page,
'login': accounts.get_account_login_page,
+
+ 'subscriptions': subscriptions.get_subscriptions_page,
}
post_handlers = {
'edit_playlist': local_playlist.edit_playlist,
@@ -33,6 +35,8 @@ post_handlers = {
'comments': post_comment.post_comment,
'post_comment': post_comment.post_comment,
'delete_comment': post_comment.delete_comment,
+
+ 'subscriptions': subscriptions.post_subscriptions_page,
}
def youtube(env, start_response):
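
The two new dictionary entries only take effect through whatever routing youtube() performs on the request path. That dispatch code is outside this diff, so the sketch below is an assumption about its shape, not the project's actual implementation; it only shows how a lookup against the get_handlers / post_handlers dicts registered above could route /subscriptions requests:

# Hedged sketch of a WSGI dispatcher using handler dicts like those shown above.
# The path handling ('PATH_INFO' split, last-segment lookup) is assumed.
def dispatch(env, start_response, get_handlers, post_handlers):
    name = env['PATH_INFO'].rstrip('/').split('/')[-1]   # '/subscriptions' -> 'subscriptions'
    handlers = get_handlers if env['REQUEST_METHOD'] == 'GET' else post_handlers
    if name in handlers:
        return handlers[name](env, start_response)
    start_response('404 Not Found', [('Content-type', 'text/plain')])
    return b'404 Not Found'
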