From c65df7d27df64049e1597e245758f70e808173e1 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 16 Feb 2019 14:00:06 -0800 Subject: subscriptions: Basic new videos checking function for channel --- youtube/subscriptions.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 102 insertions(+), 1 deletion(-) (limited to 'youtube/subscriptions.py') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 39957bf..0d31bd4 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -1,6 +1,14 @@ -from youtube import common, settings +from youtube import common, settings, channel import sqlite3 import os +import secrets +import datetime + +# so as to not completely break on people who have updated but don't know of new dependency +try: + import atoma +except ModuleNotFoundError: + print('Error: atoma not installed, subscriptions will not work') # https://stackabuse.com/a-sqlite-tutorial-with-python/ @@ -74,3 +82,96 @@ def _get_videos(number, offset): raise finally: connection.close() + + + +units = { + 'year': 31536000, # 365*24*3600 + 'month': 2592000, # 30*24*3600 + 'week': 604800, # 7*24*3600 + 'day': 86400, # 24*3600 + 'hour': 3600, + 'minute': 60, + 'second': 1, +} +def youtube_timestamp_to_posix(dumb_timestamp): + ''' Given a dumbed down timestamp such as 1 year ago, 3 hours ago, + approximates the unix time (seconds since 1/1/1970) ''' + dumb_timestamp = dumb_timestamp.lower() + now = time.time() + if dumb_timestamp == "just now": + return now + split = dumb_timestamp.split(' ') + number, unit = int(split[0]), split[1] + if number > 1: + unit = unit[:-1] # remove s from end + return now - number*units[unit] + + +weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun') +months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec') +def _get_upstream_videos(channel_id, channel_name, time_last_checked): + feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + channel_id + headers = {} + + # randomly change time_last_checked up to one day earlier to make tracking harder + time_last_checked = time_last_checked - secrets.randbelow(24*3600) + + # If-Modified-Since header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since + struct_time = time.gmtime(time_last_checked) + weekday = weekdays[struct_time.tm_wday] # dumb requirement + month = months[struct_time.tm_mon - 1] + headers['If-Modified-Since'] = time.strftime(weekday + ', %d ' + month + ' %Y %H:%M:%S GMT', struct_time) + print(headers['If-Modified-Since']) + + + headers['User-Agent'] = 'Python-urllib' # Don't leak python version + headers['Accept-Encoding'] = 'gzip, br' + req = urllib.request.Request(url, headers=headers) + if settings.route_tor: + opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150)) + else: + opener = urllib.request.build_opener() + response = opener.open(req, timeout=15) + + + if response.getcode == '304': + print('No new videos for ' + channel_id) + return [] + + + content = response.read() + print('Retrieved videos for ' + channel_id) + content = common.decode_content(content, response.getheader('Content-Encoding', default='identity')) + + + feed = atoma.parse_atom_bytes(content) + atom_videos = {} + for entry in feed.entries: + video_id = entry.id_[9:] # example of id_: yt:video:q6EoRBvdVPQ + + # standard names used in this program for purposes of html templating + atom_videos[video_id] = { + 'title': entry.title.value, + 'author': entry.authors[0].name, + #'description': '', # Not supported by atoma + #'duration': '', # Youtube's atom feeds don't provide it.. very frustrating + 'published': entry.published.strftime('%m/%d/%Y'), + 'time_published': int(entry.published.timestamp()), + } + + + # final list + videos = [] + + # Now check channel page to retrieve missing information for videos + json_channel_videos = channel.get_grid_items(channel.get_channel_tab(channel_id)[1]['response']) + for json_video in json_channel_videos: + info = renderer_info(json_video) + if info['id'] in atom_videos: + info.update(atom_videos[info['id']]) + else: + info['author'] = channel_name + info['time published'] = youtube_timestamp_to_posix(info['published']) + videos.append(info) + return videos -- cgit v1.2.3