Merge branch 'modular-data-extract'

Commits in this branch are prefixed with "Extraction:". This branch refactors data extraction. All such functionality has been moved to the yt_data_extract module. Responses from requests are given to the module, and it parses them into a consistent, more useful format. The dependency on youtube-dl has also been dropped, and this functionality has been built from scratch for these reasons: (1) I've noticed youtube-dl breaks more often than Invidious (which uses watch page extraction built from scratch) in response to changes from YouTube, so I'm hoping what I wrote will also be less brittle. (2) Such breakage is inconvenient because I have to manually merge the fixes, since I had to make changes to youtube-dl to make it do things such as extracting related videos. (3) I have no control over error handling and request pooling with youtube-dl, since it does all the requests (these would require intrusive changes I don't want to maintain). (4) I will finally be able to display the number of comments and whether comments are disabled without making additional requests.
author: James Taylor <user234683@users.noreply.github.com> 2019-12-19 21:33:54 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2019-12-19 21:33:54 -0800
commit: b4406df9cf33c53b6e942e6a5c72d955f57c4b5f (patch)
tree: 4de0082ac9eb26a05188dd424835ea50b1483113 /youtube/subscriptions.py
parent: b614fcdb8579ba29fccfa47eab1e2965cfb0beaa (diff)
parent: 6b7a1212e30b713453aa7d2b3a7122e97689dad0 (diff)
download: yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.lz
yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.xz
yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.zip
1 files changed, 12 insertions, 5 deletions
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index c9638cf..18436e2 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -172,7 +172,7 @@ def _get_videos(cursor, number_per_page, offset, tag = None):
             'id':   db_video[0],
             'title':    db_video[1],
             'duration': db_video[2],
-            'published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]),
+            'time_published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]),
             'author':   db_video[5],
         })
 
@@ -455,10 +455,17 @@ def _get_upstream_videos(channel_id):
         print('Failed to read atoma feed for ' + channel_status_name)
         traceback.print_exc()
 
-    videos = channel.extract_info(json.loads(channel_tab), 'videos')['items']
+    channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos')
+    if channel_info['error']:
+        print('Error checking channel ' + channel_status_name + ': ' + channel_info['error'])
+        return
+
+    videos = channel_info['items']
     for i, video_item in enumerate(videos):
-        if 'description' not in video_item:
+        if not video_item.get('description'):
             video_item['description'] = ''
+        else:
+            video_item['description'] = ''.join(run.get('text', '') for run in video_item['description'])
 
         if video_item['id'] in times_published:
             video_item['time_published'] = times_published[video_item['id']]
@@ -466,7 +473,7 @@ def _get_upstream_videos(channel_id):
         else:
             video_item['is_time_published_exact'] = False
             try:
-                video_item['time_published'] = youtube_timestamp_to_posix(video_item['published']) - i  # subtract a few seconds off the videos so they will be in the right order
+                video_item['time_published'] = youtube_timestamp_to_posix(video_item['time_published']) - i  # subtract a few seconds off the videos so they will be in the right order
             except KeyError:
                 print(video_item)
 
@@ -759,7 +766,7 @@ def get_subscriptions_page():
                 video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg'
                 video['type'] = 'video'
                 video['item_size'] = 'small'
-                yt_data_extract.add_extra_html_info(video)
+                util.add_extra_html_info(video)
 
             tags = _get_all_tags(cursor)
author	James Taylor <user234683@users.noreply.github.com>	2019-12-19 21:33:54 -0800
committer	James Taylor <user234683@users.noreply.github.com>	2019-12-19 21:33:54 -0800
commit	b4406df9cf33c53b6e942e6a5c72d955f57c4b5f (patch)
tree	4de0082ac9eb26a05188dd424835ea50b1483113 /youtube/subscriptions.py
parent	b614fcdb8579ba29fccfa47eab1e2965cfb0beaa (diff)
parent	6b7a1212e30b713453aa7d2b3a7122e97689dad0 (diff)
download	yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.lz yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.xz yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.zip