diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-19 21:33:54 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-19 21:33:54 -0800 |
commit | b4406df9cf33c53b6e942e6a5c72d955f57c4b5f (patch) | |
tree | 4de0082ac9eb26a05188dd424835ea50b1483113 /youtube/subscriptions.py | |
parent | b614fcdb8579ba29fccfa47eab1e2965cfb0beaa (diff) | |
parent | 6b7a1212e30b713453aa7d2b3a7122e97689dad0 (diff) | |
download | yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.lz yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.xz yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.zip |
Merge branch 'modular-data-extract'
Commits in this branch are prefixed with "Extraction:"
This branch refactors data extraction. All such functionality has been moved to the yt_data_extract module.
Responses from requests are given to the module and it parses them into a consistent, more useful format.
The dependency on youtube-dl has also been dropped and this functionality has been built from scratch for these reasons:
(1) I've noticed youtube-dl breaks more often than Invidious (which uses watch page extraction built from scratch) in response to changes from YouTube, so I'm hoping what I wrote will also be less brittle.
(2) Such breakage is inconvenient: because I had to modify youtube-dl to make it do things such as extracting related videos, I have to merge the upstream fixes manually.
(3) I have no control over error handling and request pooling with youtube-dl, since it does all the requests (these would require intrusive changes I don't want to maintain).
(4) I will finally be able to display the number of comments and whether comments are disabled without making additional requests.
Diffstat (limited to 'youtube/subscriptions.py')
-rw-r--r-- | youtube/subscriptions.py | 17 |
1 file changed, 12 insertions, 5 deletions
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index c9638cf..18436e2 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -172,7 +172,7 @@ def _get_videos(cursor, number_per_page, offset, tag = None): 'id': db_video[0], 'title': db_video[1], 'duration': db_video[2], - 'published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]), + 'time_published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]), 'author': db_video[5], }) @@ -455,10 +455,17 @@ def _get_upstream_videos(channel_id): print('Failed to read atoma feed for ' + channel_status_name) traceback.print_exc() - videos = channel.extract_info(json.loads(channel_tab), 'videos')['items'] + channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos') + if channel_info['error']: + print('Error checking channel ' + channel_status_name + ': ' + channel_info['error']) + return + + videos = channel_info['items'] for i, video_item in enumerate(videos): - if 'description' not in video_item: + if not video_item.get('description'): video_item['description'] = '' + else: + video_item['description'] = ''.join(run.get('text', '') for run in video_item['description']) if video_item['id'] in times_published: video_item['time_published'] = times_published[video_item['id']] @@ -466,7 +473,7 @@ def _get_upstream_videos(channel_id): else: video_item['is_time_published_exact'] = False try: - video_item['time_published'] = youtube_timestamp_to_posix(video_item['published']) - i # subtract a few seconds off the videos so they will be in the right order + video_item['time_published'] = youtube_timestamp_to_posix(video_item['time_published']) - i # subtract a few seconds off the videos so they will be in the right order except KeyError: print(video_item) @@ -759,7 +766,7 @@ def get_subscriptions_page(): video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg' 
video['type'] = 'video' video['item_size'] = 'small' - yt_data_extract.add_extra_html_info(video) + util.add_extra_html_info(video) tags = _get_all_tags(cursor) |