From bd343ed71f628e0f1dd1eb3f45fb4e04887f223f Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 17:28:11 -0700 Subject: Extraction: Move channel extraction to yt_data_extract --- youtube/subscriptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube/subscriptions.py') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 56bdf93..175622f 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -455,7 +455,7 @@ def _get_upstream_videos(channel_id): print('Failed to read atoma feed for ' + channel_status_name) traceback.print_exc() - videos = channel.extract_info(json.loads(channel_tab), 'videos')['items'] + videos = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos')['items'] for i, video_item in enumerate(videos): if 'description' not in video_item: video_item['description'] = '' -- cgit v1.2.3 From 216231f9a6ca9ed48389e797a0c30d7d3b01e379 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 17:48:02 -0700 Subject: Extraction: Proper error handling for terminated or non-existent channels --- youtube/subscriptions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'youtube/subscriptions.py') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 175622f..87e1659 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -455,7 +455,12 @@ def _get_upstream_videos(channel_id): print('Failed to read atoma feed for ' + channel_status_name) traceback.print_exc() - videos = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos')['items'] + channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos') + if channel_info['errors']: + print('Error checking channel ' + channel_status_name + ': ' + ', '.join(channel_info['errors'])) + return + + videos = channel_info['items'] for i, video_item in enumerate(videos): if 'description' not in video_item: video_item['description'] = 
'' -- cgit v1.2.3 From dc6c370152d063ad4198c747fc12eb06fc1ec0e4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Sep 2019 21:39:53 -0700 Subject: Extraction: refactor response extraction to work with both mobile & desktop responses, also improve errors --- youtube/subscriptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube/subscriptions.py') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 87e1659..e0c71f5 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -456,8 +456,8 @@ def _get_upstream_videos(channel_id): traceback.print_exc() channel_info = yt_data_extract.extract_channel_info(json.loads(channel_tab), 'videos') - if channel_info['errors']: - print('Error checking channel ' + channel_status_name + ': ' + ', '.join(channel_info['errors'])) + if channel_info['error']: + print('Error checking channel ' + channel_status_name + ': ' + channel_info['error']) return videos = channel_info['items'] -- cgit v1.2.3 From 98777ee82561ae205f156a7f8497728aecfa080c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Dec 2019 19:39:16 -0800 Subject: Extraction: Rewrite item_extraction for better error handling and readability, rename extracted names for more consistency --- youtube/subscriptions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'youtube/subscriptions.py') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index e0c71f5..9709467 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -172,7 +172,7 @@ def _get_videos(cursor, number_per_page, offset, tag = None): 'id': db_video[0], 'title': db_video[1], 'duration': db_video[2], - 'published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]), + 'time_published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]), 'author': db_video[5], }) @@ -462,8 +462,10 @@ def _get_upstream_videos(channel_id): videos 
= channel_info['items'] for i, video_item in enumerate(videos): - if 'description' not in video_item: + if not video_item.get('description'): video_item['description'] = '' + else: + video_item['description'] = ''.join(run.get('text', '') for run in video_item['description']) if video_item['id'] in times_published: video_item['time_published'] = times_published[video_item['id']] @@ -471,7 +473,7 @@ def _get_upstream_videos(channel_id): else: video_item['is_time_published_exact'] = False try: - video_item['time_published'] = youtube_timestamp_to_posix(video_item['published']) - i # subtract a few seconds off the videos so they will be in the right order + video_item['time_published'] = youtube_timestamp_to_posix(video_item['time_published']) - i # subtract a few seconds off the videos so they will be in the right order except KeyError: print(video_item) -- cgit v1.2.3 From d1d908d5b1aadb0dc75b25df1a47789c021f89e2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 19 Dec 2019 19:48:53 -0800 Subject: Extraction: Move html post processing stuff from yt_data_extract to util --- youtube/subscriptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube/subscriptions.py') diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index 9709467..dd058b3 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -766,7 +766,7 @@ def get_subscriptions_page(): video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg' video['type'] = 'video' video['item_size'] = 'small' - yt_data_extract.add_extra_html_info(video) + util.add_extra_html_info(video) tags = _get_all_tags(cursor) -- cgit v1.2.3