From 98777ee82561ae205f156a7f8497728aecfa080c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Dec 2019 19:39:16 -0800 Subject: Extraction: Rewrite item_extraction for better error handling and readability, rename extracted names for more consistency --- youtube/__init__.py | 7 + youtube/comments.py | 22 +- youtube/playlist.py | 12 +- youtube/search.py | 6 +- youtube/subscriptions.py | 8 +- youtube/templates/comments.html | 4 +- youtube/templates/common_elements.html | 78 +++--- youtube/templates/playlist.html | 5 +- youtube/templates/watch.html | 6 +- youtube/util.py | 2 + youtube/watch.py | 8 +- youtube/yt_data_extract.py | 487 +++++++++++++++------------------ 12 files changed, 305 insertions(+), 340 deletions(-) diff --git a/youtube/__init__.py b/youtube/__init__.py index 0137e86..534b9f8 100644 --- a/youtube/__init__.py +++ b/youtube/__init__.py @@ -23,3 +23,10 @@ def inject_theme_preference(): 'theme_path': '/youtube.com/static/' + theme_names[settings.theme] + '.css', } +@yt_app.template_filter('commatize') +def commatize(num): + if num is None: + return '' + if isinstance(num, str): + num = int(num) + return '{:,}'.format(num) diff --git a/youtube/comments.py b/youtube/comments.py index 250a95f..e237f0f 100644 --- a/youtube/comments.py +++ b/youtube/comments.py @@ -91,33 +91,33 @@ def post_process_comments_info(comments_info): comment['author_url'] = util.URL_ORIGIN + comment['author_url'] comment['author_avatar'] = '/' + comment['author_avatar'] - comment['permalink'] = util.URL_ORIGIN + '/watch?v=' + comments_info['video_id'] + '&lc=' + comment['comment_id'] + comment['permalink'] = util.URL_ORIGIN + '/watch?v=' + comments_info['video_id'] + '&lc=' + comment['id'] if comment['author_channel_id'] in accounts.accounts: comment['delete_url'] = (util.URL_ORIGIN + '/delete_comment?video_id=' + comments_info['video_id'] + '&channel_id='+ comment['author_channel_id'] + '&author_id=' + comment['author_id'] - + '&comment_id=' + comment['comment_id']) + + '&comment_id=' + comment['id']) - num_replies = comment['number_of_replies'] - if num_replies == 0: - comment['replies_url'] = util.URL_ORIGIN + '/post_comment?parent_id=' + comment['comment_id'] + "&video_id=" + comments_info['video_id'] + reply_count = comment['reply_count'] + if reply_count == 0: + comment['replies_url'] = util.URL_ORIGIN + '/post_comment?parent_id=' + comment['id'] + "&video_id=" + comments_info['video_id'] else: - comment['replies_url'] = util.URL_ORIGIN + '/comments?parent_id=' + comment['comment_id'] + "&video_id=" + comments_info['video_id'] + comment['replies_url'] = util.URL_ORIGIN + '/comments?parent_id=' + comment['id'] + "&video_id=" + comments_info['video_id'] - if num_replies == 0: + if reply_count == 0: comment['view_replies_text'] = 'Reply' - elif num_replies == 1: + elif reply_count == 1: comment['view_replies_text'] = '1 reply' else: - comment['view_replies_text'] = str(num_replies) + ' replies' + comment['view_replies_text'] = str(reply_count) + ' replies' - if comment['likes'] == 1: + if comment['like_count'] == 1: comment['likes_text'] = '1 like' else: - comment['likes_text'] = str(comment['likes']) + ' likes' + comment['likes_text'] = str(comment['like_count']) + ' likes' comments_info['include_avatars'] = settings.enable_comment_avatars if comments_info['ctoken']: diff --git a/youtube/playlist.py b/youtube/playlist.py index bc2c417..ced0644 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -98,13 +98,19 @@ def get_playlist_page(): info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) yt_data_extract.prefix_urls(info['metadata']) - for item in info['items']: + for item in info.get('items', ()): yt_data_extract.prefix_urls(item) yt_data_extract.add_extra_html_info(item) + if 'id' in item: + item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg' + + video_count = yt_data_extract.default_multi_get(info, 'metadata', 'video_count') + if video_count is None: + video_count = 40 return flask.render_template('playlist.html', - video_list = info['items'], - num_pages = math.ceil(info['metadata']['size']/20), + video_list = info.get('items', []), + num_pages = math.ceil(video_count/20), parameters_dictionary = request.args, **info['metadata'] diff --git a/youtube/search.py b/youtube/search.py index cb66744..a881557 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -79,9 +79,9 @@ def get_search_page(): if search_info['error']: return flask.render_template('error.html', error_message = search_info['error']) - for item_info in search_info['items']: - yt_data_extract.prefix_urls(item_info) - yt_data_extract.add_extra_html_info(item_info) + for extract_item_info in search_info['items']: + yt_data_extract.prefix_urls(extract_item_info) + yt_data_extract.add_extra_html_info(extract_item_info) corrections = search_info['corrections'] if corrections['type'] == 'did_you_mean': diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py index e0c71f5..9709467 100644 --- a/youtube/subscriptions.py +++ b/youtube/subscriptions.py @@ -172,7 +172,7 @@ def _get_videos(cursor, number_per_page, offset, tag = None): 'id': db_video[0], 'title': db_video[1], 'duration': db_video[2], - 'published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]), + 'time_published': exact_timestamp(db_video[3]) if db_video[4] else posix_to_dumbed_down(db_video[3]), 'author': db_video[5], }) @@ -462,8 +462,10 @@ def _get_upstream_videos(channel_id): videos = channel_info['items'] for i, video_item in enumerate(videos): - if 'description' not in video_item: + if not video_item.get('description'): video_item['description'] = '' + else: + video_item['description'] = ''.join(run.get('text', '') for run in video_item['description']) if video_item['id'] in times_published: video_item['time_published'] = times_published[video_item['id']] @@ -471,7 +473,7 @@ def _get_upstream_videos(channel_id): else: video_item['is_time_published_exact'] = False try: - video_item['time_published'] = youtube_timestamp_to_posix(video_item['published']) - i # subtract a few seconds off the videos so they will be in the right order + video_item['time_published'] = youtube_timestamp_to_posix(video_item['time_published']) - i # subtract a few seconds off the videos so they will be in the right order except KeyError: print(video_item) diff --git a/youtube/templates/comments.html b/youtube/templates/comments.html index 20cde4e..396852a 100644 --- a/youtube/templates/comments.html +++ b/youtube/templates/comments.html @@ -12,11 +12,11 @@ {{ comment['author'] }} {{ common_elements.text_runs(comment['text']) }} - {{ comment['likes_text'] if comment['likes'] else ''}} + {{ comment['likes_text'] if comment['like_count'] else ''}}
{{ comment['view_replies_text'] }} {% if 'delete_url' is in comment %} diff --git a/youtube/templates/common_elements.html b/youtube/templates/common_elements.html index 1a417ae..4c776b6 100644 --- a/youtube/templates/common_elements.html +++ b/youtube/templates/common_elements.html @@ -9,55 +9,59 @@ {{ text_run["text"] }} {%- endif -%} {%- endfor -%} - {%- else -%} + {%- elif runs -%} {{ runs }} {%- endif -%} {% endmacro %} {% macro item(info, description=false, horizontal=true, include_author=true, include_badges=true) %}
-
- - - {% if info['type'] != 'channel' %} -
- {{ info['size'] if info['type'] == 'playlist' else info['duration'] }} -
- {% endif %} -
+ {% if info['error'] %} + {{ info['error'] }} + {% else %} +
+ + + {% if info['type'] != 'channel' %} +
+ {{ (info['video_count']|string + ' videos') if info['type'] == 'playlist' else info['duration'] }} +
+ {% endif %} +
- + -
    - {% if info['type'] == 'channel' %} -
  • {{ info['subscriber_count'] }} subscribers
  • -
  • {{ info['size'] }} videos
  • - {% else %} - {% if include_author %} - {% if 'author_url' is in(info) %} -
  • By {{ info['author'] }}
  • - {% else %} -
  • {{ info['author'] }}
  • +
      + {% if info['type'] == 'channel' %} +
    • {{ info['approx_subscriber_count'] }} subscribers
    • +
    • {{ info['video_count'] }} videos
    • + {% else %} + {% if include_author %} + {% if info.get('author_url') %} +
    • By {{ info['author'] }}
    • + {% else %} +
    • {{ info['author'] }}
    • + {% endif %} + {% endif %} + {% if info.get('approx_view_count') %} +
    • {{ info['approx_view_count'] }} views
    • + {% endif %} + {% if info.get('time_published') %} +
    • {% endif %} {% endif %} - {% if 'views' is in(info) %} -
    • {{ info['views'] }}
    • - {% endif %} - {% if 'published' is in(info) %} -
    • - {% endif %} - {% endif %} -
    +
- {% if description %} - {{ text_runs(info.get('description', '')) }} - {% endif %} - {% if include_badges %} - {{ info['badges']|join(' | ') }} + {% if description %} + {{ text_runs(info.get('description', '')) }} + {% endif %} + {% if include_badges %} + {{ info['badges']|join(' | ') }} + {% endif %} +
+ {% if info['type'] == 'video' %} + {% endif %} -
- {% if info['type'] == 'video' %} - {% endif %}
diff --git a/youtube/templates/playlist.html b/youtube/templates/playlist.html index 52c468e..ebd152b 100644 --- a/youtube/templates/playlist.html +++ b/youtube/templates/playlist.html @@ -54,8 +54,9 @@

{{ title }}

{{ author }}
-
{{ views }}
-
{{ size }} videos
+
{{ video_count|commatize }} videos
+
{{ view_count|commatize }} views
+
Last updated {{ time_published }}
{{ common_elements.text_runs(description) }}
diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html index 0ffa358..5bd2a25 100644 --- a/youtube/templates/watch.html +++ b/youtube/templates/watch.html @@ -261,11 +261,11 @@ {%- endif -%}
Uploaded by {{ uploader }}
- {{ views }} views + {{ view_count }} views - - {{ likes }} likes {{ dislikes }} dislikes + + {{ like_count }} likes {{ dislike_count }} dislikes
Download