diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-19 19:48:53 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-19 19:48:53 -0800 |
commit | d1d908d5b1aadb0dc75b25df1a47789c021f89e2 (patch) | |
tree | 56a1a0e8361a732241774fee35ae521858d16d5f /youtube/yt_data_extract | |
parent | 76376b29a0adf6bd6d7a0202d904f923bdc8aa57 (diff) | |
download | yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.tar.lz yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.tar.xz yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.zip |
Extraction: Move HTML post-processing stuff from yt_data_extract to util
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/__init__.py | 3 | ||||
-rw-r--r-- | youtube/yt_data_extract/common.py | 39 |
2 files changed, 1 insertion, 41 deletions
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py index f2a93a9..f2f07c0 100644 --- a/youtube/yt_data_extract/__init__.py +++ b/youtube/yt_data_extract/__init__.py @@ -1,8 +1,7 @@ from .common import (get, multi_get, deep_get, multi_deep_get, liberal_update, conservative_update, remove_redirect, normalize_url, extract_str, extract_formatted_text, extract_int, extract_approx_int, - extract_date, extract_item_info, extract_items, extract_response, - prefix_urls, add_extra_html_info, parse_info_prepare_for_html) + extract_date, extract_item_info, extract_items, extract_response) from .everything_else import (extract_channel_info, extract_search_info, extract_playlist_metadata, extract_playlist_info, extract_comments_info) diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 5fa67bc..459b5e9 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -1,6 +1,3 @@ -from youtube import util - -import json import re import urllib.parse import collections @@ -179,35 +176,6 @@ def check_missing_keys(object, *key_sequences): return None -def prefix_urls(item): - try: - item['thumbnail'] = util.prefix_url(item['thumbnail']) - except KeyError: - pass - - try: - item['author_url'] = util.prefix_url(item['author_url']) - except KeyError: - pass - -def add_extra_html_info(item): - if item['type'] == 'video': - item['url'] = (util.URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None - - video_info = {} - for key in ('id', 'title', 'author', 'duration'): - try: - video_info[key] = item[key] - except KeyError: - video_info[key] = '' - - item['video_info'] = json.dumps(video_info) - - elif item['type'] == 'playlist': - item['url'] = (util.URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None - elif item['type'] == 'channel': - item['url'] = (util.URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None - def extract_item_info(item, 
additional_info={}): if not item: return {'error': 'No item given'} @@ -307,13 +275,6 @@ def extract_item_info(item, additional_info={}): ))) return info -def parse_info_prepare_for_html(renderer, additional_info={}): - item = extract_item_info(renderer, additional_info) - prefix_urls(item) - add_extra_html_info(item) - - return item - def extract_response(polymer_json): '''return response, error''' response = multi_deep_get(polymer_json, [1, 'response'], ['response'], default=None, types=dict) |