Extraction: Move html post processing stuff from yt_data_extract to util

author: James Taylor <user234683@users.noreply.github.com> 2019-12-19 19:48:53 -0800
committer: James Taylor <user234683@users.noreply.github.com> 2019-12-19 19:48:53 -0800
commit: d1d908d5b1aadb0dc75b25df1a47789c021f89e2 (patch)
tree: 56a1a0e8361a732241774fee35ae521858d16d5f /youtube
parent: 76376b29a0adf6bd6d7a0202d904f923bdc8aa57 (diff)
download: yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.tar.lz
yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.tar.xz
yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.zip
9 files changed, 50 insertions, 52 deletions
diff --git a/youtube/channel.py b/youtube/channel.py
index 67a79ad..ad06e3f 100644
--- a/youtube/channel.py
+++ b/youtube/channel.py
@@ -142,8 +142,8 @@ def post_process_channel_info(info):
     info['avatar'] = util.prefix_url(info['avatar'])
     info['channel_url'] = util.prefix_url(info['channel_url'])
     for item in info['items']:
-        yt_data_extract.prefix_urls(item)
-        yt_data_extract.add_extra_html_info(item)
+        util.prefix_urls(item)
+        util.add_extra_html_info(item)
 
 
 
diff --git a/youtube/local_playlist.py b/youtube/local_playlist.py
index 2375ba2..0b47c72 100644
--- a/youtube/local_playlist.py
+++ b/youtube/local_playlist.py
@@ -57,7 +57,7 @@ def get_local_playlist_videos(name, offset=0, amount=50):
                 info['thumbnail'] = util.get_thumbnail_url(info['id'])
                 missing_thumbnails.append(info['id'])
             info['type'] = 'video'
-            yt_data_extract.add_extra_html_info(info)
+            util.add_extra_html_info(info)
             videos.append(info)
         except json.decoder.JSONDecodeError:
             if not video_json.strip() == '':
diff --git a/youtube/playlist.py b/youtube/playlist.py
index 5dc8ab7..3ca235a 100644
--- a/youtube/playlist.py
+++ b/youtube/playlist.py
@@ -97,10 +97,10 @@ def get_playlist_page():
     if page != '1':
         info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json)
 
-    yt_data_extract.prefix_urls(info['metadata'])
+    util.prefix_urls(info['metadata'])
     for item in info.get('items', ()):
-        yt_data_extract.prefix_urls(item)
-        yt_data_extract.add_extra_html_info(item)
+        util.prefix_urls(item)
+        util.add_extra_html_info(item)
         if 'id' in item:
             item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg'
 
diff --git a/youtube/search.py b/youtube/search.py
index a881557..0f6bbc4 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -80,8 +80,8 @@ def get_search_page():
         return flask.render_template('error.html', error_message = search_info['error'])
 
     for extract_item_info in search_info['items']:
-        yt_data_extract.prefix_urls(extract_item_info)
-        yt_data_extract.add_extra_html_info(extract_item_info)
+        util.prefix_urls(extract_item_info)
+        util.add_extra_html_info(extract_item_info)
 
     corrections = search_info['corrections']
     if corrections['type'] == 'did_you_mean':
diff --git a/youtube/subscriptions.py b/youtube/subscriptions.py
index 9709467..dd058b3 100644
--- a/youtube/subscriptions.py
+++ b/youtube/subscriptions.py
@@ -766,7 +766,7 @@ def get_subscriptions_page():
                 video['thumbnail'] = util.URL_ORIGIN + '/data/subscription_thumbnails/' + video['id'] + '.jpg'
                 video['type'] = 'video'
                 video['item_size'] = 'small'
-                yt_data_extract.add_extra_html_info(video)
+                util.add_extra_html_info(video)
 
             tags = _get_all_tags(cursor)
 
diff --git a/youtube/util.py b/youtube/util.py
index 9023b98..feeec8c 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,4 +1,5 @@
 import settings
+from youtube import yt_data_extract
 import socks, sockshandler
 import gzip
 import brotli
@@ -6,6 +7,7 @@ import urllib.parse
 import re
 import time
 import os
+import json
 import gevent
 import gevent.queue
 import gevent.lock
@@ -321,3 +323,39 @@ def left_remove(string, substring):
         return string[len(substring):]
     return string
 
+
+def prefix_urls(item):
+    try:
+        item['thumbnail'] = prefix_url(item['thumbnail'])
+    except KeyError:
+        pass
+
+    try:
+        item['author_url'] = prefix_url(item['author_url'])
+    except KeyError:
+        pass
+
+def add_extra_html_info(item):
+    if item['type'] == 'video':
+        item['url'] = (URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
+
+        video_info = {}
+        for key in ('id', 'title', 'author', 'duration'):
+            try:
+                video_info[key] = item[key]
+            except KeyError:
+                video_info[key] = ''
+
+        item['video_info'] = json.dumps(video_info)
+
+    elif item['type'] == 'playlist':
+        item['url'] = (URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None
+    elif item['type'] == 'channel':
+        item['url'] = (URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
+
+def parse_info_prepare_for_html(renderer, additional_info={}):
+    item = yt_data_extract.extract_item_info(renderer, additional_info)
+    prefix_urls(item)
+    add_extra_html_info(item)
+
+    return item
diff --git a/youtube/watch.py b/youtube/watch.py
index 69ab87b..45d658f 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -370,8 +370,8 @@ def get_watch_page():
     }
 
     for item in info['related_videos']:
-        yt_data_extract.prefix_urls(item)
-        yt_data_extract.add_extra_html_info(item)
+        util.prefix_urls(item)
+        util.add_extra_html_info(item)
 
     if settings.gather_googlevideo_domains:
         with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f:
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index f2a93a9..f2f07c0 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -1,8 +1,7 @@
 from .common import (get, multi_get, deep_get, multi_deep_get,
     liberal_update, conservative_update, remove_redirect, normalize_url,
     extract_str, extract_formatted_text, extract_int, extract_approx_int,
-    extract_date, extract_item_info, extract_items, extract_response,
-    prefix_urls, add_extra_html_info, parse_info_prepare_for_html)
+    extract_date, extract_item_info, extract_items, extract_response)
 
 from .everything_else import (extract_channel_info, extract_search_info,
     extract_playlist_metadata, extract_playlist_info, extract_comments_info)
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 5fa67bc..459b5e9 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -1,6 +1,3 @@
-from youtube import util
-
-import json
 import re
 import urllib.parse
 import collections
@@ -179,35 +176,6 @@ def check_missing_keys(object, *key_sequences):
 
     return None
 
-def prefix_urls(item):
-    try:
-        item['thumbnail'] = util.prefix_url(item['thumbnail'])
-    except KeyError:
-        pass
-
-    try:
-        item['author_url'] = util.prefix_url(item['author_url'])
-    except KeyError:
-        pass
-
-def add_extra_html_info(item):
-    if item['type'] == 'video':
-        item['url'] = (util.URL_ORIGIN + '/watch?v=' + item['id']) if item.get('id') else None
-
-        video_info = {}
-        for key in ('id', 'title', 'author', 'duration'):
-            try:
-                video_info[key] = item[key]
-            except KeyError:
-                video_info[key] = ''
-
-        item['video_info'] = json.dumps(video_info)
-
-    elif item['type'] == 'playlist':
-        item['url'] = (util.URL_ORIGIN + '/playlist?list=' + item['id']) if item.get('id') else None
-    elif item['type'] == 'channel':
-        item['url'] = (util.URL_ORIGIN + "/channel/" + item['id']) if item.get('id') else None
-
 def extract_item_info(item, additional_info={}):
     if not item:
         return {'error': 'No item given'}
@@ -307,13 +275,6 @@ def extract_item_info(item, additional_info={}):
         )))
     return info
 
-def parse_info_prepare_for_html(renderer, additional_info={}):
-    item = extract_item_info(renderer, additional_info)
-    prefix_urls(item)
-    add_extra_html_info(item)
-
-    return item
-
 def extract_response(polymer_json):
     '''return response, error'''
     response = multi_deep_get(polymer_json, [1, 'response'], ['response'], default=None, types=dict)
author	James Taylor <user234683@users.noreply.github.com>	2019-12-19 19:48:53 -0800
committer	James Taylor <user234683@users.noreply.github.com>	2019-12-19 19:48:53 -0800
commit	d1d908d5b1aadb0dc75b25df1a47789c021f89e2 (patch)
tree	56a1a0e8361a732241774fee35ae521858d16d5f /youtube
parent	76376b29a0adf6bd6d7a0202d904f923bdc8aa57 (diff)
download	yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.tar.lz yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.tar.xz yt-local-d1d908d5b1aadb0dc75b25df1a47789c021f89e2.zip