aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract.py
diff options
context:
space:
mode:
authorJames Taylor <user234683@users.noreply.github.com>2019-06-21 21:41:41 -0700
committerJames Taylor <user234683@users.noreply.github.com>2019-06-21 21:41:41 -0700
commit1156b0998758ee803c7e8ae0cc2beb5181c232a3 (patch)
tree781c8d34c0de049327fd2d3f9085dc9877748acc /youtube/yt_data_extract.py
parente33bae2e50799e3617432f294cf36581404a5114 (diff)
downloadyt-local-1156b0998758ee803c7e8ae0cc2beb5181c232a3.tar.lz
yt-local-1156b0998758ee803c7e8ae0cc2beb5181c232a3.tar.xz
yt-local-1156b0998758ee803c7e8ae0cc2beb5181c232a3.zip
Refactor search page
Diffstat (limited to 'youtube/yt_data_extract.py')
-rw-r--r--youtube/yt_data_extract.py113
1 files changed, 96 insertions, 17 deletions
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 5483911..a487c57 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -1,4 +1,7 @@
+from youtube import util
+
import html
+import json
# videos (all of type str):
@@ -138,9 +141,83 @@ dispatch = {
}
-def renderer_info(renderer):
+def ajax_info(item_json):
+ try:
+ info = {}
+ for key, node in item_json.items():
+ try:
+ simple_key, function = dispatch[key]
+ except KeyError:
+ continue
+ info[simple_key] = function(node)
+ return info
+ except KeyError:
+ print(item_json)
+ raise
+
+
+
+def prefix_urls(item):
+ try:
+ item['thumbnail'] = '/' + item['thumbnail'].lstrip('/')
+ except KeyError:
+ pass
+
+ try:
+ item['author_url'] = util.URL_ORIGIN + item['author_url']
+ except KeyError:
+ pass
+
+def add_extra_html_info(item):
+ if item['type'] == 'video':
+ item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id']
+
+ video_info = {}
+ for key in ('id', 'title', 'author', 'duration'):
+ try:
+ video_info[key] = item[key]
+ except KeyError:
+ video_info[key] = ''
+
+ item['video_info'] = json.dumps(video_info)
+
+ elif item['type'] == 'playlist':
+ item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id']
+ elif item['type'] == 'channel':
+ item['url'] = util.URL_ORIGIN + "/channel/" + item['id']
+
+
+def renderer_info(renderer, additional_info={}):
+ type = list(renderer.keys())[0]
+ renderer = renderer[type]
+ info = {}
+ if type == 'itemSectionRenderer':
+ return renderer_info(renderer['contents'][0], additional_info)
+
+ if type in ('movieRenderer', 'clarificationRenderer'):
+ info['type'] = 'unsupported'
+ return info
+
+ info.update(additional_info)
+
+ if type.startswith('compact'):
+ info['item_size'] = 'small'
+ else:
+ info['item_size'] = 'medium'
+
+ if type in ('compactVideoRenderer', 'videoRenderer', 'gridVideoRenderer'):
+ info['type'] = 'video'
+ elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer',
+ 'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer',
+ 'showRenderer', 'compactShowRenderer', 'gridShowRenderer'):
+ info['type'] = 'playlist'
+ elif type == 'channelRenderer':
+ info['type'] = 'channel'
+ else:
+ info['type'] = 'unsupported'
+ return info
+
try:
- info = {}
if 'viewCountText' in renderer: # prefer this one as it contains all the digits
info['views'] = get_text(renderer['viewCountText'])
elif 'shortViewCountText' in renderer:
@@ -183,23 +260,25 @@ def renderer_info(renderer):
except KeyError:
continue
info[simple_key] = function(node)
+ if info['type'] == 'video' and 'duration' not in info:
+ info['duration'] = 'Live'
+
return info
except KeyError:
print(renderer)
raise
-
-def ajax_info(item_json):
- try:
- info = {}
- for key, node in item_json.items():
- try:
- simple_key, function = dispatch[key]
- except KeyError:
- continue
- info[simple_key] = function(node)
- return info
- except KeyError:
- print(item_json)
- raise
-
+
+
+
+ #print(renderer)
+ #raise NotImplementedError('Unknown renderer type: ' + type)
+ return ''
+
+def parse_info_prepare_for_html(renderer):
+ item = renderer_info(renderer)
+ prefix_urls(item)
+ add_extra_html_info(item)
+
+ return item
+