Merge flask framework and other stuff from master

author: James Taylor <user234683@users.noreply.github.com> 2019-08-09 22:01:04 -0700
committer: James Taylor <user234683@users.noreply.github.com> 2019-08-09 22:01:04 -0700
commit: 2e75c6d9603f8a5edf6495f8d4fb3115a67d823c (patch)
tree: 8fb2d1bec2cf0e50c5fce6bc718f755485419db0 /youtube/yt_data_extract.py
parent: cc9283ad5332f59a69a91d9d0fab299779de513c (diff)
parent: adc40bc760345a23678a01f27d7697dfd3811914 (diff)
download: yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.lz
yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.xz
yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.zip
1 files changed, 101 insertions, 28 deletions
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py
index 5483911..c236c2f 100644
--- a/youtube/yt_data_extract.py
+++ b/youtube/yt_data_extract.py
@@ -1,4 +1,7 @@
+from youtube import util
+
 import html
+import json
 
 # videos (all of type str):
 
@@ -33,19 +36,11 @@ import html
 
 
 
-
-
 def get_plain_text(node):
     try:
-        return html.escape(node['simpleText'])
+        return node['simpleText']
     except KeyError:
-        return unformmated_text_runs(node['runs'])
-        
-def unformmated_text_runs(runs):
-    result = ''
-    for text_run in runs:
-        result += html.escape(text_run["text"])
-    return result
+        return ''.join(text_run['text'] for text_run in node['runs'])
 
 def format_text_runs(runs):
     if isinstance(runs, str):
@@ -75,14 +70,19 @@ def get_url(node):
 
 
 def get_text(node):
+    if node == {}:
+        return ''
     try:
         return node['simpleText']
     except KeyError:
-            pass
+        pass
     try:
         return node['runs'][0]['text']
     except IndexError: # empty text runs
         return ''
+    except KeyError:
+        print(node)
+        raise
 
 def get_formatted_text(node):
     try:
@@ -138,9 +138,85 @@ dispatch = {
 
 }
 
-def renderer_info(renderer):
+def ajax_info(item_json):
+    try:
+        info = {}          
+        for key, node in item_json.items():
+            try:
+                simple_key, function = dispatch[key]
+            except KeyError:
+                continue
+            info[simple_key] = function(node)
+        return info
+    except KeyError:
+        print(item_json)
+        raise
+
+
+
+def prefix_urls(item):
+    try:
+        item['thumbnail'] = '/' + item['thumbnail'].lstrip('/')
+    except KeyError:
+        pass
+
+    try:
+        item['author_url'] = util.URL_ORIGIN + item['author_url']
+    except KeyError:
+        pass
+
+def add_extra_html_info(item):
+    if item['type'] == 'video':
+        item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id']
+
+        video_info = {}
+        for key in ('id', 'title', 'author', 'duration'):
+            try:
+                video_info[key] = item[key]
+            except KeyError:
+                video_info[key] = ''
+
+        item['video_info'] = json.dumps(video_info)
+
+    elif item['type'] == 'playlist':
+        item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id']
+    elif item['type'] == 'channel':
+        item['url'] = util.URL_ORIGIN + "/channel/" + item['id']
+
+
+def renderer_info(renderer, additional_info={}):
+    type = list(renderer.keys())[0]
+    renderer = renderer[type]
+    info = {}
+    if type == 'itemSectionRenderer':
+        return renderer_info(renderer['contents'][0], additional_info)
+    
+    if type in ('movieRenderer', 'clarificationRenderer'):
+        info['type'] = 'unsupported'
+        return info
+
+    info.update(additional_info)
+
+    if type.startswith('compact') or (type.startswith('playlist') and type != 'playlistRenderer'):
+        info['item_size'] = 'small'
+    else:
+        info['item_size'] = 'medium'
+
+    if type in ('compactVideoRenderer', 'videoRenderer', 'playlistVideoRenderer', 'gridVideoRenderer'):
+        info['type'] = 'video'
+    elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer',
+                  'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer',
+                  'showRenderer', 'compactShowRenderer', 'gridShowRenderer'):
+        info['type'] = 'playlist'
+    elif type == 'channelRenderer':
+        info['type'] = 'channel'
+    elif type == 'playlistHeaderRenderer':
+        info['type'] = 'playlist_metadata'
+    else:
+        info['type'] = 'unsupported'
+        return info
+
     try:
-        info = {}
         if 'viewCountText' in renderer:     # prefer this one as it contains all the digits
             info['views'] = get_text(renderer['viewCountText'])
         elif 'shortViewCountText' in renderer:
@@ -183,23 +259,20 @@ def renderer_info(renderer):
                 except KeyError:
                     continue
                 info[simple_key] = function(node)
+        if info['type'] == 'video' and 'duration' not in info:
+            info['duration'] = 'Live'
+
         return info
     except KeyError:
         print(renderer)
         raise
-    
-def ajax_info(item_json):
-    try:
-        info = {}          
-        for key, node in item_json.items():
-            try:
-                simple_key, function = dispatch[key]
-            except KeyError:
-                continue
-            info[simple_key] = function(node)
-        return info
-    except KeyError:
-        print(item_json)
-        raise
-    
+
+
+def parse_info_prepare_for_html(renderer, additional_info={}):
+    item = renderer_info(renderer, additional_info)
+    prefix_urls(item)
+    add_extra_html_info(item)
+
+    return item
+
author	James Taylor <user234683@users.noreply.github.com>	2019-08-09 22:01:04 -0700
committer	James Taylor <user234683@users.noreply.github.com>	2019-08-09 22:01:04 -0700
commit	2e75c6d9603f8a5edf6495f8d4fb3115a67d823c (patch)
tree	8fb2d1bec2cf0e50c5fce6bc718f755485419db0 /youtube/yt_data_extract.py
parent	cc9283ad5332f59a69a91d9d0fab299779de513c (diff)
parent	adc40bc760345a23678a01f27d7697dfd3811914 (diff)
download	yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.lz yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.xz yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.zip