Use BeautifulSoup to extract information from YT searches

Google is blocking the IPs of the invidio instances; for this reason, livie will use BeautifulSoup to extract information from YT searches.
author: Jesús <heckyel@hyperbola.info> 2019-11-27 16:23:59 -0500
committer: Jesús <heckyel@hyperbola.info> 2019-11-27 16:23:59 -0500
commit: 8dbd0685240518f255603d3d86ac07e20460b862 (patch)
tree: 8c9405c148e93dc5ff3f823c0966da34d2783b86 /livie.py
parent: abbf953e640a64a05c7b37e596b327d3c3af1530 (diff)
download: livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.lz
livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.xz
livie-8dbd0685240518f255603d3d86ac07e20460b862.zip
1 files changed, 29 insertions, 25 deletions
diff --git a/livie.py b/livie.py
index b03d1c4..5b5028a 100644
--- a/livie.py
+++ b/livie.py
@@ -1,34 +1,38 @@
 """This module does render video"""
 
 import sys
-import datetime
-import json
 import requests
+from bs4 import BeautifulSoup
 
-URL = 'https://invidio.us'
+URL = 'https://www.youtube.com'
+FILTER = '&sp=EgIQAQ%253D%253D'
 INPUT = sys.argv[1]
-SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT)
+SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
 REQUEST = requests.get(SEARCH)
-SD = '&itag=18&local=true'
-HD = '&itag=22&local=true'
+SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
+FIRST = True
 
-FIRST = True  # skip line loop
 
-VIDEOS = json.loads(REQUEST.content.decode('utf-8'))
+def replace(string):
+    """Remove unnecessary characters"""
+    string = string.replace(' - Duration: ', '')
+    string = string.replace('.', '')
+    string = string.replace(' views', '')
+    return string
 
-for video in VIDEOS:
+for vid in SOUP.find_all(class_='yt-lockup-content'):
     try:
-        title = video.get('title', '')
-        videoid = video.get('videoId', '')
-        author = video.get('author', '')
+        link = URL + vid.h3.a['href']
+        title = vid.h3.a.text
+        description = vid.h3.span.text
+        author = vid.find(class_='yt-lockup-byline').a.text
+        meta = vid.find(class_='yt-lockup-meta').ul.contents
+        time_srt = vid.find(class_='yt-lockup-title').span.text
+        time = replace(time_srt)
+        uploaded = meta[0].text
+        views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
+        views = replace(views_str.text)
 
-        # Make URL
-        sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
-        hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
-
-        timer = video.get('lengthSeconds', '')
-        time = str(datetime.timedelta(seconds=timer))
-        publish = video.get('publishedText', '')
     except TypeError:
         continue
 
@@ -38,9 +42,9 @@ for video in VIDEOS:
         print()  # print skip line
 
     # prints
-    print('    title: %s' % (title))
-    print('       SD: %s' % (sd))
-    print('       HD: %s' % (hd))
-    print('           HD ^ Only some videos available caused by DRM')
-    print('  channel: %s' % (author))
-    print('     time: %s' % (time))
+    print('    title: %s' % title)
+    print('      url: %s' % link)
+    print('  channel: %s' % author)
+    print(' uploaded: %s' % uploaded)
+    print('     time: %s' % time)
+    print('    views: %s' % views)
author	Jesús <heckyel@hyperbola.info>	2019-11-27 16:23:59 -0500
committer	Jesús <heckyel@hyperbola.info>	2019-11-27 16:23:59 -0500
commit	8dbd0685240518f255603d3d86ac07e20460b862 (patch)
tree	8c9405c148e93dc5ff3f823c0966da34d2783b86 /livie.py
parent	abbf953e640a64a05c7b37e596b327d3c3af1530 (diff)
download	livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.lz livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.xz livie-8dbd0685240518f255603d3d86ac07e20460b862.zip