Use BeautifulSoup to extract information from YT searches

Google is blocking the IPs of the invidio instances; for this reason, livie will use BeautifulSoup to extract information from YT searches
author: Jesús <heckyel@hyperbola.info> 2019-11-27 16:23:59 -0500
committer: Jesús <heckyel@hyperbola.info> 2019-11-27 16:23:59 -0500
commit: 8dbd0685240518f255603d3d86ac07e20460b862 (patch)
tree: 8c9405c148e93dc5ff3f823c0966da34d2783b86
parent: abbf953e640a64a05c7b37e596b327d3c3af1530 (diff)
download: livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.lz
livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.xz
livie-8dbd0685240518f255603d3d86ac07e20460b862.zip
3 files changed, 32 insertions, 28 deletions
diff --git a/README.md b/README.md
index 53b212b..2ab5f07 100644
--- a/README.md
+++ b/README.md
@@ -14,9 +14,10 @@ Livie allows the user to search youtube.com and play the video from `mpv`.
 
 - `python >= 3.5`
 - `python-requests`
+- `python-beautifulsoup4`
 - `mpv`
 
-  `sudo pacman -S python mpv python-requests`
+  `sudo pacman -S python mpv python-requests python-beautifulsoup4`
 
 ## Installation
 
diff --git a/livie.el b/livie.el
index 11e990c..226d3f7 100644
--- a/livie.el
+++ b/livie.el
@@ -47,8 +47,7 @@
   :group 'livie
   :type 'string)
 
-(defvar livie-youtube-regexp
-  "https://invidio.us/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true")
+(defvar livie-youtube-regexp "https://www.youtube.com/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}")
 
 (define-derived-mode livie-mode
   special-mode "livie"
diff --git a/livie.py b/livie.py
index b03d1c4..5b5028a 100644
--- a/livie.py
+++ b/livie.py
@@ -1,34 +1,38 @@
 """This module does render video"""
 
 import sys
-import datetime
-import json
 import requests
+from bs4 import BeautifulSoup
 
-URL = 'https://invidio.us'
+URL = 'https://www.youtube.com'
+FILTER = '&sp=EgIQAQ%253D%253D'
 INPUT = sys.argv[1]
-SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT)
+SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
 REQUEST = requests.get(SEARCH)
-SD = '&itag=18&local=true'
-HD = '&itag=22&local=true'
+SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
+FIRST = True
 
-FIRST = True  # skip line loop
 
-VIDEOS = json.loads(REQUEST.content.decode('utf-8'))
+def replace(string):
+    """Remove unnecessary characters"""
+    string = string.replace(' - Duration: ', '')
+    string = string.replace('.', '')
+    string = string.replace(' views', '')
+    return string
 
-for video in VIDEOS:
+for vid in SOUP.find_all(class_='yt-lockup-content'):
     try:
-        title = video.get('title', '')
-        videoid = video.get('videoId', '')
-        author = video.get('author', '')
+        link = URL + vid.h3.a['href']
+        title = vid.h3.a.text
+        description = vid.h3.span.text
+        author = vid.find(class_='yt-lockup-byline').a.text
+        meta = vid.find(class_='yt-lockup-meta').ul.contents
+        time_srt = vid.find(class_='yt-lockup-title').span.text
+        time = replace(time_srt)
+        uploaded = meta[0].text
+        views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
+        views = replace(views_str.text)
 
-        # Make URL
-        sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
-        hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
-
-        timer = video.get('lengthSeconds', '')
-        time = str(datetime.timedelta(seconds=timer))
-        publish = video.get('publishedText', '')
     except TypeError:
         continue
 
@@ -38,9 +42,9 @@ for video in VIDEOS:
         print()  # print skip line
 
     # prints
-    print('    title: %s' % (title))
-    print('       SD: %s' % (sd))
-    print('       HD: %s' % (hd))
-    print('           HD ^ Only some videos available caused by DRM')
-    print('  channel: %s' % (author))
-    print('     time: %s' % (time))
+    print('    title: %s' % title)
+    print('      url: %s' % link)
+    print('  channel: %s' % author)
+    print(' uploaded: %s' % uploaded)
+    print('     time: %s' % time)
+    print('    views: %s' % views)
author	Jesús <heckyel@hyperbola.info>	2019-11-27 16:23:59 -0500
committer	Jesús <heckyel@hyperbola.info>	2019-11-27 16:23:59 -0500
commit	8dbd0685240518f255603d3d86ac07e20460b862 (patch)
tree	8c9405c148e93dc5ff3f823c0966da34d2783b86
parent	abbf953e640a64a05c7b37e596b327d3c3af1530 (diff)
download	livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.lz livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.xz livie-8dbd0685240518f255603d3d86ac07e20460b862.zip