aboutsummaryrefslogtreecommitdiffstats
path: root/livie.py
diff options
context:
space:
mode:
author Jesús <heckyel@hyperbola.info> 2019-11-27 16:23:59 -0500
committer Jesús <heckyel@hyperbola.info> 2019-11-27 16:23:59 -0500
commit 8dbd0685240518f255603d3d86ac07e20460b862 (patch)
tree 8c9405c148e93dc5ff3f823c0966da34d2783b86 /livie.py
parent abbf953e640a64a05c7b37e596b327d3c3af1530 (diff)
download livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.lz
livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.xz
livie-8dbd0685240518f255603d3d86ac07e20460b862.zip
Use BeautifulSoup to extract information from YT searches
Google is blocking the IP addresses of the invidio.us instances; for this reason, livie will use BeautifulSoup to extract information from YouTube search results instead.
Diffstat (limited to 'livie.py')
-rw-r--r-- livie.py 54
1 file changed, 29 insertions, 25 deletions
diff --git a/livie.py b/livie.py
index b03d1c4..5b5028a 100644
--- a/livie.py
+++ b/livie.py
@@ -1,34 +1,38 @@
"""This module does render video"""
import sys
-import datetime
-import json
import requests
+from bs4 import BeautifulSoup
-URL = 'https://invidio.us'
+URL = 'https://www.youtube.com'
+FILTER = '&sp=EgIQAQ%253D%253D'
INPUT = sys.argv[1]
-SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT)
+SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER)
REQUEST = requests.get(SEARCH)
-SD = '&itag=18&local=true'
-HD = '&itag=22&local=true'
+SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding)
+FIRST = True
-FIRST = True # skip line loop
-VIDEOS = json.loads(REQUEST.content.decode('utf-8'))
+def replace(string):
+ """Remove unnecessary characters"""
+ string = string.replace(' - Duration: ', '')
+ string = string.replace('.', '')
+ string = string.replace(' views', '')
+ return string
-for video in VIDEOS:
+for vid in SOUP.find_all(class_='yt-lockup-content'):
try:
- title = video.get('title', '')
- videoid = video.get('videoId', '')
- author = video.get('author', '')
+ link = URL + vid.h3.a['href']
+ title = vid.h3.a.text
+ description = vid.h3.span.text
+ author = vid.find(class_='yt-lockup-byline').a.text
+ meta = vid.find(class_='yt-lockup-meta').ul.contents
+ time_srt = vid.find(class_='yt-lockup-title').span.text
+ time = replace(time_srt)
+ uploaded = meta[0].text
+ views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next()
+ views = replace(views_str.text)
- # Make URL
- sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD)
- hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD)
-
- timer = video.get('lengthSeconds', '')
- time = str(datetime.timedelta(seconds=timer))
- publish = video.get('publishedText', '')
except TypeError:
continue
@@ -38,9 +42,9 @@ for video in VIDEOS:
print() # print skip line
# prints
- print(' title: %s' % (title))
- print(' SD: %s' % (sd))
- print(' HD: %s' % (hd))
- print(' HD ^ Only some videos available caused by DRM')
- print(' channel: %s' % (author))
- print(' time: %s' % (time))
+ print(' title: %s' % title)
+ print(' url: %s' % link)
+ print(' channel: %s' % author)
+ print(' uploaded: %s' % uploaded)
+ print(' time: %s' % time)
+ print(' views: %s' % views)