diff options
author | Jesús <heckyel@hyperbola.info> | 2019-11-27 16:23:59 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2019-11-27 16:23:59 -0500 |
commit | 8dbd0685240518f255603d3d86ac07e20460b862 (patch) | |
tree | 8c9405c148e93dc5ff3f823c0966da34d2783b86 /livie.py | |
parent | abbf953e640a64a05c7b37e596b327d3c3af1530 (diff) | |
download | livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.lz livie-8dbd0685240518f255603d3d86ac07e20460b862.tar.xz livie-8dbd0685240518f255603d3d86ac07e20460b862.zip |
Use BeautifulSoup to extract information from YT searches
Google is blocking the IPs of the invidio instances. For this reason,
livie will use BeautifulSoup to extract information from YT searches.
Diffstat (limited to 'livie.py')
-rw-r--r-- | livie.py | 54 |
1 files changed, 29 insertions, 25 deletions
@@ -1,34 +1,38 @@ """This module does render video""" import sys -import datetime -import json import requests +from bs4 import BeautifulSoup -URL = 'https://invidio.us' +URL = 'https://www.youtube.com' +FILTER = '&sp=EgIQAQ%253D%253D' INPUT = sys.argv[1] -SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT) +SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER) REQUEST = requests.get(SEARCH) -SD = '&itag=18&local=true' -HD = '&itag=22&local=true' +SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding) +FIRST = True -FIRST = True # skip line loop -VIDEOS = json.loads(REQUEST.content.decode('utf-8')) +def replace(string): + """Remove unnecessary characters""" + string = string.replace(' - Duration: ', '') + string = string.replace('.', '') + string = string.replace(' views', '') + return string -for video in VIDEOS: +for vid in SOUP.find_all(class_='yt-lockup-content'): try: - title = video.get('title', '') - videoid = video.get('videoId', '') - author = video.get('author', '') + link = URL + vid.h3.a['href'] + title = vid.h3.a.text + description = vid.h3.span.text + author = vid.find(class_='yt-lockup-byline').a.text + meta = vid.find(class_='yt-lockup-meta').ul.contents + time_srt = vid.find(class_='yt-lockup-title').span.text + time = replace(time_srt) + uploaded = meta[0].text + views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next() + views = replace(views_str.text) - # Make URL - sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD) - hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD) - - timer = video.get('lengthSeconds', '') - time = str(datetime.timedelta(seconds=timer)) - publish = video.get('publishedText', '') except TypeError: continue @@ -38,9 +42,9 @@ for video in VIDEOS: print() # print skip line # prints - print(' title: %s' % (title)) - print(' SD: %s' % (sd)) - print(' HD: %s' % (hd)) - print(' HD ^ Only some videos available caused by DRM') - print(' channel: %s' % (author)) - print(' time: %s' % (time)) + 
print(' title: %s' % title) + print(' url: %s' % link) + print(' channel: %s' % author) + print(' uploaded: %s' % uploaded) + print(' time: %s' % time) + print(' views: %s' % views) |