From 8dbd0685240518f255603d3d86ac07e20460b862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs?= Date: Wed, 27 Nov 2019 16:23:59 -0500 Subject: Use BeautifulSoup to extract information from YT searches Google is blocking the IPs of the invidio instances; for this reason livie will use BeautifulSoup to extract information from YT searches --- README.md | 3 ++- livie.el | 3 +-- livie.py | 54 +++++++++++++++++++++++++++++------------------------- 3 files changed, 32 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 53b212b..2ab5f07 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,10 @@ Livie allows the user to search youtube.com and play the video from `mpv`. - `python >= 3.5` - `python-requests` +- `python-beautifulsoup4` - `mpv` - `sudo pacman -S python mpv python-requests` + `sudo pacman -S python mpv python-requests python-beautifulsoup4` ## Installation diff --git a/livie.el b/livie.el index 11e990c..226d3f7 100644 --- a/livie.el +++ b/livie.el @@ -47,8 +47,7 @@ :group 'livie :type 'string) -(defvar livie-youtube-regexp - "https://invidio.us/latest_version\\?id=[A-Za-z0-9_\\-]\\{11\\}&itag=\\<\\([0-9]*\\.[0-9]+\\|[0-9]+\\)[df]?\\>&local=true") +(defvar livie-youtube-regexp "https://www.youtube.com/watch\\?v=[A-Za-z0-9_\\-]\\{11\\}") (define-derived-mode livie-mode special-mode "livie" diff --git a/livie.py b/livie.py index b03d1c4..5b5028a 100644 --- a/livie.py +++ b/livie.py @@ -1,34 +1,38 @@ """This module does render video""" import sys -import datetime -import json import requests +from bs4 import BeautifulSoup -URL = 'https://invidio.us' +URL = 'https://www.youtube.com' +FILTER = '&sp=EgIQAQ%253D%253D' INPUT = sys.argv[1] -SEARCH = '%s/api/v1/search?q=%s' % (URL, INPUT) +SEARCH = '%s/results?search_query=%s%s' % (URL, INPUT, FILTER) REQUEST = requests.get(SEARCH) -SD = '&itag=18&local=true' -HD = '&itag=22&local=true' +SOUP = BeautifulSoup(REQUEST.content, 'lxml', from_encoding=REQUEST.encoding) +FIRST = True -FIRST = True # 
skip line loop -VIDEOS = json.loads(REQUEST.content.decode('utf-8')) +def replace(string): + """Remove unnecessary characters""" + string = string.replace(' - Duration: ', '') + string = string.replace('.', '') + string = string.replace(' views', '') + return string -for video in VIDEOS: +for vid in SOUP.find_all(class_='yt-lockup-content'): try: - title = video.get('title', '') - videoid = video.get('videoId', '') - author = video.get('author', '') + link = URL + vid.h3.a['href'] + title = vid.h3.a.text + description = vid.h3.span.text + author = vid.find(class_='yt-lockup-byline').a.text + meta = vid.find(class_='yt-lockup-meta').ul.contents + time_srt = vid.find(class_='yt-lockup-title').span.text + time = replace(time_srt) + uploaded = meta[0].text + views_str = vid.find(class_='yt-lockup-meta').ul.li.find_next() + views = replace(views_str.text) - # Make URL - sd = '%s/latest_version?id=%s%s' % (URL, videoid, SD) - hd = '%s/latest_version?id=%s%s' % (URL, videoid, HD) - - timer = video.get('lengthSeconds', '') - time = str(datetime.timedelta(seconds=timer)) - publish = video.get('publishedText', '') except TypeError: continue @@ -38,9 +42,9 @@ for video in VIDEOS: print() # print skip line # prints - print(' title: %s' % (title)) - print(' SD: %s' % (sd)) - print(' HD: %s' % (hd)) - print(' HD ^ Only some videos available caused by DRM') - print(' channel: %s' % (author)) - print(' time: %s' % (time)) + print(' title: %s' % title) + print(' url: %s' % link) + print(' channel: %s' % author) + print(' uploaded: %s' % uploaded) + print(' time: %s' % time) + print(' views: %s' % views) -- cgit v1.2.3