diff options
Diffstat (limited to 'hypervideo_dl/extractor/openload.py')
-rw-r--r-- | hypervideo_dl/extractor/openload.py | 110 |
1 files changed, 58 insertions, 52 deletions
diff --git a/hypervideo_dl/extractor/openload.py b/hypervideo_dl/extractor/openload.py index fe4740a..56b8330 100644 --- a/hypervideo_dl/extractor/openload.py +++ b/hypervideo_dl/extractor/openload.py @@ -1,22 +1,19 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import collections +import contextlib import json import os import subprocess import tempfile -from ..compat import ( - compat_urlparse, - compat_kwargs, -) +from ..compat import compat_urlparse from ..utils import ( - check_executable, - encodeArgument, ExtractorError, + Popen, + check_executable, + format_field, get_exe_version, is_outdated_version, - Popen, + shell_quote, ) @@ -37,13 +34,11 @@ def cookie_to_dict(cookie): cookie_dict['secure'] = cookie.secure if cookie.discard is not None: cookie_dict['discard'] = cookie.discard - try: + with contextlib.suppress(TypeError): if (cookie.has_nonstandard_attr('httpOnly') or cookie.has_nonstandard_attr('httponly') or cookie.has_nonstandard_attr('HttpOnly')): cookie_dict['httponly'] = True - except TypeError: - pass return cookie_dict @@ -51,13 +46,15 @@ def cookie_jar_to_list(cookie_jar): return [cookie_to_dict(cookie) for cookie in cookie_jar] -class PhantomJSwrapper(object): +class PhantomJSwrapper: """PhantomJS wrapper class This class is experimental. """ - _TEMPLATE = r''' + INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html' + + _BASE_JS = R''' phantom.onError = function(msg, trace) {{ var msgStack = ['PHANTOM ERROR: ' + msg]; if(trace && trace.length) {{ @@ -70,6 +67,9 @@ class PhantomJSwrapper(object): console.error(msgStack.join('\n')); phantom.exit(1); }}; + ''' + + _TEMPLATE = R''' var page = require('webpage').create(); var fs = require('fs'); var read = {{ mode: 'r', charset: 'utf-8' }}; @@ -112,9 +112,7 @@ class PhantomJSwrapper(object): self.exe = check_executable('phantomjs', ['-v']) if not self.exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) + raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True) self.extractor = extractor @@ -125,23 +123,25 @@ class PhantomJSwrapper(object): 'Your copy of PhantomJS is outdated, update it to version ' '%s or newer if you encounter any errors.' % required_version) - self.options = { - 'timeout': timeout, - } for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() self._TMP_FILES[name] = tmp + self.options = collections.ChainMap({ + 'timeout': timeout, + }, { + x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + for x in self._TMP_FILE_NAMES + }) + def __del__(self): for name in self._TMP_FILE_NAMES: - try: + with contextlib.suppress(OSError, KeyError): os.remove(self._TMP_FILES[name].name) - except (IOError, OSError, KeyError): - pass def _save_cookies(self, url): - cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) + cookies = cookie_jar_to_list(self.extractor.cookiejar) for cookie in cookies: if 'path' not in cookie: cookie['path'] = '/' @@ -158,7 +158,7 @@ class PhantomJSwrapper(object): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**compat_kwargs(cookie)) + self.extractor._set_cookie(**cookie) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ @@ -180,7 +180,7 @@ class PhantomJSwrapper(object): In most cases you don't need to add any `jscode`. It is executed in `page.onLoadFinished`. `saveAndExit();` is mandatory, use it instead of `phantom.exit()` - It is possible to wait for some element on the webpage, for example: + It is possible to wait for some element on the webpage, e.g. var check = function() { var elementFound = page.evaluate(function() { return document.querySelector('#b.done') !== null; @@ -205,33 +205,39 @@ class PhantomJSwrapper(object): self._save_cookies(url) - replaces = self.options - replaces['url'] = url user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent'] - replaces['ua'] = user_agent.replace('"', '\\"') - replaces['jscode'] = jscode - - for x in self._TMP_FILE_NAMES: - replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') - - with open(self._TMP_FILES['script'].name, 'wb') as f: - f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) - - if video_id is None: - self.extractor.to_screen('%s' % (note2,)) - else: - self.extractor.to_screen('%s: %s' % (video_id, note2)) - - p = Popen( - [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate_or_kill() - if p.returncode != 0: - raise ExtractorError( - 'Executing JS failed\n:' + encodeArgument(err)) + jscode = self._TEMPLATE.format_map(self.options.new_child({ + 'url': url, + 'ua': user_agent.replace('"', '\\"'), + 'jscode': jscode, + })) + + stdout = self.execute(jscode, video_id, note=note2) + with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') - self._load_cookies() - return (html, encodeArgument(out)) + return html, stdout + + def execute(self, jscode, video_id=None, *, note='Executing JS'): + """Execute JS and return stdout""" + if 'phantom.exit();' not in jscode: + jscode += ';\nphantom.exit();' + jscode = self._BASE_JS + jscode + + with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f: + f.write(jscode) + self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') + + cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name] + self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}') + try: + stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000, + text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except Exception as e: + raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e) + if returncode: + raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}') + + return stdout |