diff options
Diffstat (limited to 'yt_dlp/extractor/openload.py')
-rw-r--r-- | yt_dlp/extractor/openload.py | 74 |
1 files changed, 46 insertions, 28 deletions
diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 79dad09e3..56b8330ff 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -1,3 +1,4 @@ +import collections import contextlib import json import os @@ -9,8 +10,10 @@ from ..utils import ( ExtractorError, Popen, check_executable, + format_field, get_exe_version, is_outdated_version, + shell_quote, ) @@ -49,7 +52,9 @@ class PhantomJSwrapper: This class is experimental. """ - _TEMPLATE = r''' + INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html' + + _BASE_JS = R''' phantom.onError = function(msg, trace) {{ var msgStack = ['PHANTOM ERROR: ' + msg]; if(trace && trace.length) {{ @@ -62,6 +67,9 @@ class PhantomJSwrapper: console.error(msgStack.join('\n')); phantom.exit(1); }}; + ''' + + _TEMPLATE = R''' var page = require('webpage').create(); var fs = require('fs'); var read = {{ mode: 'r', charset: 'utf-8' }}; @@ -104,9 +112,7 @@ class PhantomJSwrapper: self.exe = check_executable('phantomjs', ['-v']) if not self.exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) + raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True) self.extractor = extractor @@ -117,14 +123,18 @@ class PhantomJSwrapper: 'Your copy of PhantomJS is outdated, update it to version ' '%s or newer if you encounter any errors.' % required_version) - self.options = { - 'timeout': timeout, - } for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() self._TMP_FILES[name] = tmp + self.options = collections.ChainMap({ + 'timeout': timeout, + }, { + x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + for x in self._TMP_FILE_NAMES + }) + def __del__(self): for name in self._TMP_FILE_NAMES: with contextlib.suppress(OSError, KeyError): @@ -170,7 +180,7 @@ class PhantomJSwrapper: In most cases you don't need to add any `jscode`. It is executed in `page.onLoadFinished`. `saveAndExit();` is mandatory, use it instead of `phantom.exit()` - It is possible to wait for some element on the webpage, for example: + It is possible to wait for some element on the webpage, e.g. var check = function() { var elementFound = page.evaluate(function() { return document.querySelector('#b.done') !== null; @@ -195,31 +205,39 @@ class PhantomJSwrapper: self._save_cookies(url) - replaces = self.options - replaces['url'] = url user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent'] - replaces['ua'] = user_agent.replace('"', '\\"') - replaces['jscode'] = jscode - - for x in self._TMP_FILE_NAMES: - replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + jscode = self._TEMPLATE.format_map(self.options.new_child({ + 'url': url, + 'ua': user_agent.replace('"', '\\"'), + 'jscode': jscode, + })) - with open(self._TMP_FILES['script'].name, 'wb') as f: - f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) + stdout = self.execute(jscode, video_id, note=note2) - if video_id is None: - self.extractor.to_screen(f'{note2}') - else: - self.extractor.to_screen(f'{video_id}: {note2}') - - stdout, stderr, returncode = Popen.run( - [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], - text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if returncode: - raise ExtractorError(f'Executing JS failed:\n{stderr}') with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') - self._load_cookies() return html, stdout + + def execute(self, jscode, video_id=None, *, note='Executing JS'): + """Execute JS and return stdout""" + if 'phantom.exit();' not in jscode: + jscode += ';\nphantom.exit();' + jscode = self._BASE_JS + jscode + + with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f: + f.write(jscode) + self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') + + cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name] + self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}') + try: + stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000, + text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except Exception as e: + raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e) + if returncode: + raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}') + + return stdout |