From c6e07cf1e16ff3d1a0691067249ba3777f8c0bcb Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 18 Jul 2022 04:26:50 +0530 Subject: [cleanup] Misc --- yt_dlp/extractor/openload.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'yt_dlp/extractor/openload.py') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 79dad09e3..f844ee6fb 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -104,9 +104,8 @@ class PhantomJSwrapper: self.exe = check_executable('phantomjs', ['-v']) if not self.exe: - raise ExtractorError('PhantomJS executable not found in PATH, ' - 'download it from http://phantomjs.org', - expected=True) + raise ExtractorError( + 'PhantomJS not found, Please download it from https://phantomjs.org/download.html', expected=True) self.extractor = extractor -- cgit v1.2.3 From 62b58c0936cccc6f3e5115086406c7bfaf6fc551 Mon Sep 17 00:00:00 2001 From: Lesmiscore Date: Sun, 14 Aug 2022 21:04:13 +0900 Subject: [docs] Consistent use of `e.g.` (#4643) Authored by: Lesmiscore --- yt_dlp/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'yt_dlp/extractor/openload.py') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index f844ee6fb..f12a0eff1 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -169,7 +169,7 @@ class PhantomJSwrapper: In most cases you don't need to add any `jscode`. It is executed in `page.onLoadFinished`. `saveAndExit();` is mandatory, use it instead of `phantom.exit()` - It is possible to wait for some element on the webpage, for example: + It is possible to wait for some element on the webpage, e.g. var check = function() { var elementFound = page.evaluate(function() { return document.querySelector('#b.done') !== null; -- cgit v1.2.3 From 587021cd9f717181b44e881941aca3f8d753758b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 18 Aug 2022 21:34:47 +0530 Subject: [phantomjs] Add function to execute JS without a DOM Authored by: MinePlayersPE, pukkandan --- yt_dlp/extractor/openload.py | 62 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 24 deletions(-) (limited to 'yt_dlp/extractor/openload.py') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index f12a0eff1..e66ed4831 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -1,3 +1,4 @@ +import collections import contextlib import json import os @@ -9,8 +10,10 @@ from ..utils import ( ExtractorError, Popen, check_executable, + format_field, get_exe_version, is_outdated_version, + shell_quote, ) @@ -49,7 +52,7 @@ class PhantomJSwrapper: This class is experimental. """ - _TEMPLATE = r''' + _BASE_JS = R''' phantom.onError = function(msg, trace) {{ var msgStack = ['PHANTOM ERROR: ' + msg]; if(trace && trace.length) {{ @@ -62,6 +65,9 @@ class PhantomJSwrapper: console.error(msgStack.join('\n')); phantom.exit(1); }}; + ''' + + _TEMPLATE = R''' var page = require('webpage').create(); var fs = require('fs'); var read = {{ mode: 'r', charset: 'utf-8' }}; @@ -116,14 +122,18 @@ class PhantomJSwrapper: 'Your copy of PhantomJS is outdated, update it to version ' '%s or newer if you encounter any errors.' % required_version) - self.options = { - 'timeout': timeout, - } for name in self._TMP_FILE_NAMES: tmp = tempfile.NamedTemporaryFile(delete=False) tmp.close() self._TMP_FILES[name] = tmp + self.options = collections.ChainMap({ + 'timeout': timeout, + }, { + x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') + for x in self._TMP_FILE_NAMES + }) + def __del__(self): for name in self._TMP_FILE_NAMES: with contextlib.suppress(OSError, KeyError): @@ -194,31 +204,35 @@ class PhantomJSwrapper: self._save_cookies(url) - replaces = self.options - replaces['url'] = url user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent'] - replaces['ua'] = user_agent.replace('"', '\\"') - replaces['jscode'] = jscode - - for x in self._TMP_FILE_NAMES: - replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"') - - with open(self._TMP_FILES['script'].name, 'wb') as f: - f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) + jscode = self._TEMPLATE.format_map(self.options.new_child({ + 'url': url, + 'ua': user_agent.replace('"', '\\"'), + 'jscode': jscode, + })) - if video_id is None: - self.extractor.to_screen(f'{note2}') - else: - self.extractor.to_screen(f'{video_id}: {note2}') + stdout = self.execute(jscode, video_id, note2) - stdout, stderr, returncode = Popen.run( - [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], - text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - if returncode: - raise ExtractorError(f'Executing JS failed:\n{stderr}') with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') - self._load_cookies() return html, stdout + + def execute(self, jscode, video_id=None, note='Executing JS'): + """Execute JS and return stdout""" + if 'phantom.exit();' not in jscode: + jscode += ';\nphantom.exit();' + jscode = self._BASE_JS + jscode + + with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f: + f.write(jscode) + self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}') + + cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name] + self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}') + stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if returncode: + raise ExtractorError(f'Executing JS failed:\n{stderr.strip()}') + + return stdout -- cgit v1.2.3 From 992dc6b4863d0e60f2a1ce3933f67814d8a17f8d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 22 Aug 2022 06:19:06 +0530 Subject: [jsinterp] Implement timeout Workaround for #4716 --- yt_dlp/extractor/openload.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'yt_dlp/extractor/openload.py') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index e66ed4831..4bba7bdd0 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -219,7 +219,7 @@ class PhantomJSwrapper: return html, stdout - def execute(self, jscode, video_id=None, note='Executing JS'): + def execute(self, jscode, video_id=None, *, note='Executing JS'): """Execute JS and return stdout""" if 'phantom.exit();' not in jscode: jscode += ';\nphantom.exit();' @@ -231,8 +231,12 @@ class PhantomJSwrapper: cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name] self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}') - stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + try: + stdout, stderr, returncode = Popen.run(cmd, timeout=self.options['timeout'] / 1000, + text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except Exception as e: + raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e) if returncode: - raise ExtractorError(f'Executing JS failed:\n{stderr.strip()}') + raise ExtractorError(f'{note} failed:\n{stderr.strip()}') return stdout -- cgit v1.2.3 From d81ba7d491bf2c89246d8817438db48a5a4e4ae9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 30 Aug 2022 17:23:59 +0530 Subject: [jsinterp, extractor/youtube] Minor fixes --- yt_dlp/extractor/openload.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'yt_dlp/extractor/openload.py') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index 4bba7bdd0..d2756a006 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -52,6 +52,8 @@ class PhantomJSwrapper: This class is experimental. """ + INSTALL_HINT = 'Please download it from https://phantomjs.org/download.html' + _BASE_JS = R''' phantom.onError = function(msg, trace) {{ var msgStack = ['PHANTOM ERROR: ' + msg]; @@ -110,8 +112,7 @@ class PhantomJSwrapper: self.exe = check_executable('phantomjs', ['-v']) if not self.exe: - raise ExtractorError( - 'PhantomJS not found, Please download it from https://phantomjs.org/download.html', expected=True) + raise ExtractorError(f'PhantomJS not found, {self.INSTALL_HINT}', expected=True) self.extractor = extractor @@ -237,6 +238,6 @@ class PhantomJSwrapper: except Exception as e: raise ExtractorError(f'{note} failed: Unable to run PhantomJS binary', cause=e) if returncode: - raise ExtractorError(f'{note} failed:\n{stderr.strip()}') + raise ExtractorError(f'{note} failed with returncode {returncode}:\n{stderr.strip()}') return stdout -- cgit v1.2.3 From 69082b38dcb8ba5c6050d86f592c899a0a71760f Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Sat, 3 Sep 2022 01:44:01 -0500 Subject: [phantomjs] Fix bug in 587021cd9f717181b44e881941aca3f8d753758b (#4833) Authored by: elyse0 --- yt_dlp/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'yt_dlp/extractor/openload.py') diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index d2756a006..56b8330ff 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -212,7 +212,7 @@ class PhantomJSwrapper: 'jscode': jscode, })) - stdout = self.execute(jscode, video_id, note2) + stdout = self.execute(jscode, video_id, note=note2) with open(self._TMP_FILES['html'].name, 'rb') as f: html = f.read().decode('utf-8') -- cgit v1.2.3