From 27fe903c511691c078942bef5ee9a05a43b15c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs?= Date: Wed, 9 Jun 2021 17:54:27 -0500 Subject: initial --- test/__init__.py | 0 test/helper.py | 282 +++++ test/parameters.json | 43 + test/test_InfoExtractor.py | 1132 ++++++++++++++++++++ test/test_YoutubeDL.py | 1002 +++++++++++++++++ test/test_YoutubeDLCookieJar.py | 51 + test/test_aes.py | 63 ++ test/test_age_restriction.py | 50 + test/test_all_urls.py | 126 +++ test/test_cache.py | 59 + test/test_compat.py | 126 +++ test/test_download.py | 265 +++++ test/test_downloader_http.py | 115 ++ test/test_execution.py | 54 + test/test_http.py | 166 +++ test/test_netrc.py | 26 + test/test_options.py | 26 + test/test_postprocessors.py | 17 + test/test_socks.py | 118 ++ test/test_subtitles.py | 353 ++++++ test/test_unicode_literals.py | 63 ++ test/test_utils.py | 1480 ++++++++++++++++++++++++++ test/test_verbose_output.py | 71 ++ test/test_write_annotations.py | 80 ++ test/test_youtube_lists.py | 80 ++ test/test_youtube_misc.py | 26 + test/testcert.pem | 52 + test/testdata/cookies/httponly_cookies.txt | 6 + test/testdata/cookies/malformed_cookies.txt | 9 + test/testdata/cookies/session_cookies.txt | 6 + test/testdata/f4m/custom_base_url.f4m | 10 + test/testdata/m3u8/pluzz_francetv_11507.m3u8 | 14 + test/testdata/m3u8/teamcoco_11995.m3u8 | 16 + test/testdata/m3u8/ted_18923.m3u8 | 28 + test/testdata/m3u8/toggle_mobile_12211.m3u8 | 13 + test/testdata/m3u8/twitch_vod.m3u8 | 20 + test/testdata/m3u8/vidio.m3u8 | 10 + test/testdata/mpd/float_duration.mpd | 18 + test/testdata/mpd/unfragmented.mpd | 28 + test/testdata/mpd/urls_only.mpd | 218 ++++ test/testdata/xspf/foo_xspf.xspf | 34 + 41 files changed, 6356 insertions(+) create mode 100644 test/__init__.py create mode 100644 test/helper.py create mode 100644 test/parameters.json create mode 100644 test/test_InfoExtractor.py create mode 100644 test/test_YoutubeDL.py create mode 100644 test/test_YoutubeDLCookieJar.py create mode 
100644 test/test_aes.py create mode 100644 test/test_age_restriction.py create mode 100644 test/test_all_urls.py create mode 100644 test/test_cache.py create mode 100644 test/test_compat.py create mode 100644 test/test_download.py create mode 100644 test/test_downloader_http.py create mode 100644 test/test_execution.py create mode 100644 test/test_http.py create mode 100644 test/test_netrc.py create mode 100644 test/test_options.py create mode 100644 test/test_postprocessors.py create mode 100644 test/test_socks.py create mode 100644 test/test_subtitles.py create mode 100644 test/test_unicode_literals.py create mode 100644 test/test_utils.py create mode 100644 test/test_verbose_output.py create mode 100644 test/test_write_annotations.py create mode 100644 test/test_youtube_lists.py create mode 100644 test/test_youtube_misc.py create mode 100644 test/testcert.pem create mode 100644 test/testdata/cookies/httponly_cookies.txt create mode 100644 test/testdata/cookies/malformed_cookies.txt create mode 100644 test/testdata/cookies/session_cookies.txt create mode 100644 test/testdata/f4m/custom_base_url.f4m create mode 100644 test/testdata/m3u8/pluzz_francetv_11507.m3u8 create mode 100644 test/testdata/m3u8/teamcoco_11995.m3u8 create mode 100644 test/testdata/m3u8/ted_18923.m3u8 create mode 100644 test/testdata/m3u8/toggle_mobile_12211.m3u8 create mode 100644 test/testdata/m3u8/twitch_vod.m3u8 create mode 100644 test/testdata/m3u8/vidio.m3u8 create mode 100644 test/testdata/mpd/float_duration.mpd create mode 100644 test/testdata/mpd/unfragmented.mpd create mode 100644 test/testdata/mpd/urls_only.mpd create mode 100644 test/testdata/xspf/foo_xspf.xspf (limited to 'test') diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/helper.py b/test/helper.py new file mode 100644 index 0000000..6eb9298 --- /dev/null +++ b/test/helper.py @@ -0,0 +1,282 @@ +from __future__ import unicode_literals + +import errno +import io 
import hashlib
import json
import os.path
import re
import types
import ssl
import sys

import hypervideo_dl.extractor
from hypervideo_dl import YoutubeDL
from hypervideo_dl.compat import (
    compat_os_name,
    compat_str,
)
from hypervideo_dl.utils import (
    preferredencoding,
    write_string,
)


def get_params(override=None):
    """Load the downloader parameters used by the test suite.

    ``parameters.json`` (next to this module) is always read.  When a
    ``local_parameters.json`` exists in the same directory its entries are
    layered on top, and finally the entries of ``override`` (if truthy) win
    over both.

    Returns the resulting, freshly loaded dictionary.
    """
    test_dir = os.path.dirname(os.path.abspath(__file__))

    with io.open(os.path.join(test_dir, 'parameters.json'), encoding='utf-8') as base_file:
        params = json.load(base_file)

    local_path = os.path.join(test_dir, 'local_parameters.json')
    if os.path.exists(local_path):
        with io.open(local_path, encoding='utf-8') as local_file:
            params.update(json.load(local_file))

    if override:
        params.update(override)
    return params


def try_rm(filename):
    """Delete ``filename``; a file that does not exist is not an error."""
    try:
        os.remove(filename)
    except OSError as err:
        if err.errno == errno.ENOENT:
            return
        # Anything other than "no such file" is propagated unchanged.
        raise


def report_warning(message):
    """Write ``message`` to stderr behind a ``WARNING:`` prefix.

    The prefix is wrapped in ANSI colour codes when stderr is a tty and the
    platform name is not ``'nt'``.
    """
    colorize = sys.stderr.isatty() and compat_os_name != 'nt'
    prefix = '\033[0;33mWARNING:\033[0m' if colorize else 'WARNING:'
    line = '%s %s\n' % (prefix, message)

    # Byte-mode streams and Python 2 get an encoded byte string.
    wants_bytes = 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        line = line.encode(preferredencoding())
    sys.stderr.write(line)


class FakeYDL(YoutubeDL):
    """YoutubeDL stand-in for tests.

    Screen output is printed, ``trouble`` raises, and ``download`` only
    records what it was asked to fetch in ``self.result``.
    """

    def __init__(self, override=None):
        # Every instance loads its own parameter dictionary: some tests set
        # the "sublang" parameter, and sharing one dictionary between
        # downloader instances would break the md5 checks.
        super(FakeYDL, self).__init__(get_params(override=override), auto_init=False)
        self.result = []

    def to_screen(self, s, skip_eol=None):
        """Print ``s``; ``skip_eol`` is accepted but ignored."""
        print(s)

    def trouble(self, s, tb=None):
        """Turn any reported trouble into an exception carrying ``s``."""
        raise Exception(s)

    def download(self, x):
        """Record ``x`` in ``self.result`` instead of downloading it."""
        self.result.append(x)

    def expect_warning(self, regex):
        """Silence warnings whose message matches ``regex`` (``re.match``).

        Non-matching warnings are forwarded to the ``report_warning`` that
        was installed before this call.
        """
        previous_report = self.report_warning

        def _filtered_report(ydl, message):
            if not re.match(regex, message):
                previous_report(message)

        self.report_warning = types.MethodType(_filtered_report, self)


def gettestcases(include_onlymatching=False):
    """Yield the test cases of every extractor, in extractor order."""
    for extractor in hypervideo_dl.extractor.gen_extractors():
        for testcase in extractor.get_testcases(include_onlymatching):
            yield testcase


def md5(s):
    """Return the hex MD5 digest of the UTF-8 encoding of ``s``."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()


def expect_value(self, got, expected, field):
    """Assert on test case ``self`` that ``got`` satisfies ``expected``.

    ``expected`` may be:

    * a string prefixed with ``re:``, ``startswith:`` or ``contains:`` --
      ``got`` must be a string that matches / starts with / contains the
      remainder;
    * a type -- ``got`` must be an instance of it;
    * a dict (when ``got`` is a dict too) -- checked via ``expect_dict``;
    * a list (when ``got`` is a list too) -- same length, and items are
      compared pairwise (same type, then recursively);
    * a string prefixed with ``md5:`` -- compared with the MD5 of ``got``;
    * ``count:N`` / ``mincount:N`` / ``maxcount:N`` -- ``got`` must be a
      list or dict of exactly / at least / at most ``N`` items;
    * anything else -- must be equal to ``got``.

    ``field`` is only used to build the failure messages.
    """
    expected_is_text = isinstance(expected, compat_str)

    def _require_text():
        self.assertTrue(
            isinstance(got, compat_str),
            'Expected a %s object, but got %s for field %s' % (
                compat_str.__name__, type(got).__name__, field))

    if expected_is_text and expected.startswith('re:'):
        pattern = expected[len('re:'):]
        compiled = re.compile(pattern)
        _require_text()
        self.assertTrue(
            compiled.match(got),
            'field %s (value: %r) should match %r' % (field, got, pattern))
        return

    if expected_is_text and expected.startswith('startswith:'):
        prefix = expected[len('startswith:'):]
        _require_text()
        self.assertTrue(
            got.startswith(prefix),
            'field %s (value: %r) should start with %r' % (field, got, prefix))
        return

    if expected_is_text and expected.startswith('contains:'):
        needle = expected[len('contains:'):]
        _require_text()
        self.assertTrue(
            needle in got,
            'field %s (value: %r) should contain %r' % (field, got, needle))
        return

    if isinstance(expected, type):
        self.assertTrue(
            isinstance(got, expected),
            'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got)))
        return

    if isinstance(expected, dict) and isinstance(got, dict):
        expect_dict(self, got, expected)
        return

    if isinstance(expected, list) and isinstance(got, list):
        self.assertEqual(
            len(expected), len(got),
            'Expect a list of length %d, but got a list of length %d for field %s' % (
                len(expected), len(got), field))
        for position, (got_item, expected_item) in enumerate(zip(got, expected)):
            got_type = type(got_item)
            expected_type = type(expected_item)
            self.assertEqual(
                expected_type, got_type,
                'Type mismatch for list item at index %d for field %s, expected %r, got %r' % (
                    position, field, expected_type, got_type))
            expect_value(self, got_item, expected_item, field)
        return

    if expected_is_text and expected.startswith('md5:'):
        self.assertTrue(
            isinstance(got, compat_str),
            'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got)))
        # Compare digests from here on; the equality check below does it.
        got = 'md5:' + md5(got)
    elif expected_is_text and re.match(r'^(?:min|max)?count:\d+', expected):
        self.assertTrue(
            isinstance(got, (list, dict)),
            'Expected field %s to be a list or a dict, but it is of type %s' % (
                field, type(got).__name__))
        op, _, count_text = expected.partition(':')
        wanted = int(count_text)
        # Built at call time: the assert helpers are defined further down.
        count_checks = {
            'mincount': (assertGreaterEqual, 'Expected %d items in field %s, but only got %d'),
            'maxcount': (assertLessEqual, 'Expected maximum %d items in field %s, but got %d'),
            'count': (assertEqual, 'Expected exactly %d items in field %s, but got %d'),
        }
        assert op in count_checks
        assert_func, msg_tmpl = count_checks[op]
        assert_func(
            self, len(got), wanted,
            msg_tmpl % (wanted, field, len(got)))
        return

    self.assertEqual(
        expected, got,
        'Invalid value for field %s, expected %r, got %r' % (field, expected, got))


def expect_dict(self, got_dict, expected_dict):
    """Check every entry of ``expected_dict`` against ``got_dict``.

    Keys absent from ``got_dict`` are checked as ``None``.
    """
    for name, wanted in expected_dict.items():
        expect_value(self, got_dict.get(name), wanted, name)


def expect_info_dict(self, got_dict, expected_dict):
    """Validate an extracted info dict against a test definition.

    Besides the per-field checks of ``expect_dict`` this asserts that the
    mandatory fields are present and that the test definition does not omit
    any checkable field that was actually extracted.  When it does, a
    suggested ``info_dict`` is written to stderr before the assertion.
    """
    expect_dict(self, got_dict, expected_dict)

    # Playlists and multi-video results are exempt from the per-video
    # mandatory fields.
    if got_dict.get('_type') not in ('playlist', 'multi_video'):
        for mandatory in ('id', 'url', 'title', 'ext'):
            self.assertTrue(got_dict.get(mandatory), 'Missing mandatory field %s' % mandatory)
    # Fields that YoutubeDL sets automatically.
    for automatic in ['webpage_url', 'extractor', 'extractor_key']:
        self.assertTrue(got_dict.get(automatic), 'Missing field: %s' % automatic)

    # Collect the checkable fields that carry a value; long strings are
    # replaced by their md5 so the suggestion stays short.
    checkable = ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit')
    test_info_dict = {}
    for key, value in got_dict.items():
        if not (value and key in checkable):
            continue
        if isinstance(value, compat_str) and len(value) >= 250:
            value = 'md5:' + md5(value)
        test_info_dict[key] = value

    missing_keys = set(test_info_dict.keys()).difference(expected_dict.keys())
    if not missing_keys:
        return

    def _quote(v):
        if not isinstance(v, compat_str):
            return repr(v)
        return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')

    # NOTE(review): the leading whitespace inside the two entry templates
    # below is reproduced as it appears in the (whitespace-collapsed) patch
    # text; confirm the intended indent width against the original file.
    pieces = []
    if len(missing_keys) != len(expected_dict):
        pieces.extend(
            ' %s: %s,\n' % (_quote(k), _quote(v))
            for k, v in test_info_dict.items() if k not in missing_keys)
        if pieces:
            pieces.append('\n')
    pieces.extend(
        ' %s: %s,\n' % (_quote(k), _quote(test_info_dict[k]))
        for k in missing_keys)
    info_dict_str = ''.join(pieces)

    write_string(
        '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr)
    self.assertFalse(
        missing_keys,
        'Missing keys in test definition: %s' % (
            ', '.join(sorted(missing_keys))))


def assertRegexpMatches(self, text, regexp, msg=None):
    """Assert that ``regexp`` matches at the start of ``text``.

    If ``self`` provides an ``assertRegexp`` method the check is delegated
    to it; otherwise ``re.match`` is used and a descriptive note is put in
    front of ``msg`` on failure.
    """
    # NOTE(review): the standard unittest.TestCase methods are named
    # assertRegex / assertRegexpMatches, so this guard presumably never
    # fires for plain test cases -- confirm before relying on delegation.
    if hasattr(self, 'assertRegexp'):
        return self.assertRegexp(text, regexp, msg)

    found = re.match(regexp, text)
    if found:
        return
    note = 'Regexp didn\'t match: %r not found' % (regexp)
    if len(text) < 1000:
        note += ' in %r' % text
    msg = note if msg is None else note + ', ' + msg
    self.assertTrue(found, msg)


def assertGreaterEqual(self, got, expected, msg=None):
    """Assert ``got >= expected``, with a default message if none given."""
    if got >= expected:
        return
    if msg is None:
        msg = '%r not greater than or equal to %r' % (got, expected)
    self.assertTrue(got >= expected, msg)


def assertLessEqual(self, got, expected, msg=None):
    """Assert ``got <= expected``, with a default message if none given."""
    if got <= expected:
        return
    if msg is None:
        msg = '%r not less than or equal to %r' % (got, expected)
    self.assertTrue(got <= expected, msg)


def assertEqual(self, got, expected, msg=None):
    """Assert ``got == expected``, with a default message if none given."""
    if got == expected:
        return
    if msg is None:
        msg = '%r not equal to %r' % (got, expected)
    self.assertTrue(got == expected, msg)


def expect_warnings(ydl, warnings_re):
    """Make ``ydl`` drop warnings matched (``re.search``) by ``warnings_re``.

    ``warnings_re`` is an iterable of patterns; a warning that matches none
    of them is passed on to the previously installed ``report_warning``.
    """
    forward = ydl.report_warning

    def _report_warning(w):
        for pattern in warnings_re:
            if re.search(pattern, w):
                return
        forward(w)

    ydl.report_warning = _report_warning


def http_server_port(httpd):
    """Return the port number the socket of ``httpd`` is bound to."""
    sock = httpd.socket
    if os.name == 'java' and isinstance(sock, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket
        sock = sock.sock
    return sock.getsockname()[1]


# ---------------------------------------------------------------------------
# Residue of the enclosing patch that shared the last source line with the
# code above, kept verbatim: it is the beginning of the diff that adds
# test/parameters.json.
# ---------------------------------------------------------------------------
# diff --git a/test/parameters.json b/test/parameters.json new file mode 100644 index 0000000..65fd544 --- /dev/null +++ b/test/parameters.json @@ -0,0 +1,43 @@ +{ + "consoletitle": false, + "continuedl": true, + "forcedescription": false, + "forcefilename": false, + "forceformat": false, + "forcethumbnail": false, + "forcetitle": false, + "forceurl": false, + "format": "best", + "ignoreerrors": false, + "listformats": null, + "logtostderr": false, + "matchtitle": null, + "max_downloads": null, + "nooverwrites": false, + "nopart": false, +
"noprogress": false, + "outtmpl": "%(id)s.%(ext)s", + "password": null, + "playlistend": -1, + "playliststart": 1, + "prefer_free_formats": false, + "quiet": false, + "ratelimit": null, + "rejecttitle": null, + "retries": 10, + "simulate": false, + "subtitleslang": null, + "subtitlesformat": "best", + "test": true, + "updatetime": true, + "usenetrc": false, + "username": null, + "verbose": true, + "writedescription": false, + "writeinfojson": true, + "writesubtitles": false, + "allsubtitles": false, + "listsubtitles": false, + "socket_timeout": 20, + "fixup": "never" +} diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py new file mode 100644 index 0000000..5029072 --- /dev/null +++ b/test/test_InfoExtractor.py @@ -0,0 +1,1132 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import io +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from hypervideo_dl.compat import compat_etree_fromstring, compat_http_server +from hypervideo_dl.extractor.common import InfoExtractor +from hypervideo_dl.extractor import YoutubeIE, get_info_extractor +from hypervideo_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False + + +class TestIE(InfoExtractor): + pass + + +class TestInfoExtractor(unittest.TestCase): + def setUp(self): + self.ie = TestIE(FakeYDL()) + + def test_ie_key(self): + self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) + + def test_html_search_regex(self): + html = '

Watch this video

' + search = lambda re, *args: self.ie._html_search_regex(re, html, *args) + self.assertEqual(search(r'

(.+?)

', 'foo'), 'Watch this video') + + def test_opengraph(self): + ie = self.ie + html = ''' + + + + + + + + + ''' + self.assertEqual(ie._og_search_title(html), 'Foo') + self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') + self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + self.assertEqual(ie._og_search_video_url(html, default=None), None) + self.assertEqual(ie._og_search_property('foobar', html), 'Foo') + self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') + self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') + self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') + self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) + self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) + + def test_html_search_meta(self): + ie = self.ie + html = ''' + + + + + + + ''' + + self.assertEqual(ie._html_search_meta('a', html), '1') + self.assertEqual(ie._html_search_meta('b', html), '2') + self.assertEqual(ie._html_search_meta('c', html), '3') + self.assertEqual(ie._html_search_meta('d', html), '4') + self.assertEqual(ie._html_search_meta('e', html), '5') + self.assertEqual(ie._html_search_meta('f', html), '6') + self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') + self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') + self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') + self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) + self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) + + def test_search_json_ld_realworld(self): + # https://github.com/ytdl-org/youtube-dl/issues/23306 + expect_dict( + self, + self.ie._search_json_ld(r'''''', None), + { + 
'title': '1 On 1 With Kleio', + 'description': 'Kleio Valentien', + 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', + 'timestamp': 1449347075, + 'duration': 743.0, + 'view_count': 1120958, + 'width': 1920, + 'height': 1080, + }) + + def test_download_json(self): + uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') + self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) + uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') + self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) + uri = encode_data_uri(b'{"foo": invalid}', 'application/json') + self.assertRaises(ExtractorError, self.ie._download_json, uri, None) + self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + + def test_parse_html5_media_entries(self): + # inline video tag + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://127.0.0.1/video.html', + r'