diff options
Diffstat (limited to 'test/test_utils.py')
-rw-r--r-- | test/test_utils.py | 214 |
1 files changed, 184 insertions, 30 deletions
diff --git a/test/test_utils.py b/test/test_utils.py index 1cd2b2f..039900c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -23,6 +23,7 @@ from hypervideo_dl.utils import ( caesar, clean_html, clean_podcast_url, + Config, date_from_str, datetime_from_str, DateRange, @@ -37,11 +38,18 @@ from hypervideo_dl.utils import ( ExtractorError, find_xpath_attr, fix_xml_ampersands, + format_bytes, float_or_none, get_element_by_class, get_element_by_attribute, get_elements_by_class, get_elements_by_attribute, + get_element_html_by_class, + get_element_html_by_attribute, + get_elements_html_by_class, + get_elements_html_by_attribute, + get_elements_text_and_html_by_attribute, + get_element_text_and_html_by_tag, InAdvancePagedList, int_or_none, intlist_to_bytes, @@ -116,6 +124,7 @@ from hypervideo_dl.compat import ( compat_chr, compat_etree_fromstring, compat_getenv, + compat_HTMLParseError, compat_os_name, compat_setenv, ) @@ -151,10 +160,12 @@ class TestUtil(unittest.TestCase): sanitize_filename('New World record at 0:12:34'), 'New World record at 0_12_34') - self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') + self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf') self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') - self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf') + self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf') + self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf') self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf') + self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf') forbidden = '"\0\\/' for fc in forbidden: @@ -616,6 +627,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('3h 11m 53s'), 11513) self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513) self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513) + self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513) + self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513) self.assertEqual(parse_duration('62m45s'), 3765) self.assertEqual(parse_duration('6m59s'), 419) self.assertEqual(parse_duration('49s'), 49) @@ -634,6 +647,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_duration('PT1H0.040S'), 3600.04) self.assertEqual(parse_duration('PT00H03M30SZ'), 210) self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) + self.assertEqual(parse_duration('01:02:03:050'), 3723.05) + self.assertEqual(parse_duration('103:050'), 103.05) def test_fix_xml_ampersands(self): self.assertEqual( @@ -1122,7 +1137,7 @@ class TestUtil(unittest.TestCase): def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') - self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') + self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') def test_intlist_to_bytes(self): @@ -1156,19 +1171,29 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_count('1000'), 1000) self.assertEqual(parse_count('1.000'), 1000) self.assertEqual(parse_count('1.1k'), 1100) + self.assertEqual(parse_count('1.1 k'), 1100) + self.assertEqual(parse_count('1,1 k'), 1100) self.assertEqual(parse_count('1.1kk'), 1100000) self.assertEqual(parse_count('1.1kk '), 1100000) + self.assertEqual(parse_count('1,1kk'), 1100000) + self.assertEqual(parse_count('100 views'), 100) + self.assertEqual(parse_count('1,100 views'), 1100) self.assertEqual(parse_count('1.1kk views'), 1100000) + self.assertEqual(parse_count('10M views'), 10000000) + self.assertEqual(parse_count('has 10M views'), 10000000) def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) - self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080}) - self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) + self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('ep1x2'), {}) + self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) @@ -1219,12 +1244,49 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') def test_render_table(self): self.assertEqual( render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a empty bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), + 'a bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + ' a bcd\n' + '1 23 4\n' + '9999 51') + + self.assertEqual( + render_table( ['a', 'bcd'], - [[123, 4], [9999, 51]]), + [[123, 4], [9999, 51]], + delim='-'), 'a bcd\n' + '--------\n' '123 4\n' '9999 51') + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a bcd\n' + '----------\n' + '123 4\n' + '9999 51') + def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) @@ -1390,21 +1452,21 @@ The first line </body> </tt>'''.encode('utf-8') srt_data = '''1 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> 2 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 </font>part 2</font></b> 3 -00:00:05,839 --> 00:00:09,560 +00:00:05,840 --> 00:00:09,560 <u><font color="lime">line 3 part 3</font></u> 4 -00:00:09,560 --> 00:00:12,359 +00:00:09,560 --> 00:00:12,360 <i><u><font color="yellow"><font color="lime">inner </font>style</font></u></i> @@ -1526,46 +1588,116 @@ Line 1 self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(-3, 1), 2147483646) + GET_ELEMENT_BY_CLASS_TEST_STRING = ''' + <span class="foo bar">nice</span> + ''' + def test_get_element_by_class(self): - html = ''' - <span class="foo bar">nice</span> - ''' + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + def test_get_element_html_by_class(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_class('foo', html), html.strip()) + self.assertEqual(get_element_by_class('no-such-class', html), None) + + GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = ''' + <div itemprop="author" itemscope>foo</div> + ''' + def test_get_element_by_attribute(self): - html = ''' - <span class="foo bar">nice</span> - ''' + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice') self.assertEqual(get_element_by_attribute('class', 'foo', html), None) self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None) - html = ''' - <div itemprop="author" itemscope>foo</div> - ''' + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + def test_get_element_html_by_attribute(self): + html = self.GET_ELEMENT_BY_CLASS_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip()) + self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None) + self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None) + + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING + + self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip()) + + GET_ELEMENTS_BY_CLASS_TEST_STRING = ''' + <span class="foo bar">nice</span><span class="foo bar">also nice</span> + ''' + GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>'] + def test_get_elements_by_class(self): - html = ''' - <span class="foo bar">nice</span><span class="foo bar">also nice</span> - ''' + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_class('no-such-class', html), []) + def test_get_elements_html_by_class(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_class('no-such-class', html), []) + def test_get_elements_by_attribute(self): - html = ''' - <span class="foo bar">nice</span><span class="foo bar">also nice</span> - ''' + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) + def test_get_elements_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES) + self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), []) + self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), []) + + def test_get_elements_text_and_html_by_attribute(self): + html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING + + self.assertEqual( + list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)), + list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES))) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), []) + self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), []) + + GET_ELEMENT_BY_TAG_TEST_STRING = ''' + random text lorem ipsum</p> + <div> + this should be returned + <span>this should also be returned</span> + <div> + this should also be returned + </div> + closing tag above should not trick, so this should also be returned + </div> + but this text should not be returned + ''' + GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276] + GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] + GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] + + def test_get_element_text_and_html_by_tag(self): + html = self.GET_ELEMENT_BY_TAG_TEST_STRING + + self.assertEqual( + get_element_text_and_html_by_tag('div', html), + (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML)) + self.assertEqual( + get_element_text_and_html_by_tag('span', html), + (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) + self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html) + def test_iri_to_uri(self): self.assertEqual( iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'), @@ -1617,9 +1749,9 @@ Line 1 self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(str(LazyList(it)), str(it)) - self.assertEqual(list(LazyList(it).reverse()), it[::-1]) - self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7]) - self.assertEqual(list(LazyList(it).reverse()[::-1]), it) + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) def test_LazyList_laziness(self): @@ -1632,15 +1764,37 @@ Line 1 test(ll, 5, 5, range(6)) test(ll, -3, 7, range(10)) - ll = LazyList(range(10)).reverse() + ll = LazyList(range(10), reverse=True) test(ll, -1, 0, range(1)) test(ll, 3, 6, range(10)) ll = LazyList(itertools.count()) test(ll, 10, 10, range(11)) - ll.reverse() + ll = reversed(ll) test(ll, -15, 14, range(15)) + def test_format_bytes(self): + self.assertEqual(format_bytes(0), '0.00B') + self.assertEqual(format_bytes(1000), '1000.00B') + self.assertEqual(format_bytes(1024), '1.00KiB') + self.assertEqual(format_bytes(1024**2), '1.00MiB') + self.assertEqual(format_bytes(1024**3), '1.00GiB') + self.assertEqual(format_bytes(1024**4), '1.00TiB') + self.assertEqual(format_bytes(1024**5), '1.00PiB') + self.assertEqual(format_bytes(1024**6), '1.00EiB') + self.assertEqual(format_bytes(1024**7), '1.00ZiB') + self.assertEqual(format_bytes(1024**8), '1.00YiB') + self.assertEqual(format_bytes(1024**9), '1024.00YiB') + + def test_hide_login_info(self): + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']), + ['-u', 'PRIVATE', '-p', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['-u']), ['-u']) + self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']), + ['-u', 'PRIVATE', '-u', 'PRIVATE']) + self.assertEqual(Config.hide_login_info(['--username=foo']), + ['--username=PRIVATE']) + if __name__ == '__main__': unittest.main() |