diff options
Diffstat (limited to 'hypervideo_dl/extractor/newgrounds.py')
-rw-r--r-- | hypervideo_dl/extractor/newgrounds.py | 25 |
1 files changed, 16 insertions, 9 deletions
diff --git a/hypervideo_dl/extractor/newgrounds.py b/hypervideo_dl/extractor/newgrounds.py index bbbd9e8..6525a6d 100644 --- a/hypervideo_dl/extractor/newgrounds.py +++ b/hypervideo_dl/extractor/newgrounds.py @@ -6,7 +6,9 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, extract_attributes, + get_element_by_id, int_or_none, parse_count, parse_duration, @@ -29,7 +31,8 @@ class NewgroundsIE(InfoExtractor): 'timestamp': 1378878540, 'upload_date': '20130911', 'duration': 143, - 'description': 'md5:6d885138814015dfd656c2ddb00dacfc', + 'view_count': int, + 'description': 'md5:b8b3c2958875189f07d8e313462e8c4f', }, }, { 'url': 'https://www.newgrounds.com/portal/view/1', @@ -41,6 +44,7 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Brian-Beaton', 'timestamp': 955064100, 'upload_date': '20000406', + 'view_count': int, 'description': 'Scrotum plays "catch."', 'age_limit': 17, }, @@ -54,7 +58,8 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'ZONE-SAMA', 'timestamp': 1487965140, 'upload_date': '20170224', - 'description': 'ZTV News Episode 8 (February 2017)', + 'view_count': int, + 'description': 'md5:aff9b330ec2e78ed93b1ad6d017accc6', 'age_limit': 17, }, 'params': { @@ -70,7 +75,8 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Egoraptor', 'timestamp': 1140663240, 'upload_date': '20060223', - 'description': 'Metal Gear is awesome is so is this movie.', + 'view_count': int, + 'description': 'md5:9246c181614e23754571995104da92e0', 'age_limit': 13, } }, { @@ -80,7 +86,7 @@ class NewgroundsIE(InfoExtractor): 'id': '297383', 'ext': 'swf', 'title': 'Metal Gear Awesome', - 'description': 'Metal Gear is awesome is so is this movie.', + 'description': 'Metal Gear Awesome', 'uploader': 'Egoraptor', 'upload_date': '20060223', 'timestamp': 1140663240, @@ -100,8 +106,7 @@ class NewgroundsIE(InfoExtractor): uploader = None webpage = self._download_webpage(url, media_id) - title = self._html_search_regex( - r'<title>(.+?)</title>', webpage, 'title') + title = self._html_extract_title(webpage) media_url_string = self._search_regex( r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None) @@ -145,10 +150,13 @@ class NewgroundsIE(InfoExtractor): (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)', r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp', default=None)) + duration = parse_duration(self._html_search_regex( r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage, 'duration', default=None)) + description = clean_html(get_element_by_id('author_comments', webpage)) or self._og_search_description(webpage) + view_count = parse_count(self._html_search_regex( r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage, 'view count', default=None)) @@ -177,7 +185,7 @@ class NewgroundsIE(InfoExtractor): 'duration': duration, 'formats': formats, 'thumbnail': self._og_search_thumbnail(webpage), - 'description': self._og_search_description(webpage), + 'description': description, 'age_limit': age_limit, 'view_count': view_count, } @@ -210,8 +218,7 @@ class NewgroundsPlaylistIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - title = self._search_regex( - r'<title>([^>]+)</title>', webpage, 'title', default=None) + title = self._html_extract_title(webpage, default=None) # cut left menu webpage = self._search_regex( |