diff options
Diffstat (limited to 'hypervideo_dl/extractor/rumble.py')
-rw-r--r-- | hypervideo_dl/extractor/rumble.py | 168 |
1 files changed, 141 insertions, 27 deletions
diff --git a/hypervideo_dl/extractor/rumble.py b/hypervideo_dl/extractor/rumble.py index 102615c..f8bf4a1 100644 --- a/hypervideo_dl/extractor/rumble.py +++ b/hypervideo_dl/extractor/rumble.py @@ -2,13 +2,20 @@ import itertools import re from .common import InfoExtractor -from ..compat import compat_HTTPError +from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + UnsupportedError, + clean_html, + determine_ext, + format_field, + get_element_by_class, int_or_none, + join_nonempty, + parse_count, parse_iso8601, traverse_obj, unescapeHTML, - ExtractorError, ) @@ -112,24 +119,6 @@ class RumbleEmbedIE(InfoExtractor): _WEBPAGE_TESTS = [ { - 'note': 'Rumble embed', - 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', - 'md5': '53af34098a7f92c4e51cf0bd1c33f009', - 'info_dict': { - 'id': 'vb0ofn', - 'ext': 'mp4', - 'timestamp': 1612662578, - 'uploader': 'LovingMontana', - 'channel': 'LovingMontana', - 'upload_date': '20210207', - 'title': 'Winter-loving dog helps girls dig a snow fort ', - 'channel_url': 'https://rumble.com/c/c-546523', - 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/f/x/x/5fxxb.OvCc.1-small-Moose-The-Dog-Helps-Girls-D.jpg', - 'duration': 103, - 'live_status': 'not_live', - } - }, - { 'note': 'Rumble JS embed', 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', 'md5': '4701209ac99095592e73dbba21889690', @@ -155,7 +144,7 @@ class RumbleEmbedIE(InfoExtractor): if embeds: return embeds return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer( - r'<script>\s*Rumble\(\s*"play"\s*,\s*{\s*[\'"]video[\'"]\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] + r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{\s*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] def _real_extract(self, url): video_id = self._match_id(url) @@ -178,7 +167,13 @@ class RumbleEmbedIE(InfoExtractor): formats = [] for ext, ext_info in (video.get('ua') or {}).items(): - for height, video_info in (ext_info or {}).items(): + if isinstance(ext_info, dict): + for height, video_info in ext_info.items(): + if not traverse_obj(video_info, ('meta', 'h', {int_or_none})): + video_info.setdefault('meta', {})['h'] = height + ext_info = ext_info.values() + + for video_info in ext_info: meta = video_info.get('meta') or {} if not video_info.get('url'): continue @@ -189,18 +184,22 @@ class RumbleEmbedIE(InfoExtractor): video_info['url'], video_id, ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) continue + timeline = ext == 'timeline' + if timeline: + ext = determine_ext(video_info['url']) formats.append({ 'ext': ext, + 'acodec': 'none' if timeline else None, 'url': video_info['url'], - 'format_id': '%s-%sp' % (ext, height), - 'height': int_or_none(height), - 'fps': video.get('fps'), + 'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')), + 'format_note': 'Timeline' if timeline else None, + 'fps': None if timeline else video.get('fps'), **traverse_obj(meta, { 'tbr': 'bitrate', 'filesize': 'size', 'width': 'w', 'height': 'h', - }, default={}) + }, expected_type=lambda x: int(x) or None) }) subtitles = { @@ -235,6 +234,121 @@ class RumbleEmbedIE(InfoExtractor): } +class RumbleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$' + _EMBED_REGEX = [r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>'] + _TESTS = [{ + 'add_ie': ['RumbleEmbed'], + 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', + 'md5': '53af34098a7f92c4e51cf0bd1c33f009', + 'info_dict': { + 'id': 'vb0ofn', + 'ext': 'mp4', + 'timestamp': 1612662578, + 'uploader': 'LovingMontana', + 'channel': 'LovingMontana', + 'upload_date': '20210207', + 'title': 'Winter-loving dog helps girls dig a snow fort ', + 'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!', + 'channel_url': 'https://rumble.com/c/c-546523', + 'thumbnail': r're:https://.+\.jpg', + 'duration': 103, + 'like_count': int, + 'view_count': int, + 'live_status': 'not_live', + } + }, { + 'url': 'http://www.rumble.com/vDMUM1?key=value', + 'only_matching': True, + }, { + 'note': 'timeline format', + 'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html', + 'md5': '40d61fec6c0945bca3d0e1dc1aa53d79', + 'params': {'format': 'wv'}, + 'info_dict': { + 'id': 'v2bou5f', + 'ext': 'mp4', + 'uploader': 'Redacted News', + 'upload_date': '20230322', + 'timestamp': 1679445010, + 'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris', + 'duration': 892, + 'channel': 'Redacted News', + 'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42', + 'channel_url': 'https://rumble.com/c/Redacted', + 'live_status': 'not_live', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg', + }, + }, { + 'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html', + 'info_dict': { + 'id': 'v2blzyy', + 'ext': 'mp4', + 'live_status': 'was_live', + 'release_timestamp': 1679446804, + 'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636', + 'release_date': '20230322', + 'timestamp': 1679445692, + 'duration': 4435, + 'upload_date': '20230322', + 'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi', + 'uploader': 'Kim Iversen', + 'channel_url': 'https://rumble.com/c/KimIversen', + 'channel': 'Kim Iversen', + 'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg', + }, + }] + + _WEBPAGE_TESTS = [{ + 'url': 'https://rumble.com/videos?page=2', + 'playlist_count': 25, + 'info_dict': { + 'id': 'videos?page=2', + 'title': 'All videos', + 'description': 'Browse videos uploaded to Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/live-videos', + 'playlist_mincount': 19, + 'info_dict': { + 'id': 'live-videos', + 'title': 'Live Videos', + 'description': 'Live videos on Rumble.com', + 'age_limit': 0, + }, + }, { + 'url': 'https://rumble.com/search/video?q=rumble&sort=views', + 'playlist_count': 24, + 'info_dict': { + 'id': 'video?q=rumble&sort=views', + 'title': 'Search results for: rumble', + 'age_limit': 0, + }, + }] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None) + if not url_info: + raise UnsupportedError(url) + + release_ts_str = self._search_regex( + r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', + webpage, 'release date', fatal=False, default=None) + view_count_str = self._search_regex(r'<span class="media-heading-info">([\d,]+) Views', + webpage, 'view count', fatal=False, default=None) + + return self.url_result( + url_info['url'], ie_key=url_info['ie_key'], url_transparent=True, + view_count=parse_count(view_count_str), + release_timestamp=parse_iso8601(release_ts_str), + like_count=parse_count(get_element_by_class('rumbles-count', webpage)), + description=clean_html(get_element_by_class('media-description', webpage)), + ) + + class RumbleChannelIE(InfoExtractor): _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))' @@ -257,7 +371,7 @@ class RumbleChannelIE(InfoExtractor): try: webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page) except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + if isinstance(e.cause, HTTPError) and e.cause.status == 404: break raise for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage): |