diff options
Diffstat (limited to 'hypervideo_dl/extractor/rumble.py')
-rw-r--r-- | hypervideo_dl/extractor/rumble.py | 47 |
1 files changed, 46 insertions, 1 deletions
diff --git a/hypervideo_dl/extractor/rumble.py b/hypervideo_dl/extractor/rumble.py
index 4a02251..49c1f44 100644
--- a/hypervideo_dl/extractor/rumble.py
+++ b/hypervideo_dl/extractor/rumble.py
@@ -1,13 +1,17 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
+import re
+
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_str, compat_HTTPError
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_iso8601,
     try_get,
+    ExtractorError,
 )
 
 
@@ -28,6 +32,14 @@ class RumbleEmbedIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [
+            mobj.group('url')
+            for mobj in re.finditer(
+                r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)' % RumbleEmbedIE._VALID_URL,
+                webpage)]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video = self._download_json(
@@ -65,3 +77,36 @@ class RumbleEmbedIE(InfoExtractor):
             'channel_url': author.get('url'),
             'duration': int_or_none(video.get('duration')),
         }
+
+
+class RumbleChannelIE(InfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
+
+    _TESTS = [{
+        'url': 'https://rumble.com/c/Styxhexenhammer666',
+        'playlist_mincount': 1160,
+        'info_dict': {
+            'id': 'Styxhexenhammer666',
+        },
+    }, {
+        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
+        'playlist_count': 4,
+        'info_dict': {
+            'id': 'goldenpoodleharleyeuna',
+        },
+    }]
+
+    def entries(self, url, playlist_id):
+        for page in itertools.count(1):
+            try:
+                webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                    break
+                raise
+            for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
+                yield self.url_result('https://rumble.com' + video_url)
+
+    def _real_extract(self, url):
+        url, playlist_id = self._match_valid_url(url).groups()
+        return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)