aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/rumble.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/rumble.py')
-rw-r--r--hypervideo_dl/extractor/rumble.py47
1 files changed, 46 insertions, 1 deletions
diff --git a/hypervideo_dl/extractor/rumble.py b/hypervideo_dl/extractor/rumble.py
index 4a02251..49c1f44 100644
--- a/hypervideo_dl/extractor/rumble.py
+++ b/hypervideo_dl/extractor/rumble.py
@@ -1,13 +1,17 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
+import re
+
from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_str, compat_HTTPError
from ..utils import (
determine_ext,
int_or_none,
parse_iso8601,
try_get,
+ ExtractorError,
)
@@ -28,6 +32,14 @@ class RumbleEmbedIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage):
    """Return the list of Rumble embed URLs referenced in *webpage*.

    A URL is collected when it matches ``RumbleEmbedIE._VALID_URL`` and
    appears either as the quoted ``src`` attribute of a ``<script>`` or
    ``<iframe>`` tag, or as the quoted value of an ``embedUrl`` key.
    URLs are returned in document order; duplicates are not removed.
    """
    # The extractor's own URL pattern is spliced into the named group so
    # that only URLs this extractor can handle are captured.
    embed_pattern = (
        r'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>%s)'
        % RumbleEmbedIE._VALID_URL)
    embed_urls = []
    for match in re.finditer(embed_pattern, webpage):
        embed_urls.append(match.group('url'))
    return embed_urls
+
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
@@ -65,3 +77,36 @@ class RumbleEmbedIE(InfoExtractor):
'channel_url': author.get('url'),
'duration': int_or_none(video.get('duration')),
}
+
+
class RumbleChannelIE(InfoExtractor):
    """Playlist extractor for Rumble channel (``/c/``) and user (``/user/``) pages."""

    # Group ``url`` is the listing URL without query/fragment; group ``id``
    # is the channel or user name and doubles as the playlist id.
    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'

    _TESTS = [{
        'url': 'https://rumble.com/c/Styxhexenhammer666',
        'playlist_mincount': 1160,
        'info_dict': {
            'id': 'Styxhexenhammer666',
        },
    }, {
        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
        'playlist_count': 4,
        'info_dict': {
            'id': 'goldenpoodleharleyeuna',
        },
    }]

    def entries(self, url, playlist_id):
        """Lazily yield a ``url_result`` for each video link on the listing pages.

        Pages are requested as ``<url>?page=N`` for N = 1, 2, 3, ...
        Iteration ends when a download raises an ``ExtractorError`` whose
        cause is an HTTP 404; any other ``ExtractorError`` is re-raised.
        """
        video_link_re = re.compile(r'class=video-item--a\s?href=([^>]+\.html)')
        for page_number in itertools.count(1):
            page_url = f'{url}?page={page_number}'
            try:
                webpage = self._download_webpage(
                    page_url, playlist_id, note='Downloading page %d' % page_number)
            except ExtractorError as err:
                page_missing = isinstance(err.cause, compat_HTTPError) and err.cause.code == 404
                if not page_missing:
                    raise
                # A 404 marks the page after the last one: stop paginating.
                return
            # Captured hrefs are site-relative paths, so prefix the host.
            for link in video_link_re.finditer(webpage):
                yield self.url_result('https://rumble.com' + link.group(1))

    def _real_extract(self, url):
        """Build the playlist result for the channel/user page at *url*."""
        # _VALID_URL defines exactly two groups, in this order: url, id.
        matched = self._match_valid_url(url)
        listing_url, playlist_id = matched.groups()
        return self.playlist_result(
            self.entries(listing_url, playlist_id), playlist_id=playlist_id)