about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- yt_dlp/extractor/extractors.py | 5
-rw-r--r-- yt_dlp/extractor/generic.py | 28
-rw-r--r-- yt_dlp/extractor/reddit.py | 82
3 files changed, 71 insertions, 44 deletions
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 6bad1f40c..d19c67243 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1206,10 +1206,7 @@ from .redbulltv import (
RedBullTVRrnContentIE,
RedBullIE,
)
-from .reddit import (
- RedditIE,
- RedditRIE,
-)
+from .reddit import RedditIE
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .rentv import (
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index d6631e2f3..9c7fa4a21 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor):
'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
}
},
+ {
+ # Reddit-hosted video that will redirect and be processed by RedditIE
+ # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+ 'url': 'https://v.redd.it/zv89llsvexdz',
+ 'md5': '87f5f02f6c1582654146f830f21f8662',
+ 'info_dict': {
+ 'id': 'zv89llsvexdz',
+ 'ext': 'mp4',
+ 'timestamp': 1501941939.0,
+ 'title': 'That small heart attack.',
+ 'upload_date': '20170805',
+ 'uploader': 'Antw87'
+ }
+ },
+ {
+ # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
+ 'url': 'https://v.redd.it/33hgok7dfbz71/',
+ 'md5': '7a1d587940242c9bb3bd6eb320b39258',
+ 'info_dict': {
+ 'id': '33hgok7dfbz71',
+ 'ext': 'mp4',
+ 'title': "The game Didn't want me to Knife that Guy I guess",
+ 'uploader': 'paraf1ve',
+ 'timestamp': 1636788683.0,
+ 'upload_date': '20211113'
+ }
+ }
+ #
]
def report_following_redirect(self, new_url):
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 3ea750aeb..a042a59cc 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -8,43 +8,11 @@ from ..utils import (
try_get,
unescapeHTML,
url_or_none,
+ traverse_obj
)
class RedditIE(InfoExtractor):
- _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
- _TEST = {
- # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
- 'url': 'https://v.redd.it/zv89llsvexdz',
- 'md5': '0a070c53eba7ec4534d95a5a1259e253',
- 'info_dict': {
- 'id': 'zv89llsvexdz',
- 'ext': 'mp4',
- 'title': 'zv89llsvexdz',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- formats = self._extract_m3u8_formats(
- 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
- 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
-
- formats.extend(self._extract_mpd_formats(
- 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
- mpd_id='dash', fatal=False))
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': video_id,
- 'formats': formats,
- }
-
-
-class RedditRIE(InfoExtractor):
_VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
@@ -147,19 +115,53 @@ class RedditRIE(InfoExtractor):
for resolution in resolutions:
add_thumbnail(resolution)
- return {
- '_type': 'url_transparent',
- 'url': video_url,
+ info = {
'title': data.get('title'),
'thumbnails': thumbnails,
'timestamp': float_or_none(data.get('created_utc')),
'uploader': data.get('author'),
- 'duration': int_or_none(try_get(
- data,
- (lambda x: x['media']['reddit_video']['duration'],
- lambda x: x['secure_media']['reddit_video']['duration']))),
'like_count': int_or_none(data.get('ups')),
'dislike_count': int_or_none(data.get('downs')),
'comment_count': int_or_none(data.get('num_comments')),
'age_limit': age_limit,
}
+
+ # Check if media is hosted on reddit:
+ reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False)
+ if reddit_video:
+ playlist_urls = [
+ try_get(reddit_video, lambda x: unescapeHTML(x[y]))
+ for y in ('dash_url', 'hls_url')
+ ]
+
+ # Keep the post ID as display_id and use the v.redd.it media ID (parsed from fallback_url) as video_id
+ display_id = video_id
+ video_id = self._search_regex(
+ r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'],
+ 'video_id', default=display_id)
+
+ dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
+ hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+
+ formats = self._extract_m3u8_formats(
+ hls_playlist_url, display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ dash_playlist_url, display_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ **info,
+ 'id': video_id,
+ 'display_id': display_id,
+ 'formats': formats,
+ 'duration': int_or_none(reddit_video.get('duration')),
+ }
+
+ # Not hosted on reddit: return a url_transparent result pointing at video_url so extraction continues there
+ return {
+ **info,
+ 'display_id': video_id,
+ '_type': 'url_transparent',
+ 'url': video_url,
+ }