aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/reddit.py
diff options
context:
space:
mode:
authorbashonly <88596187+bashonly@users.noreply.github.com>2023-04-16 12:07:55 -0500
committerGitHub <noreply@github.com>2023-04-16 17:07:55 +0000
commit7a6f6f24592a8065376f11a58e44878807732cf6 (patch)
treeff3b7e7f6d0cf42ee13e2c29afdd01f983a757ba /yt_dlp/extractor/reddit.py
parentea0570820336a0fe9c3b530d1b0d1e59313274f4 (diff)
downloadhypervideo-pre-7a6f6f24592a8065376f11a58e44878807732cf6.tar.lz
hypervideo-pre-7a6f6f24592a8065376f11a58e44878807732cf6.tar.xz
hypervideo-pre-7a6f6f24592a8065376f11a58e44878807732cf6.zip
[extractor/reddit] Support cookies and short URLs (#6825)
Closes #6665, Closes #6753 Authored by: bashonly
Diffstat (limited to 'yt_dlp/extractor/reddit.py')
-rw-r--r--yt_dlp/extractor/reddit.py72
1 files changed, 58 insertions, 14 deletions
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 9dba3eca8..3e458456c 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -1,4 +1,3 @@
-import random
import urllib.parse
from .common import InfoExtractor
@@ -14,7 +13,7 @@ from ..utils import (
class RedditIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/(?P<slug>(?:r|user)/[^/]+/comments/(?P<id>[^/?#&]+))'
+ _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
_TESTS = [{
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
'info_dict': {
@@ -110,6 +109,46 @@ class RedditIE(InfoExtractor):
'channel_id': 'dumbfuckers_club',
},
}, {
+ # post link without subreddit
+ 'url': 'https://www.reddit.com/comments/124pp33',
+ 'md5': '15eec9d828adcef4468b741a7e45a395',
+ 'info_dict': {
+ 'id': 'antsenjc2jqa1',
+ 'ext': 'mp4',
+ 'display_id': '124pp33',
+ 'title': 'Harmless prank of some old friends',
+ 'uploader': 'Dudezila',
+ 'channel_id': 'ContagiousLaughter',
+ 'duration': 17,
+ 'upload_date': '20230328',
+ 'timestamp': 1680012043,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ },
+ }, {
+ # quarantined subreddit post
+ 'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/',
+ 'md5': '3156ea69e3c1f1b6259683c5abd36e71',
+ 'info_dict': {
+ 'id': '8bwtclfggpsa1',
+ 'ext': 'mp4',
+ 'display_id': '12fujy3',
+ 'title': 'Based Hasan?',
+ 'uploader': 'KingNigelXLII',
+ 'channel_id': 'GenZedong',
+ 'duration': 16,
+ 'upload_date': '20230408',
+ 'timestamp': 1680979138,
+ 'age_limit': 0,
+ 'comment_count': int,
+ 'dislike_count': int,
+ 'like_count': int,
+ },
+ 'skip': 'Requires account that has opted-in to the GenZedong subreddit',
+ }, {
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
'only_matching': True,
}, {
@@ -137,21 +176,26 @@ class RedditIE(InfoExtractor):
'only_matching': True,
}]
- @staticmethod
- def _gen_session_id():
- id_length = 16
- rand_max = 1 << (id_length * 4)
- return '%0.*x' % (id_length, random.randrange(rand_max))
-
def _real_extract(self, url):
- subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')
+ host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
- self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
- self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
- data = self._download_json(f'https://{subdomain}reddit.com/{slug}/.json', video_id, fatal=False)
+ data = self._download_json(
+ f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
if not data:
- # Fall back to old.reddit.com in case the requested subdomain fails
- data = self._download_json(f'https://old.reddit.com/{slug}/.json', video_id)
+ fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
+ self.to_screen(f'{host} request failed, retrying with {fallback_host}')
+ data = self._download_json(
+ f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
+
+ if traverse_obj(data, 'error') == 403:
+ reason = data.get('reason')
+ if reason == 'quarantined':
+ self.raise_login_required('Quarantined subreddit; an account that has opted in is required')
+ elif reason == 'private':
+ self.raise_login_required('Private subreddit; an account that has been approved is required')
+ else:
+ raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}')
+
data = data[0]['data']['children'][0]['data']
video_url = data['url']