[reddit] Fix 429 by generating a random `reddit_session`

Related: a76e2e0f8898c06939b6a123fa863ab8876cfa20, #1014, https://github.com/ytdl-org/youtube-dl/issues/29986
Original PR: https://github.com/ytdl-org/youtube-dl/pull/30017
Authored by: AjaxGb
author: pukkandan <pukkandan.ytdlp@gmail.com> 2021-09-26 20:00:56 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2021-09-30 03:32:44 +0530
commit: 8e3fd7e034cdd54972d13394821cd9e55e1c3735 (patch)
tree: b0ad04d2d4c1f3435f2b7ba71f6b7648a7f57748 /yt_dlp/extractor/reddit.py
parent: 80c03fa98fdd54410bd36684ef453f6976a9c0bf (diff)
download: hypervideo-pre-8e3fd7e034cdd54972d13394821cd9e55e1c3735.tar.lz
hypervideo-pre-8e3fd7e034cdd54972d13394821cd9e55e1c3735.tar.xz
hypervideo-pre-8e3fd7e034cdd54972d13394821cd9e55e1c3735.zip
1 file changed, 17 insertions, 9 deletions
diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 14592bc62..e5a1f6920 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -1,5 +1,4 @@
-from __future__ import unicode_literals
-
+import random
 
 from .common import InfoExtractor
 from ..utils import (
@@ -49,7 +48,7 @@ class RedditIE(InfoExtractor):
 
 
 class RedditRIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -99,13 +98,22 @@ class RedditRIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _real_extract(self, url):
-        slug, video_id = self._match_valid_url(url).group('slug', 'id')
-
-        self._set_cookie('reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
-        data = self._download_json(
-            f'https://old.reddit.com/r/{slug}/.json', video_id)[0]['data']['children'][0]['data']
+    @staticmethod
+    def _gen_session_id():
+        id_length = 16
+        rand_max = 1 << (id_length * 4)
+        return '%0.*x' % (id_length, random.randrange(rand_max))
 
+    def _real_extract(self, url):
+        subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')
+
+        self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
+        self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
+        data = self._download_json(f'https://{subdomain}.reddit.com/r/{slug}/.json', video_id, fatal=False)
+        if not data:
+            # Fall back to old.reddit.com in case the requested subdomain fails
+            data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id)
+        data = data[0]['data']['children'][0]['data']
         video_url = data['url']
 
         # Avoid recursing into the same reddit URL
author	pukkandan <pukkandan.ytdlp@gmail.com>	2021-09-26 20:00:56 +0530
committer	pukkandan <pukkandan.ytdlp@gmail.com>	2021-09-30 03:32:44 +0530
commit	8e3fd7e034cdd54972d13394821cd9e55e1c3735 (patch)
tree	b0ad04d2d4c1f3435f2b7ba71f6b7648a7f57748 /yt_dlp/extractor/reddit.py
parent	80c03fa98fdd54410bd36684ef453f6976a9c0bf (diff)
download	hypervideo-pre-8e3fd7e034cdd54972d13394821cd9e55e1c3735.tar.lz hypervideo-pre-8e3fd7e034cdd54972d13394821cd9e55e1c3735.tar.xz hypervideo-pre-8e3fd7e034cdd54972d13394821cd9e55e1c3735.zip