about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-11-09 15:48:25 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-11-09 22:19:46 +0530
commit: ed6bec168dd6af955f4ec0165356ac76b944c537 (patch)
tree: e88ae8996e2b5915903214f818a2a80e006c3703
parent: 0d8affc17faa540f41cb6fba7675dbf98364250b (diff)
download: hypervideo-pre-ed6bec168dd6af955f4ec0165356ac76b944c537.tar.lz
hypervideo-pre-ed6bec168dd6af955f4ec0165356ac76b944c537.tar.xz
hypervideo-pre-ed6bec168dd6af955f4ec0165356ac76b944c537.zip
[extractor/doodstream] Remove extractor
It was added in youtube-dlc, likely without sufficient scrutiny.
Closes #3808, Closes #5251, Closes #5403
-rw-r--r-- yt_dlp/extractor/_extractors.py 3
-rw-r--r-- yt_dlp/extractor/doodstream.py 77
-rw-r--r-- yt_dlp/extractor/unsupported.py 60
3 files changed, 51 insertions, 89 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 0ca8b3e06..053ef44ae 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -477,7 +477,6 @@ from .digitalconcerthall import DigitalConcertHallIE
from .discovery import DiscoveryIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
-from .doodstream import DoodStreamIE
from .dropbox import DropboxIE
from .dropout import (
DropoutSeasonIE,
@@ -2023,7 +2022,7 @@ from .umg import UMGDeIE
from .unistra import UnistraIE
from .unity import UnityIE
from .unscripted import UnscriptedNewsVideoIE
-from .unsupported import KnownDRMIE
+from .unsupported import KnownDRMIE, KnownPiracyIE
from .uol import UOLIE
from .uplynk import (
UplynkIE,
diff --git a/yt_dlp/extractor/doodstream.py b/yt_dlp/extractor/doodstream.py
deleted file mode 100644
index b41da32e5..000000000
--- a/yt_dlp/extractor/doodstream.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import string
-import random
-import time
-
-from .common import InfoExtractor
-
-
-class DoodStreamIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm|wf)/[ed]/(?P<id>[a-z0-9]+)'
- _TESTS = [{
- 'url': 'http://dood.to/e/5s1wmbdacezb',
- 'md5': '4568b83b31e13242b3f1ff96c55f0595',
- 'info_dict': {
- 'id': '5s1wmbdacezb',
- 'ext': 'mp4',
- 'title': 'Kat Wonders - Monthly May 2020',
- 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
- 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
- }
- }, {
- 'url': 'http://dood.watch/d/5s1wmbdacezb',
- 'md5': '4568b83b31e13242b3f1ff96c55f0595',
- 'info_dict': {
- 'id': '5s1wmbdacezb',
- 'ext': 'mp4',
- 'title': 'Kat Wonders - Monthly May 2020',
- 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
- 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
- }
- }, {
- 'url': 'https://dood.to/d/jzrxn12t2s7n',
- 'md5': '3207e199426eca7c2aa23c2872e6728a',
- 'info_dict': {
- 'id': 'jzrxn12t2s7n',
- 'ext': 'mp4',
- 'title': 'Stacy Cruz Cute ALLWAYSWELL',
- 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
- 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
- }
- }, {
- 'url': 'https://dood.so/d/jzrxn12t2s7n',
- 'only_matching': True
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = f'https://dood.to/e/{video_id}'
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_meta(
- ('og:title', 'twitter:title'), webpage, default=None) or self._html_extract_title(webpage)
- thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None)
- token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
- description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'], webpage, default=None)
-
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
- 'referer': url
- }
-
- pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5')
- final_url = ''.join((
- self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers),
- *(random.choice(string.ascii_letters + string.digits) for _ in range(10)),
- f'?token={token}&expiry={int(time.time() * 1000)}',
- ))
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': final_url,
- 'http_headers': headers,
- 'ext': 'mp4',
- 'description': description,
- 'thumbnail': thumb,
- }
diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py
index 87ad87ca2..e40666ec0 100644
--- a/yt_dlp/extractor/unsupported.py
+++ b/yt_dlp/extractor/unsupported.py
@@ -1,11 +1,32 @@
from .common import InfoExtractor
-from ..utils import classproperty, ExtractorError
+from ..utils import ExtractorError, classproperty, remove_start
-class KnownDRMIE(InfoExtractor):
+class UnsupportedInfoExtractor(InfoExtractor):
IE_DESC = False
- IE_NAME = 'unsupported:drm'
- UNSUPPORTED_URLS = (
+ URLS = () # Redefine in subclasses
+
+ @classproperty
+ def IE_NAME(cls):
+ return remove_start(super().IE_NAME, 'Known')
+
+ @classproperty
+ def _VALID_URL(cls):
+ return rf'https?://(?:www\.)?(?:{"|".join(cls.URLS)})'
+
+
+LF = '\n '
+
+
+class KnownDRMIE(UnsupportedInfoExtractor):
+ """Sites that are known to use DRM for all their videos
+
+ Add to this list only if:
+ * You are reasonably certain that the site uses DRM for ALL their videos
+ * Multiple users have asked about this site on github/reddit/discord
+ """
+
+ URLS = (
r'play\.hbomax\.com',
r'channel(?:4|5)\.com',
r'peacocktv\.com',
@@ -82,12 +103,31 @@ class KnownDRMIE(InfoExtractor):
'only_matching': True,
}]
- @classproperty
- def _VALID_URL(cls):
- return rf'https?://(?:www\.)?(?:{"|".join(cls.UNSUPPORTED_URLS)})'
+ def _real_extract(self, url):
+ raise ExtractorError(
+ f'The requested site is known to use DRM protection. '
+ f'It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported.{LF}'
+ f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, '
+ 'unless you have evidence that the video is not DRM protected', expected=True)
+
+
+class KnownPiracyIE(UnsupportedInfoExtractor):
+ """Sites that have been deemed to be piracy
+
+ In order for this to not end up being a catalog of piracy sites,
+ only sites that were once supported should be added to this list
+ """
+
+ URLS = (
+ r'dood\.(?:to|watch|so|pm|wf|ru)',
+ )
+
+ _TESTS = [{
+ 'url': 'http://dood.to/e/5s1wmbdacezb',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
raise ExtractorError(
- f'The requested site is known to use DRM protection. It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported by yt-dlp. '
- f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, unless you have evidence that it is not DRM protected.',
- expected=True)
+ f'This website is no longer supported since it has been determined to be primarily used for piracy.{LF}'
+ f'{self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open issues for it', expected=True)