[extractor/generic:quoted-html] Add extractor (#5213)

Extracts embeds from escaped HTML within the `data-html` attribute.

Related: https://github.com/ytdl-org/youtube-dl/issues/21294, https://github.com/yt-dlp/yt-dlp/pull/5121

Authored by: coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
author: Matthew <coletdjnz@protonmail.com> 2022-10-14 17:32:52 +1300
committer: GitHub <noreply@github.com> 2022-10-14 04:32:52 +0000
commit: 6dca2aa66de8a142543d5c8b6ccadd251339648e (patch)
tree: 5997b112459e0d25001e2b328eece7af5943bc19 /yt_dlp/extractor/generic.py
parent: 6678a4f0b3074f41f02e968d1d48d7c64e48ef07 (diff)
download: hypervideo-pre-6dca2aa66de8a142543d5c8b6ccadd251339648e.tar.lz
hypervideo-pre-6dca2aa66de8a142543d5c8b6ccadd251339648e.tar.xz
hypervideo-pre-6dca2aa66de8a142543d5c8b6ccadd251339648e.zip
1 file changed, 0 insertions, 22 deletions
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index ad4e3c5b8..b7a5ffb5b 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1980,22 +1980,6 @@ class GenericIE(InfoExtractor):
             },
             'playlist_count': 6,
         },
-        {
-            # Squarespace video embed, 2019-08-28
-            'url': 'http://ootboxford.com',
-            'info_dict': {
-                'id': 'Tc7b_JGdZfw',
-                'title': 'Out of the Blue, at Childish Things 10',
-                'ext': 'mp4',
-                'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
-                'uploader_id': 'helendouglashouse',
-                'uploader': 'Helen & Douglas House',
-                'upload_date': '20140328',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
         # {
         #     # Zype embed
         #     'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
@@ -2784,12 +2768,6 @@ class GenericIE(InfoExtractor):
         # There probably should be a second run of generic extractor on unescaped webpage.
         # webpage = urllib.parse.unquote(webpage)
 
-        # Unescape squarespace embeds to be detected by generic extractor,
-        # see https://github.com/ytdl-org/youtube-dl/issues/21294
-        webpage = re.sub(
-            r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
-            lambda x: unescapeHTML(x.group(0)), webpage)
-
         # TODO: Move to respective extractors
         bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
         if bc_urls:
author	Matthew <coletdjnz@protonmail.com>	2022-10-14 17:32:52 +1300
committer	GitHub <noreply@github.com>	2022-10-14 04:32:52 +0000
commit	6dca2aa66de8a142543d5c8b6ccadd251339648e (patch)
tree	5997b112459e0d25001e2b328eece7af5943bc19 /yt_dlp/extractor/generic.py
parent	6678a4f0b3074f41f02e968d1d48d7c64e48ef07 (diff)
download	hypervideo-pre-6dca2aa66de8a142543d5c8b6ccadd251339648e.tar.lz hypervideo-pre-6dca2aa66de8a142543d5c8b6ccadd251339648e.tar.xz hypervideo-pre-6dca2aa66de8a142543d5c8b6ccadd251339648e.zip