about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-09-02 01:28:56 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-09-02 02:01:57 +0530
commit: 7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7 (patch)
tree: fd7f5ed6529950777369c6693d4e8400706cc0f2
parent: adba24d2079d350fc03226adff3cae919d7a11db (diff)
download: hypervideo-pre-7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7.tar.lz
hypervideo-pre-7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7.tar.xz
hypervideo-pre-7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7.zip
[extractor/youtube] Detect `lazy-load-for-videos` embeds
Closes #4812
-rw-r--r-- yt_dlp/extractor/youtube.py 33
1 files changed, 20 insertions, 13 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 2748b5dc5..4a5d6805e 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -923,19 +923,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:\#|$)""" % {
'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
}
- _EMBED_REGEX = [r'''(?x)
- (?:
- <iframe[^>]+?src=|
- data-video-url=|
- <embed[^>]+?src=|
- embedSWF\(?:\s*|
- <object[^>]+data=|
- new\s+SWFObject\(
- )
- (["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
- \1''']
+ _EMBED_REGEX = [
+ r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''',
+ # https://wordpress.org/plugins/lazy-load-for-videos/
+ r'''(?xs)
+ <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
+ \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
+ ]
+
_PLAYER_INFO_RE = (
r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',