diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-09-02 01:28:56 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-09-02 02:01:57 +0530 |
commit | 7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7 (patch) | |
tree | fd7f5ed6529950777369c6693d4e8400706cc0f2 | |
parent | adba24d2079d350fc03226adff3cae919d7a11db (diff) | |
download | hypervideo-pre-7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7.tar.lz hypervideo-pre-7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7.tar.xz hypervideo-pre-7c6eb424d35e51c81f8fe9e1eb7cc18067c3a8a7.zip |
[extractor/youtube] Detect `lazy-load-for-videos` embeds
Closes #4812
-rw-r--r-- | yt_dlp/extractor/youtube.py | 33 |
1 file changed, 20 insertions, 13 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 2748b5dc5..4a5d6805e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -923,19 +923,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:\#|$)""" % { 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), } - _EMBED_REGEX = [r'''(?x) - (?: - <iframe[^>]+?src=| - data-video-url=| - <embed[^>]+?src=| - embedSWF\(?:\s*| - <object[^>]+data=| - new\s+SWFObject\( - ) - (["\']) - (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ - (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) - \1'''] + _EMBED_REGEX = [ + r'''(?x) + (?: + <iframe[^>]+?src=| + data-video-url=| + <embed[^>]+?src=| + embedSWF\(?:\s*| + <object[^>]+data=| + new\s+SWFObject\( + ) + (["\']) + (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/ + (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?) + \1''', + # https://wordpress.org/plugins/lazy-load-for-videos/ + r'''(?xs) + <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})" + \s[^>]*\bclass="[^"]*\blazy-load-youtube''', + ] + _PLAYER_INFO_RE = ( r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player', r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', |