diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-11-28 15:02:31 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-11-28 15:02:31 +0700 | 
| commit | 20c50c65566be31607e59d7ca2467c664a29c843 (patch) | |
| tree | 199c60b16bad52588819394209b3fe3669abfb02 | |
| parent | f9f9699f2fa87ec54d59b1834cc56adb6147fec6 (diff) | |
| download | hypervideo-pre-20c50c65566be31607e59d7ca2467c664a29c843.tar.lz hypervideo-pre-20c50c65566be31607e59d7ca2467c664a29c843.tar.xz hypervideo-pre-20c50c65566be31607e59d7ca2467c664a29c843.zip | |
[youtube] Improve yt initial player response extraction (closes #27216)
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 9 | 
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 1e3ff7d44..4616ac53d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -283,6 +283,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     }
 
     _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
+    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 
     def _call_api(self, ep, query, video_id):
         data = self._DEFAULT_API_DATA.copy()
@@ -1068,7 +1069,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             },
         },
         {
-            # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
+            # with '};' inside yt initial data (see [1])
+            # see [2] for an example with '};' inside ytInitialPlayerResponse
+            # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
+            # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
             'info_dict': {
                 'id': 'CHqg6qOn4no',
@@ -1686,7 +1690,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if not video_info and not player_response:
             player_response = extract_player_response(
                 self._search_regex(
-                    r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;', video_webpage,
+                    (r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
+                     self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
                     'initial player response', default='{}'),
                 video_id)
 
```
