diff options
| -rw-r--r-- | yt_dlp/extractor/common.py | 4 | ||||
| -rw-r--r-- | yt_dlp/extractor/npr.py | 17 | 
2 files changed, 20 insertions, 1 deletion
| diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d88d5e6f9..71e982f02 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1461,7 +1461,7 @@ class InfoExtractor:              assert e['@type'] == 'VideoObject'              author = e.get('author')              info.update({ -                'url': url_or_none(e.get('contentUrl')), +                'url': traverse_obj(e, 'contentUrl', 'embedUrl', expected_type=url_or_none),                  'title': unescapeHTML(e.get('name')),                  'description': unescapeHTML(e.get('description')),                  'thumbnails': [{'url': url} @@ -1529,6 +1529,8 @@ class InfoExtractor:                      })                      if traverse_obj(e, ('video', 0, '@type')) == 'VideoObject':                          extract_video_object(e['video'][0]) +                    elif traverse_obj(e, ('subjectOf', 0, '@type')) == 'VideoObject': +                        extract_video_object(e['subjectOf'][0])                  elif item_type == 'VideoObject':                      extract_video_object(e)                      if expected_type is None: diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py index 6d93f154c..e4ff8d6c2 100644 --- a/yt_dlp/extractor/npr.py +++ b/yt_dlp/extractor/npr.py @@ -51,6 +51,15 @@ class NprIE(InfoExtractor):          # multimedia, no formats, stream          'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',          'only_matching': True, +    }, { +        'url': 'https://www.npr.org/2022/03/15/1084896560/bonobo-tiny-desk-home-concert', +        'info_dict': { +            'id': '1086468851', +            'ext': 'mp4', +            'title': 'Bonobo: Tiny Desk (Home) Concert', +            'duration': 1061, +            'thumbnail': r're:^https?://media.npr.org/assets/img/.*\.jpg$', +        },      }]      def _real_extract(self, url): @@ -65,6 +74,10 @@ class NprIE(InfoExtractor):              
})['list']['story'][0]          playlist_title = story.get('title', {}).get('$text') +        # Fetch the JSON-LD from the npr page. +        json_ld = self._search_json_ld( +            self._download_webpage(url, playlist_id), playlist_id, 'NewsArticle', fatal=False) +          KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')          quality = qualities(KNOWN_FORMATS) @@ -110,6 +123,10 @@ class NprIE(InfoExtractor):                  formats.extend(self._extract_m3u8_formats(                      stream_url, stream_id, 'mp4', 'm3u8_native',                      m3u8_id='hls', fatal=False)) + +            if not formats and json_ld.get('url'): +                formats.extend(self._extract_m3u8_formats(json_ld['url'], media_id, 'mp4', m3u8_id='hls', fatal=False)) +              self._sort_formats(formats)              entries.append({ | 
