diff options
author | Wes <morganw@gmail.com> | 2021-08-03 23:19:44 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-04 09:49:44 +0530 |
commit | 3e376d183ede2d9d24a14e4d5afee7a64679cca0 (patch) | |
tree | a6ee41177bf51b6403a74096884d6acdca4710a9 | |
parent | 888299e6ca65009e8ae2809c384ba1ba6b5ae701 (diff) | |
download | hypervideo-pre-3e376d183ede2d9d24a14e4d5afee7a64679cca0.tar.lz hypervideo-pre-3e376d183ede2d9d24a14e4d5afee7a64679cca0.tar.xz hypervideo-pre-3e376d183ede2d9d24a14e4d5afee7a64679cca0.zip |
[nbcolympics] Update extractor for 2020 olympics (#621)
Fixes: https://github.com/yt-dlp/yt-dlp/issues/617#issuecomment-891834323
Authored by: wesnm
-rw-r--r-- | yt_dlp/extractor/nbc.py | 22 |
1 file changed, 14 insertions, 8 deletions
```diff
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index 2f25b9e7b..8c63cf818 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -12,6 +12,7 @@ from ..utils import (
     int_or_none,
     parse_age_limit,
     parse_duration,
+    RegexNotFoundError,
     smuggle_url,
     try_get,
     unified_timestamp,
@@ -460,7 +461,7 @@ class NBCNewsIE(ThePlatformIE):
 
 class NBCOlympicsIE(InfoExtractor):
     IE_NAME = 'nbcolympics'
-    _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
+    _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
 
     _TEST = {
         # Geo-restricted to US
@@ -483,13 +484,18 @@ class NBCOlympicsIE(InfoExtractor):
 
         webpage = self._download_webpage(url, display_id)
 
-        drupal_settings = self._parse_json(self._search_regex(
-            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-            webpage, 'drupal settings'), display_id)
-
-        iframe_url = drupal_settings['vod']['iframe_url']
-        theplatform_url = iframe_url.replace(
-            'vplayer.nbcolympics.com', 'player.theplatform.com')
+        try:
+            drupal_settings = self._parse_json(self._search_regex(
+                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+                webpage, 'drupal settings'), display_id)
+
+            iframe_url = drupal_settings['vod']['iframe_url']
+            theplatform_url = iframe_url.replace(
+                'vplayer.nbcolympics.com', 'player.theplatform.com')
+        except RegexNotFoundError:
+            theplatform_url = self._search_regex(
+                r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
+                webpage, 'embedding URL', group="embedUrl")
 
         return {
             '_type': 'url_transparent',
```