aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Wes <morganw@gmail.com> 2021-08-03 23:19:44 -0500
committer GitHub <noreply@github.com> 2021-08-04 09:49:44 +0530
commit 3e376d183ede2d9d24a14e4d5afee7a64679cca0 (patch)
tree a6ee41177bf51b6403a74096884d6acdca4710a9
parent 888299e6ca65009e8ae2809c384ba1ba6b5ae701 (diff)
download hypervideo-pre-3e376d183ede2d9d24a14e4d5afee7a64679cca0.tar.lz
hypervideo-pre-3e376d183ede2d9d24a14e4d5afee7a64679cca0.tar.xz
hypervideo-pre-3e376d183ede2d9d24a14e4d5afee7a64679cca0.zip
[nbcolympics] Update extractor for 2020 olympics (#621)
Fixes: https://github.com/yt-dlp/yt-dlp/issues/617#issuecomment-891834323
Authored by: wesnm
-rw-r--r-- yt_dlp/extractor/nbc.py 22
1 file changed, 14 insertions, 8 deletions
diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py
index 2f25b9e7b..8c63cf818 100644
--- a/yt_dlp/extractor/nbc.py
+++ b/yt_dlp/extractor/nbc.py
@@ -12,6 +12,7 @@ from ..utils import (
int_or_none,
parse_age_limit,
parse_duration,
+ RegexNotFoundError,
smuggle_url,
try_get,
unified_timestamp,
@@ -460,7 +461,7 @@ class NBCNewsIE(ThePlatformIE):
class NBCOlympicsIE(InfoExtractor):
IE_NAME = 'nbcolympics'
- _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
+ _VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
_TEST = {
# Geo-restricted to US
@@ -483,13 +484,18 @@ class NBCOlympicsIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
- drupal_settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), display_id)
-
- iframe_url = drupal_settings['vod']['iframe_url']
- theplatform_url = iframe_url.replace(
- 'vplayer.nbcolympics.com', 'player.theplatform.com')
+ try:
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ iframe_url = drupal_settings['vod']['iframe_url']
+ theplatform_url = iframe_url.replace(
+ 'vplayer.nbcolympics.com', 'player.theplatform.com')
+ except RegexNotFoundError:
+ theplatform_url = self._search_regex(
+ r"([\"'])embedUrl\1: *([\"'])(?P<embedUrl>.+)\2",
+ webpage, 'embedding URL', group="embedUrl")
return {
'_type': 'url_transparent',