diff options
Diffstat (limited to 'youtube_dlc/extractor/wdr.py')
-rw-r--r-- | youtube_dlc/extractor/wdr.py | 35 |
1 files changed, 29 insertions, 6 deletions
diff --git a/youtube_dlc/extractor/wdr.py b/youtube_dlc/extractor/wdr.py index 44d4a13ca..9658ecea7 100644 --- a/youtube_dlc/extractor/wdr.py +++ b/youtube_dlc/extractor/wdr.py @@ -17,6 +17,7 @@ from ..utils import ( unified_strdate, update_url_query, urlhandle_detect_ext, + url_or_none, ) @@ -42,15 +43,16 @@ class WDRIE(InfoExtractor): is_live = metadata.get('mediaType') == 'live' tracker_data = metadata['trackerData'] + title = tracker_data['trackerClipTitle'] media_resource = metadata['mediaResource'] formats = [] subtitles = {} # check if the metadata contains a direct URL to a file - for kind, media_resource in media_resource.items(): + for kind, media in media_resource.items(): if kind == 'captionsHash': - for ext, url in media_resource.items(): + for ext, url in media.items(): subtitles.setdefault('de', []).append({ 'url': url, 'ext': ext, @@ -59,8 +61,10 @@ class WDRIE(InfoExtractor): if kind not in ('dflt', 'alt'): continue + if not isinstance(media, dict): + continue - for tag_name, medium_url in media_resource.items(): + for tag_name, medium_url in media.items(): if tag_name not in ('videoURL', 'audioURL'): continue @@ -90,7 +94,22 @@ class WDRIE(InfoExtractor): self._sort_formats(formats) - title = tracker_data['trackerClipTitle'] + caption_url = media_resource.get('captionURL') + if caption_url: + subtitles['de'] = [{ + 'url': caption_url, + 'ext': 'ttml', + }] + captions_hash = media_resource.get('captionsHash') + if isinstance(captions_hash, dict): + for ext, format_url in captions_hash.items(): + format_url = url_or_none(format_url) + if not format_url: + continue + subtitles.setdefault('de', []).append({ + 'url': format_url, + 'ext': determine_ext(format_url, None) or ext, + }) return { 'id': tracker_data.get('trackerClipId', video_id), @@ -106,7 +125,7 @@ class WDRIE(InfoExtractor): class WDRPageIE(InfoExtractor): _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' _PAGE_REGEX = r'/(?:mediathek/)?(?:[^/]+/)*(?P<display_id>[^/]+)\.html' - _VALID_URL = r'https?://(?:www\d?\.)?(?:wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL + _VALID_URL = r'https?://(?:www\d?\.)?(?:(?:kinder\.)?wdr\d?|sportschau)\.de' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL _TESTS = [ { @@ -213,7 +232,11 @@ class WDRPageIE(InfoExtractor): { 'url': 'http://www.sportschau.de/handballem2018/audio-vorschau---die-handball-em-startet-mit-grossem-favoritenfeld-100.html', 'only_matching': True, - } + }, + { + 'url': 'https://kinder.wdr.de/tv/die-sendung-mit-dem-elefanten/av/video-folge---astronaut-100.html', + 'only_matching': True, + }, ] def _real_extract(self, url): |