about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/zdf.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/zdf.py')
-rw-r--r-- yt_dlp/extractor/zdf.py 130
1 files changed, 57 insertions, 73 deletions
diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
index 3a7f01f7a..1eab384b9 100644
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -3,13 +3,14 @@ import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
+ NO_DEFAULT,
+ ExtractorError,
determine_ext,
+ extract_attributes,
float_or_none,
int_or_none,
join_nonempty,
merge_dicts,
- NO_DEFAULT,
- orderedSet,
parse_codecs,
qualities,
traverse_obj,
@@ -188,7 +189,7 @@ class ZDFIE(ZDFBaseIE):
},
}, {
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
- 'md5': '57af4423db0455a3975d2dc4578536bc',
+ 'md5': '1b93bdec7d02fc0b703c5e7687461628',
'info_dict': {
'ext': 'mp4',
'id': 'video_funk_1770473',
@@ -250,17 +251,15 @@ class ZDFIE(ZDFBaseIE):
title = content.get('title') or content['teaserHeadline']
t = content['mainVideoContent']['http://zdf.de/rels/target']
-
- ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
-
+ ptmd_path = traverse_obj(t, (
+ (('streams', 'default'), None),
+ ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
+ ), get_all=False)
if not ptmd_path:
- ptmd_path = traverse_obj(
- t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
- 'http://zdf.de/rels/streams/ptmd-template').replace(
- '{playerId}', 'ngplayer_2_4')
+ raise ExtractorError('Could not extract ptmd_path')
info = self._extract_ptmd(
- urljoin(url, ptmd_path), video_id, player['apiToken'], url)
+ urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url)
thumbnails = []
layouts = try_get(
@@ -309,15 +308,16 @@ class ZDFIE(ZDFBaseIE):
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
video_id)
- document = video['document']
-
- title = document['titel']
- content_id = document['basename']
-
formats = []
- format_urls = set()
- for f in document['formitaeten']:
- self._extract_format(content_id, formats, format_urls, f)
+ formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
+ document = formitaeten and video['document']
+ if formitaeten:
+ title = document['titel']
+ content_id = document['basename']
+
+ format_urls = set()
+ for f in formitaeten or []:
+ self._extract_format(content_id, formats, format_urls, f)
self._sort_formats(formats)
thumbnails = []
@@ -364,9 +364,9 @@ class ZDFChannelIE(ZDFBaseIE):
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
'info_dict': {
'id': 'das-aktuelle-sportstudio',
- 'title': 'das aktuelle sportstudio | ZDF',
+ 'title': 'das aktuelle sportstudio',
},
- 'playlist_mincount': 23,
+ 'playlist_mincount': 18,
}, {
'url': 'https://www.zdf.de/dokumentation/planet-e',
'info_dict': {
@@ -375,6 +375,14 @@ class ZDFChannelIE(ZDFBaseIE):
},
'playlist_mincount': 50,
}, {
+ 'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
+ 'info_dict': {
+ 'id': 'aktenzeichen-xy-ungeloest',
+ 'title': 'Aktenzeichen XY... ungelöst',
+ 'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
+ },
+ 'playlist_mincount': 2,
+ }, {
'url': 'https://www.zdf.de/filme/taunuskrimi/',
'only_matching': True,
}]
@@ -383,60 +391,36 @@ class ZDFChannelIE(ZDFBaseIE):
def suitable(cls, url):
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
+ def _og_search_title(self, webpage, fatal=False):
+ title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
+ return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
+
def _real_extract(self, url):
channel_id = self._match_id(url)
webpage = self._download_webpage(url, channel_id)
- entries = [
- self.url_result(item_url, ie=ZDFIE.ie_key())
- for item_url in orderedSet(re.findall(
- r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
-
- return self.playlist_result(
- entries, channel_id, self._og_search_title(webpage, fatal=False))
-
- r"""
- player = self._extract_player(webpage, channel_id)
-
- channel_id = self._search_regex(
- r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
- 'channel id', group='id')
-
- channel = self._call_api(
- 'https://api.zdf.de/content/documents/%s.json' % channel_id,
- player, url, channel_id)
-
- items = []
- for module in channel['module']:
- for teaser in try_get(module, lambda x: x['teaser'], list) or []:
- t = try_get(
- teaser, lambda x: x['http://zdf.de/rels/target'], dict)
- if not t:
- continue
- items.extend(try_get(
- t,
- lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
- list) or [])
- items.extend(try_get(
- module,
- lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
- list) or [])
-
- entries = []
- entry_urls = set()
- for item in items:
- t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
- if not t:
- continue
- sharing_url = t.get('http://zdf.de/rels/sharing-url')
- if not sharing_url or not isinstance(sharing_url, compat_str):
- continue
- if sharing_url in entry_urls:
- continue
- entry_urls.add(sharing_url)
- entries.append(self.url_result(
- sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
- return self.playlist_result(entries, channel_id, channel.get('title'))
- """
+ matches = re.finditer(
+ r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
+ webpage)
+
+ if self._downloader.params.get('noplaylist', False):
+ entry = next(
+ (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
+ None)
+ self.to_screen('Downloading just the main video because of --no-playlist')
+ if entry:
+ return entry
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
+
+ def check_video(m):
+ v_ref = self._search_regex(
+ r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
+ webpage, 'check id', default='')
+ v_ref = extract_attributes(v_ref)
+ return v_ref.get('data-target-video-type') != 'novideo'
+
+ return self.playlist_from_matches(
+ (m.group('url') for m in matches if check_video(m)),
+ channel_id, self._og_search_title(webpage, fatal=False))