diff options
Diffstat (limited to 'youtube_dlc/extractor/urplay.py')
-rw-r--r-- | youtube_dlc/extractor/urplay.py | 77 |
1 files changed, 51 insertions, 26 deletions
diff --git a/youtube_dlc/extractor/urplay.py b/youtube_dlc/extractor/urplay.py index 4bc2b78fb..2c41f78bd 100644 --- a/youtube_dlc/extractor/urplay.py +++ b/youtube_dlc/extractor/urplay.py @@ -2,8 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_timestamp -import re +from ..utils import ( + dict_get, + int_or_none, + unified_timestamp, +) class URPlayIE(InfoExtractor): @@ -14,7 +17,7 @@ class URPlayIE(InfoExtractor): 'info_dict': { 'id': '203704', 'ext': 'mp4', - 'title': 'Om vetenskap, kritiskt tänkande och motstånd', + 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', 'timestamp': 1513292400, 'upload_date': '20171214', @@ -26,7 +29,7 @@ class URPlayIE(InfoExtractor): 'ext': 'mp4', 'title': 'Tripp, Trapp, Träd : Sovkudde', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', - 'timestamp': 1440093600, + 'timestamp': 1440086400, 'upload_date': '20150820', }, }, { @@ -36,28 +39,27 @@ class URPlayIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - + url = url.replace('skola.se/Produkter', 'play.se/program') webpage = self._download_webpage(url, video_id) - urplayer_data = re.sub(""", "\"", self._search_regex( - r'components\/Player\/Player\" data-react-props=\"({.+?})\"', - webpage, 'urplayer data')) - urplayer_data = self._parse_json(urplayer_data, video_id) - for i in range(len(urplayer_data['accessibleEpisodes'])): - if urplayer_data.get('accessibleEpisodes', {})[i].get('id') == int(video_id): - urplayer_data = urplayer_data['accessibleEpisodes'][i] - break + urplayer_data = self._parse_json(self._html_search_regex( + r'data-react-class="components/Player/Player"[^>]+data-react-props="({.+?})"', + webpage, 'urplayer data'), video_id)['currentProduct'] + episode = urplayer_data['title'] host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] formats = [] - urplayer_streams = urplayer_data.get("streamingInfo") - for quality in ('sd'), ('hd'): - location = (urplayer_streams.get("raw", {}).get(quality, {}).get("location") - or urplayer_streams.get("sweComplete", {}).get(quality, {}).get("location")) - if location: + urplayer_streams = urplayer_data.get('streamingInfo', {}) + + for k, v in urplayer_streams.get('raw', {}).items(): + if not (k in ('sd', 'hd') and isinstance(v, dict)): + continue + file_http = v.get('location') + if file_http: formats.extend(self._extract_wowza_formats( - 'http://%s/%s/playlist.m3u8' % (host, location), video_id, - skip_protocols=['f4m', 'rtmp', 'rtsp'])) + 'http://%s/%splaylist.m3u8' % (host, file_http), + video_id, skip_protocols=['f4m', 'rtmp', 'rtsp'])) self._sort_formats(formats) + subtitles = {} subs = urplayer_streams.get("sweComplete", {}).get("tt", {}).get("location") if subs: @@ -65,14 +67,37 @@ class URPlayIE(InfoExtractor): 'url': subs, }) + image = urplayer_data.get('image') or {} + thumbnails = [] + for k, v in image.items(): + t = { + 'id': k, + 'url': v, + } + wh = k.split('x') + if len(wh) == 2: + t.update({ + 'width': int_or_none(wh[0]), + 'height': int_or_none(wh[1]), + }) + thumbnails.append(t) + + series = urplayer_data.get('series') or {} + series_title = dict_get(series, ('seriesTitle', 'title')) or dict_get(urplayer_data, ('seriesTitle', 'mainTitle')) + return { 'id': video_id, - 'title': urplayer_data['title'], - 'description': self._og_search_description(webpage), - 'thumbnail': urplayer_data.get('image', {}).get('1280x720'), - 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), - webpage, 'timestamp')), - 'series': urplayer_data.get('seriesTitle'), 'subtitles': subtitles, + 'title': '%s : %s' % (series_title, episode) if series_title else episode, + 'description': urplayer_data.get('description'), + 'thumbnails': thumbnails, + 'timestamp': unified_timestamp(urplayer_data.get('publishedAt')), + 'series': series_title, 'formats': formats, + 'duration': int_or_none(urplayer_data.get('duration')), + 'categories': urplayer_data.get('categories'), + 'tags': urplayer_data.get('keywords'), + 'season': series.get('label'), + 'episode': episode, + 'episode_number': int_or_none(urplayer_data.get('episodeNumber')), } |