about summary refs log tree commit diff stats
path: root/hypervideo_dl/extractor/rtlnl.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/rtlnl.py')
-rw-r--r-- hypervideo_dl/extractor/rtlnl.py 156
1 files changed, 152 insertions, 4 deletions
diff --git a/hypervideo_dl/extractor/rtlnl.py b/hypervideo_dl/extractor/rtlnl.py
index 9eaa06f..724cb64 100644
--- a/hypervideo_dl/extractor/rtlnl.py
+++ b/hypervideo_dl/extractor/rtlnl.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
from .common import InfoExtractor
from ..utils import (
int_or_none,
@@ -11,6 +8,7 @@ from ..utils import (
class RtlNlIE(InfoExtractor):
IE_NAME = 'rtl.nl'
IE_DESC = 'rtl.nl and rtlxl.nl'
+ _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
_VALID_URL = r'''(?x)
https?://(?:(?:www|static)\.)?
(?:
@@ -118,7 +116,6 @@ class RtlNlIE(InfoExtractor):
formats = self._extract_m3u8_formats(
m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)
- self._sort_formats(formats)
thumbnails = []
@@ -144,3 +141,154 @@ class RtlNlIE(InfoExtractor):
'duration': parse_duration(material.get('duration')),
'thumbnails': thumbnails,
}
+
+
+class RTLLuBaseIE(InfoExtractor):
+    # Shared implementation for the rtl.lu extractors: media URLs are read
+    # from attributes of the <rtl-player> / <rtl-audioplayer> tags in the page.
+    _MEDIA_REGEX = {
+        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
+        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
+        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
+    }
+
+    @staticmethod
+    def _is_live_id(video_id):
+        """Return True if `video_id` has the shape of a live-page id.
+
+        Mirrors the id alternatives of the live URL pattern
+        (`live(?:-\\d+)?|lauschteren`) so that every numbered live channel,
+        not only `live-2`, is flagged as live.
+        """
+        if video_id in ('live', 'lauschteren'):
+            return True
+        # 'live-<digits>'; str.isdecimal() is False for an empty suffix
+        return video_id.startswith('live-') and video_id[len('live-'):].isdecimal()
+
+    def get_media_url(self, webpage, video_id, media_type):
+        """Search `webpage` with the `media_type` pattern of _MEDIA_REGEX.
+
+        `media_type` is one of the _MEDIA_REGEX keys ('video', 'audio',
+        'thumbnail'). The search is done with default=None, and callers
+        treat a None result as "not present". `video_id` is not used here.
+        """
+        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)
+
+    def get_formats_and_subtitles(self, webpage, video_id):
+        """Build the (formats, subtitles) pair from the players found in `webpage`.
+
+        The HLS URL of the video player (if any) is expanded into formats and
+        subtitles; the audio player URL (if any) is appended as a single
+        audio-only mp3 format.
+        """
+        video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')
+
+        formats, subtitles = [], {}
+        if video_url is not None:
+            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
+        if audio_url is not None:
+            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})
+
+        return formats, subtitles
+
+    def _real_extract(self, url):
+        """Download the page at `url` and return its info dict."""
+        video_id = self._match_id(url)
+        is_live = self._is_live_id(video_id)
+
+        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
+        # the context can be read from <rtl-comments context=<context> in the webpage
+        webpage = self._download_webpage(url, video_id)
+
+        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'formats': formats,
+            'subtitles': subtitles,
+            # prefer the player's poster attribute, fall back to the og thumbnail
+            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
+            'is_live': is_live,
+        }
+
+
+class RTLLuTeleVODIE(RTLLuBaseIE):
+    # rtl.lu pages addressed by a numeric id: /tele/<slug>/v/<id> and /video/<id>
+    IE_NAME = 'rtl.lu:tele-vod'
+    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
+    _TESTS = [
+        {
+            'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
+            'info_dict': {
+                'id': '3266757',
+                'title': 'Informatiounsversammlung Héichwaasser',
+                'ext': 'mp4',
+                'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
+                'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
+            },
+        },
+        {
+            'url': 'https://www.rtl.lu/video/3295215',
+            'info_dict': {
+                'id': '3295215',
+                'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
+                'ext': 'mp4',
+                'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
+                'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
+            },
+        },
+    ]
+
+
+class RTLLuArticleIE(RTLLuBaseIE):
+    # Article pages (/<section>/<subsection>/a/<id>.html) on the www, 5minutes
+    # and today subdomains of rtl.lu
+    IE_NAME = 'rtl.lu:article'
+    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
+    _TESTS = [
+        {
+            # Audio-only
+            'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
+            'info_dict': {
+                'id': '1934360',
+                'ext': 'mp3',
+                'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
+                'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
+                'title': 'md5:40aa85f135578fbd549d3c9370321f99',
+            },
+        },
+        {
+            # 5minutes
+            'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
+            'info_dict': {
+                'id': '1853173',
+                'ext': 'mp4',
+                'description': 'md5:ac031da0740e997a5cf4633173634fee',
+                'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
+                'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
+            },
+        },
+        {
+            # today.lu
+            'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
+            'info_dict': {
+                'id': '1936203',
+                'ext': 'mp4',
+                'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
+                'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
+                'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
+            },
+        },
+    ]
+
+
+class RTLLuLiveIE(RTLLuBaseIE):
+    # Live pages under /tele/ and /radio/; the id is the last path component
+    # ('live', 'live-<n>' or 'lauschteren')
+    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
+    _TESTS = [
+        {
+            # Tele:live
+            'url': 'https://www.rtl.lu/tele/live',
+            'info_dict': {
+                'id': 'live',
+                'ext': 'mp4',
+                'live_status': 'is_live',
+                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+                'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
+            },
+        },
+        {
+            # Tele:live-2
+            'url': 'https://www.rtl.lu/tele/live-2',
+            'info_dict': {
+                'id': 'live-2',
+                'ext': 'mp4',
+                'live_status': 'is_live',
+                'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+                'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
+            },
+        },
+        {
+            # Radio:lauschteren
+            'url': 'https://www.rtl.lu/radio/lauschteren',
+            'info_dict': {
+                'id': 'lauschteren',
+                'ext': 'mp4',
+                'live_status': 'is_live',
+                'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
+                'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
+            },
+        },
+    ]
+
+
+class RTLLuRadioIE(RTLLuBaseIE):
+    # Radio pages addressed by a numeric id: /radio/<show>/s/<id>
+    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
+    _TESTS = [
+        {
+            'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
+            'info_dict': {
+                'id': '4033058',
+                'ext': 'mp3',
+                'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
+                'title': '5 vir 12 - Stau um Stau',
+                'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
+            },
+        },
+    ]