aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Teemu Ikonen <tpikonen@gmail.com> 2022-04-05 15:15:47 +0300
committer GitHub <noreply@github.com> 2022-04-05 05:15:47 -0700
commit 0a8a7e68fabf6fc9387f270301e51225ac349b00 (patch)
tree 1cd67400938971147d678a7edd6644375fa816e4
parent f4d706a931bdf2534c23353b5843d3220efe6f89 (diff)
download hypervideo-pre-0a8a7e68fabf6fc9387f270301e51225ac349b00.tar.lz
hypervideo-pre-0a8a7e68fabf6fc9387f270301e51225ac349b00.tar.xz
hypervideo-pre-0a8a7e68fabf6fc9387f270301e51225ac349b00.zip
[ruutu] Detect embeds (#3294)
Authored by: tpikonen
-rw-r--r-- yt_dlp/extractor/generic.py 26
-rw-r--r-- yt_dlp/extractor/ruutu.py 15
2 files changed, 40 insertions, 1 deletions
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 65e803dd7..2c503e581 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -149,6 +149,7 @@ from .blogger import BloggerIE
from .mainstreaming import MainStreamingIE
from .gfycat import GfycatIE
from .panopto import PanoptoBaseIE
+from .ruutu import RuutuIE
class GenericIE(InfoExtractor):
@@ -2511,7 +2512,24 @@ class GenericIE(InfoExtractor):
'id': 'insert-a-quiz-into-a-panopto-video'
},
'playlist_count': 1
- }
+ },
+ {
+ # Ruutu embed
+ 'url': 'https://www.nelonen.fi/ohjelmat/madventures-suomi/2160731-riku-ja-tunna-lahtevat-peurajahtiin-tv-sta-tutun-biologin-kanssa---metsastysreissu-huipentuu-kasvissyojan-painajaiseen',
+ 'md5': 'a2513a98d3496099e6eced40f7e6a14b',
+ 'info_dict': {
+ 'id': '4044426',
+ 'ext': 'mp4',
+ 'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!',
+ 'thumbnail': r're:^https?://.+\.jpg$',
+ 'duration': 108,
+ 'series' : 'Madventures Suomi',
+ 'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381',
+ 'categories': ['Matkailu', 'Elämäntyyli'],
+ 'age_limit': 0,
+ 'upload_date': '20220308',
+ },
+ },
]
def report_following_redirect(self, new_url):
@@ -3737,6 +3755,12 @@ class GenericIE(InfoExtractor):
panopto_urls = PanoptoBaseIE._extract_urls(webpage)
if panopto_urls:
return self.playlist_from_matches(panopto_urls, video_id, video_title)
+
+ # Look for Ruutu embeds
+ ruutu_url = RuutuIE._extract_url(webpage)
+ if ruutu_url:
+ return self.url_result(ruutu_url, RuutuIE)
+
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py
index d9cf39d71..5a30e3360 100644
--- a/yt_dlp/extractor/ruutu.py
+++ b/yt_dlp/extractor/ruutu.py
@@ -1,6 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
+import json
+import re
+
from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
@@ -8,6 +11,8 @@ from ..utils import (
ExtractorError,
find_xpath_attr,
int_or_none,
+ traverse_obj,
+ try_call,
unified_strdate,
url_or_none,
xpath_attr,
@@ -123,6 +128,16 @@ class RuutuIE(InfoExtractor):
]
_API_BASE = 'https://gatling.nelonenmedia.fi'
+    @classmethod
+    def _extract_url(cls, webpage):
+        """Return a ruutu.fi video URL for an embed found in webpage, or None.
+
+        The video id is read from the JSON object passed to a
+        ``jQuery.extend(Drupal.settings, {...});`` call in the page source.
+        """
+        def load_drupal_settings():
+            # re.search() yields None when the page has no such call, so the
+            # .group() access raises; the try_call wrapper below is relied on
+            # to absorb that -- NOTE(review): confirm which exceptions
+            # utils.try_call swallows (json.loads can raise ValueError).
+            mobj = re.search(
+                r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage)
+            return json.loads(mobj.group(1), strict=False)
+
+        drupal_settings = try_call(load_drupal_settings)
+        id_path = (
+            'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')
+        embed_id = traverse_obj(drupal_settings, id_path)
+        if not embed_id:
+            return None
+        return f'http://www.ruutu.fi/video/{embed_id}'
+
def _real_extract(self, url):
video_id = self._match_id(url)