aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/videocampus_sachsen.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/videocampus_sachsen.py')
-rw-r--r--hypervideo_dl/extractor/videocampus_sachsen.py237
1 files changed, 197 insertions, 40 deletions
diff --git a/hypervideo_dl/extractor/videocampus_sachsen.py b/hypervideo_dl/extractor/videocampus_sachsen.py
index 96e9857..982ab3d 100644
--- a/hypervideo_dl/extractor/videocampus_sachsen.py
+++ b/hypervideo_dl/extractor/videocampus_sachsen.py
@@ -1,12 +1,80 @@
-# coding: utf-8
+import functools
+import re
+
from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
class VideocampusSachsenIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?:
+ IE_NAME = 'ViMP'
+ _INSTANCES = (
+ 'bergauf.tv',
+ 'campus.demo.vimp.com',
+ 'corporate.demo.vimp.com',
+ 'dancehalldatabase.com',
+ 'drehzahl.tv',
+ 'educhannel.hs-gesundheit.de',
+ 'emedia.ls.haw-hamburg.de',
+ 'globale-evolution.net',
+ 'hohu.tv',
+ 'htvideos.hightechhigh.org',
+ 'k210039.vimp.mivitec.net',
+ 'media.cmslegal.com',
+ 'media.hs-furtwangen.de',
+ 'media.hwr-berlin.de',
+ 'mediathek.dkfz.de',
+ 'mediathek.htw-berlin.de',
+ 'mediathek.polizei-bw.de',
+ 'medien.hs-merseburg.de',
+ 'mportal.europa-uni.de',
+ 'pacific.demo.vimp.com',
+ 'slctv.com',
+ 'streaming.prairiesouth.ca',
+ 'tube.isbonline.cn',
+ 'univideo.uni-kassel.de',
+ 'ursula2.genetics.emory.edu',
+ 'ursulablicklevideoarchiv.com',
+ 'v.agrarumweltpaedagogik.at',
+ 'video.eplay-tv.de',
+ 'video.fh-dortmund.de',
+ 'video.hs-offenburg.de',
+ 'video.hs-pforzheim.de',
+ 'video.hspv.nrw.de',
+ 'video.irtshdf.fr',
+ 'video.pareygo.de',
+ 'video.tu-freiberg.de',
+ 'videocampus.sachsen.de',
+ 'videoportal.uni-freiburg.de',
+ 'videoportal.vm.uni-freiburg.de',
+ 'videos.duoc.cl',
+ 'videos.uni-paderborn.de',
+ 'vimp-bemus.udk-berlin.de',
+ 'vimp.aekwl.de',
+ 'vimp.hs-mittweida.de',
+ 'vimp.oth-regensburg.de',
+ 'vimp.ph-heidelberg.de',
+ 'vimp.sma-events.com',
+ 'vimp.weka-fachmedien.de',
+ 'webtv.univ-montp3.fr',
+ 'www.b-tu.de/media',
+ 'www.bergauf.tv',
+ 'www.bigcitytv.de',
+ 'www.cad-videos.de',
+ 'www.drehzahl.tv',
+ 'www.fh-bielefeld.de/medienportal',
+ 'www.hohu.tv',
+ 'www.orvovideo.com',
+ 'www.rwe.tv',
+ 'www.salzi.tv',
+ 'www.wenglor-media.com',
+ 'www2.univ-sba.dz',
+ )
+ # Accepted URL shapes on any host listed in _INSTANCES:
+ # /m/<tmp_id>, [/category]/video/<display_id>/<id> and /media/embed...key=<embed_id>.
+ # embed_id captures only the 32 hex digits: the value is reused verbatim as the
+ # video id when building media URLs, so a following '&' must not be part of it.
+ _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
m/(?P<tmp_id>[0-9a-f]+)|
- (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})
- )'''
+ (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})|
+ media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32})
+ )''' % ('|'.join(map(re.escape, _INSTANCES)))
_TESTS = [
{
@@ -14,6 +82,8 @@ class VideocampusSachsenIE(InfoExtractor):
'info_dict': {
'id': 'e6b9349905c1628631f175712250f2a1',
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
'ext': 'mp4',
},
},
@@ -22,6 +92,8 @@ class VideocampusSachsenIE(InfoExtractor):
'info_dict': {
'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?',
+ 'description': 'md5:196aa3b0509a526db62f84679522a2f5',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
'display_id': 'Was-ist-selbstgesteuertes-Lernen',
'ext': 'mp4',
},
@@ -31,66 +103,151 @@ class VideocampusSachsenIE(InfoExtractor):
'info_dict': {
'id': '09d4ed029002eb1bdda610f1103dd54c',
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
+ 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
+ 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
'ext': 'mp4',
},
},
+ {
+ 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
+ 'info_dict': {
+ 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
+ 'id': '0183356e41af7bfb83d7667b20d9b6a3',
+ 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
+ 'description': 'md5:508958bd93e0ca002ac731d94182a54f',
+ 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
+ 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
+ 'info_dict': {
+ 'id': 'c8816f1cc942c12b6cce57c835cffd7c',
+ 'title': 'Preisverleihung »Produkte des Jahres 2022«',
+ 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
+ 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
+ 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
+ 'ext': 'mp4',
+ },
+ },
+ {
+ 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
+ 'info_dict': {
+ 'id': 'fc99c527e4205b121cb7c74433469262',
+ 'title': 'Was ist selbstgesteuertes Lernen?',
+ 'ext': 'mp4',
+ },
+ },
]
def _real_extract(self, url):
- video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id')
+ """Extract one ViMP video from a watch, short-link (``/m/``) or embed URL.
+
+ Returns an info dict with ``id``, ``title``, ``description``, ``thumbnail``,
+ ``display_id``, ``formats`` and ``subtitles``.
+
+ Raises ExtractorError if the HLS manifest request fails for any reason other
+ than an HTTP 404 or 500 response.
+ """
+ host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
+ 'host', 'id', 'tmp_id', 'display_id', 'embed_id')
- webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
+ # Embed URLs match neither id nor tmp_id, so pass embed_id as the identifier instead of None
+ webpage = self._download_webpage(url, video_id or tmp_id or embed_id, fatal=False) or ''
- if not tmp_id:
- video_id = self._html_search_regex(
- r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&',
+ if not video_id:
+ # host is literal text taken from the URL, so it is escaped before being used in a
+ # pattern; [^"]* keeps the match inside the src attribute instead of running to a
+ # later key= on the same line
+ video_id = embed_id or self._html_search_regex(
+ rf'src="https?://{re.escape(host)}/media/embed[^"]*(?:\?|&)key=([0-9a-f]+)&?',
webpage, 'video_id')
- title = self._html_search_regex(
- (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')),
- webpage, 'title', group='content', fatal=False)
+ if not (display_id or tmp_id):
+ # Title, description from embedded page's meta wouldn't be correct
+ title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
+ description = None
+ thumbnail = None
+ else:
+ title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
+ description = self._html_search_meta(
+ ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
+ thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
+
+ formats, subtitles = [], {}
+ try:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
+ video_id, 'mp4', m3u8_id='hls', fatal=True)
+ except ExtractorError as e:
+ # Tolerate HTTP 404/500 from the HLS endpoint and continue with the direct MP4 below
+ if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500):
+ raise
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
+ # The direct MP4 URL is always offered, in addition to any HLS formats found above
+ formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
return {
'id': video_id,
'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
'display_id': display_id,
'formats': formats,
- 'subtitles': subtitles
+ 'subtitles': subtitles,
}
-class VideocampusSachsenEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)'
+class ViMPPlaylistIE(InfoExtractor):
+ IE_NAME = 'ViMP:Playlist'
+ _VALID_URL = r'''(?x)(?P<host>https?://(?:%s))/(?:
+ album/view/aid/(?P<album_id>[0-9]+)|
+ (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
+ )''' % '|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES))
- _TESTS = [
- {
- 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
- 'info_dict': {
- 'id': 'fc99c527e4205b121cb7c74433469262',
- 'title': 'Was ist selbstgesteuertes Lernen?',
- 'ext': 'mp4',
- },
- }
- ]
+ _TESTS = [{
+ 'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
+ 'info_dict': {
+ 'id': 'channel-3',
+ 'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
+ 'info_dict': {
+ 'id': 'album-208',
+ 'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
+ 'info_dict': {
+ 'id': 'category-91',
+ 'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
+ },
+ 'playlist_mincount': 7,
+ }]
+ _PAGE_SIZE = 10
+
+ def _fetch_page(self, host, url_part, id, data, page):
+ """Yield a url_result for every video link found on one page of a box list.
+
+ host is the instance base URL including the scheme, url_part selects the
+ album/category/channel below ``/media/ajax/component/boxList/``, id is the
+ identifier handed to the download call, data is the form payload that gets
+ POSTed, and page is forwarded as the ``page`` query parameter.
+ """
+ endpoint = f'{host}/media/ajax/component/boxList/{url_part}'
+ page_html = self._download_webpage(
+ endpoint, id, query={'page': page, 'page_only': 1},
+ data=urlencode_postdata(data))
+
+ # Every quoted string containing /video/ is treated as a link relative to host
+ for link in re.finditer(r'"([^"]+/video/[^"]+)"', page_html):
+ yield self.url_result(host + link.group(1), VideocampusSachsenIE)
def _real_extract(self, url):
- video_id = self._match_id(url)
+ """Return a lazily paged playlist for a ViMP album, category or channel URL.
+
+ The playlist id is ``<mode>-<number>`` where mode is ``album``, ``category``
+ or ``channel``; entries are fetched page by page through ``_fetch_page``.
+ """
+ host, album_id, mode, name, list_id = self._match_valid_url(url).group(
+ 'host', 'album_id', 'mode', 'name', 'id')
+ # Album URLs only match album_id; category/channel URLs only match id
+ playlist_key = album_id or list_id
- webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False)
- formats, subtitles = self._extract_m3u8_formats_and_subtitles(
- f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
+ # The page is only used for the title, so a failed download is tolerated
+ webpage = self._download_webpage(url, playlist_key, fatal=False) or ''
+ meta_title = self._html_search_meta('title', webpage, fatal=False)
+ title = meta_title or self._html_extract_title(webpage)
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
+ is_category = mode == 'category'
+ url_part = (f'aid/{album_id}' if album_id
+ else f'category/{name}/category_id/{list_id}' if is_category
+ else f'title/{name}/channel/{list_id}')
+ context = '4' if album_id else '1' if is_category else '3'
+
+ # mode is unset for album URLs
+ mode = mode or 'album'
+ data = {
+ 'vars[mode]': mode,
+ f'vars[{mode}]': playlist_key,
+ 'vars[context]': context,
+ 'vars[context_id]': playlist_key,
+ 'vars[layout]': 'thumb',
+ 'vars[per_page][thumb]': str(self._PAGE_SIZE),
}
+
+ fetch_page = functools.partial(self._fetch_page, host, url_part, playlist_key, data)
+ return self.playlist_result(
+ OnDemandPagedList(fetch_page, self._PAGE_SIZE),
+ playlist_title=title, id=f'{mode}-{playlist_key}')