about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: The Hatsune Daishi <nao20010128@gmail.com> 2021-08-04 18:14:37 +0900
committer: GitHub <noreply@github.com> 2021-08-04 14:44:37 +0530
commit: 9997eee4af137ab46fc00336b1d4b7d7eb6431d4 (patch)
tree: 4bbb79b54e01d7a7e35cf61a3fc6e625b107dc54
parent: 3e376d183ede2d9d24a14e4d5afee7a64679cca0 (diff)
download: hypervideo-pre-9997eee4af137ab46fc00336b1d4b7d7eb6431d4.tar.lz
hypervideo-pre-9997eee4af137ab46fc00336b1d4b7d7eb6431d4.tar.xz
hypervideo-pre-9997eee4af137ab46fc00336b1d4b7d7eb6431d4.zip
[openrec] Add extractors (#624)
Authored by: nao20010128nao
-rw-r--r-- README.md 2
-rw-r--r-- yt_dlp/extractor/extractors.py 4
-rw-r--r-- yt_dlp/extractor/openrec.py 126
3 files changed, 131 insertions, 1 deletion
diff --git a/README.md b/README.md
index 7fb2c0874..9ac50dbe0 100644
--- a/README.md
+++ b/README.md
@@ -88,7 +88,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats
-* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon
+* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec
* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index e12121073..99fd7cb78 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -936,6 +936,10 @@ from .ooyala import (
OoyalaIE,
OoyalaExternalIE,
)
+from .openrec import (
+ OpenRecIE,
+ OpenRecCaptureIE,
+)
from .ora import OraTVIE
from .orf import (
ORFTVthekIE,
diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py
new file mode 100644
index 000000000..d7073ab44
--- /dev/null
+++ b/yt_dlp/extractor/openrec.py
@@ -0,0 +1,126 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ traverse_obj,
+ try_get,
+ unified_strdate
+)
+from ..compat import compat_str
+
+
+class OpenRecIE(InfoExtractor):
+    """Extractor for openrec.tv live pages (``openrec.tv/live/<id>``).
+
+    Metadata is read from the ``window.pageStore`` JSON object embedded in
+    the page. HLS formats are built from every non-empty entry of the
+    movie's ``media`` mapping.
+    """
+    IE_NAME = 'openrec'
+    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/live/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.openrec.tv/live/2p8v31qe4zy',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.openrec.tv/live/wez93eqvjzl',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the live page at *url*.
+
+        Raises ExtractorError when no movie object is found at either of
+        the two candidate locations inside ``window.pageStore``.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage('https://www.openrec.tv/live/%s' % video_id, video_id)
+
+        window_stores = self._parse_json(
+            self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
+        # Two candidate paths are tried for the movie object; only a dict is accepted.
+        movie_store = traverse_obj(
+            window_stores,
+            ('v8', 'state', 'movie'),
+            ('v8', 'movie'),
+            expected_type=dict)
+        if not movie_store:
+            raise ExtractorError('Failed to extract live info')
+
+        title = movie_store.get('title')
+        description = movie_store.get('introduction')
+        thumbnail = movie_store.get('thumbnailUrl')
+
+        # The uploader fields are optional. Resolve the whole
+        # channel -> user -> field chain inside try_get() so that a missing,
+        # null or non-dict ``channel``/``user`` only leaves these fields
+        # empty instead of aborting the extraction with an AttributeError.
+        uploader = try_get(movie_store, lambda x: x['channel']['user']['name'], compat_str)
+        uploader_id = try_get(movie_store, lambda x: x['channel']['user']['id'], compat_str)
+
+        # NOTE(review): the unit of ``startedAt.time`` is not visible from
+        # this file - confirm it is seconds and not milliseconds.
+        timestamp = traverse_obj(movie_store, ('startedAt', 'time'), expected_type=int)
+
+        # ``media`` may be absent or null; treat that as "no playlists" and
+        # leave the handling of an empty format list to _sort_formats().
+        m3u8_playlists = try_get(movie_store, lambda x: x['media'], dict) or {}
+        formats = []
+        for name, m3u8_url in m3u8_playlists.items():
+            # Entries without a URL are skipped.
+            if not m3u8_url:
+                continue
+            formats.extend(self._extract_m3u8_formats(
+                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8',
+                m3u8_id='hls-%s' % name, live=True))
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'formats': formats,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'timestamp': timestamp,
+            'is_live': True,
+        }
+
+
+class OpenRecCaptureIE(InfoExtractor):
+    """Extractor for openrec.tv capture pages (``openrec.tv/capture/<id>``).
+
+    Title, thumbnail, upload date and the HLS source come from the
+    ``capture`` object of ``window.pageStore``. The uploader fields come
+    from the ``channel`` object nested in ``movie``.
+    """
+    IE_NAME = 'openrec:capture'
+    _VALID_URL = r'https?://(?:www\.)?openrec\.tv/capture/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.openrec.tv/capture/l9nk2x4gn14',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.openrec.tv/capture/mldjr82p7qk',
+        'info_dict': {
+            'id': 'mldjr82p7qk',
+            'title': 'たいじの恥ずかしい英語力',
+            'uploader': 'たいちゃんねる',
+            'uploader_id': 'Yaritaiji',
+            'upload_date': '20210803',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for the capture page at *url*.
+
+        Raises ExtractorError when the page store has no usable ``capture``
+        object or when that object carries no ``source`` URL.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage('https://www.openrec.tv/capture/%s' % video_id, video_id)
+
+        window_stores = self._parse_json(
+            self._search_regex(r'(?m)window\.pageStore\s*=\s*(\{.+?\});$', webpage, 'window.pageStore'), video_id)
+
+        # Everything mandatory lives in ``capture``; without it nothing can
+        # be extracted, so say so instead of blaming only the title.
+        capture_data = try_get(window_stores, lambda x: x['capture'], dict)
+        if not capture_data:
+            raise ExtractorError('Failed to extract capture info')
+        title = capture_data.get('title')
+        thumbnail = capture_data.get('thumbnailUrl')
+        upload_date = unified_strdate(capture_data.get('createdAt'))
+
+        # ``movie`` only feeds the optional uploader fields, so a missing or
+        # null ``movie``/``channel`` must not abort the extraction.
+        channel_info = try_get(window_stores, lambda x: x['movie']['channel'], dict) or {}
+        uploader = channel_info.get('name')
+        uploader_id = channel_info.get('id')
+
+        m3u8_url = capture_data.get('source')
+        if not m3u8_url:
+            raise ExtractorError('Cannot extract m3u8 url')
+        formats = self._extract_m3u8_formats(
+            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'upload_date': upload_date,
+        }