diff options
Diffstat (limited to 'hypervideo_dl/extractor/tubitv.py')
-rw-r--r-- | hypervideo_dl/extractor/tubitv.py | 43 |
1 files changed, 39 insertions, 4 deletions
diff --git a/hypervideo_dl/extractor/tubitv.py b/hypervideo_dl/extractor/tubitv.py index ebfb05c..2e9b325 100644 --- a/hypervideo_dl/extractor/tubitv.py +++ b/hypervideo_dl/extractor/tubitv.py @@ -7,13 +7,19 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + js_to_json, sanitized_Request, urlencode_postdata, ) class TubiTvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)' + _VALID_URL = r'''(?x) + (?: + tubitv:| + https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/ + ) + (?P<id>[0-9]+)''' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' _GEO_COUNTRIES = ['US'] @@ -75,9 +81,13 @@ class TubiTvIE(InfoExtractor): 'http://tubitv.com/oz/videos/%s/content' % video_id, video_id) title = video_data['title'] - formats = self._extract_m3u8_formats( - self._proto_relative_url(video_data['url']), - video_id, 'mp4', 'm3u8_native') + formats = [] + url = video_data['url'] + # URL can be sometimes empty. Does this only happen when there is DRM? + if url: + formats = self._extract_m3u8_formats( + self._proto_relative_url(url), + video_id, 'mp4', 'm3u8_native') self._sort_formats(formats) thumbnails = [] @@ -108,3 +118,28 @@ class TubiTvIE(InfoExtractor): 'uploader_id': video_data.get('publisher_id'), 'release_year': int_or_none(video_data.get('year')), } + + +class TubiTvShowIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)' + _TESTS = [{ + 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', + 'playlist_mincount': 390, + 'info_dict': { + 'id': 'the-joy-of-painting-with-bob-ross', + } + }] + + def _entries(self, show_url, show_name): + show_webpage = self._download_webpage(show_url, show_name) + show_json = self._parse_json(self._search_regex( + r"window\.__data\s*=\s*({.+?});\s*</script>", + show_webpage, 'data',), show_name, transform_source=js_to_json)['video'] + for episode_id in show_json['fullContentById'].keys(): + yield self.url_result( + 'tubitv:%s' % episode_id, + ie=TubiTvIE.ie_key(), video_id=episode_id) + + def _real_extract(self, url): + show_name = self._match_valid_url(url).group('show_name') + return self.playlist_result(self._entries(url, show_name), playlist_id=show_name) |