diff options
| author | Sonic <47434066+TwoThousandHedgehogs@users.noreply.github.com> | 2021-12-24 06:49:33 -0500 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-12-24 17:19:33 +0530 | 
| commit | e9efb99f66b57a2e25937dff3dd834b5e6c381e3 (patch) | |
| tree | 13f19774706211b11e3fdf8031dfa0dcf7e98db3 | |
| parent | a709d87335fb33be0e94a6292f5c6f094b266541 (diff) | |
| download | hypervideo-pre-e9efb99f66b57a2e25937dff3dd834b5e6c381e3.tar.lz hypervideo-pre-e9efb99f66b57a2e25937dff3dd834b5e6c381e3.tar.xz hypervideo-pre-e9efb99f66b57a2e25937dff3dd834b5e6c381e3.zip | |
[dropout] Add extractor (#2045)
Authored-by: TwoThousandHedgehogs, pukkandan
| -rw-r--r-- | yt_dlp/extractor/dropout.py | 212 | ||||
| -rw-r--r-- | yt_dlp/extractor/extractors.py | 4 | 
2 files changed, 216 insertions, 0 deletions
| diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py new file mode 100644 index 000000000..a7442d8f0 --- /dev/null +++ b/yt_dlp/extractor/dropout.py @@ -0,0 +1,212 @@ +# coding: utf-8 +from .common import InfoExtractor +from .vimeo import VHXEmbedIE +from ..utils import ( +    clean_html, +    ExtractorError, +    get_element_by_class, +    get_element_by_id, +    get_elements_by_class, +    int_or_none, +    join_nonempty, +    unified_strdate, +    urlencode_postdata, +) + + +class DropoutIE(InfoExtractor): +    _LOGIN_URL = 'https://www.dropout.tv/login' +    _NETRC_MACHINE = 'dropout' + +    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?:[^/]+/)*videos/(?P<id>[^/]+)/?$' +    _TESTS = [ +        { +            'url': 'https://www.dropout.tv/game-changer/season:2/videos/yes-or-no', +            'note': 'Episode in a series', +            'md5': '5e000fdfd8d8fa46ff40456f1c2af04a', +            'info_dict': { +                'id': '738153', +                'display_id': 'yes-or-no', +                'ext': 'mp4', +                'title': 'Yes or No', +                'description': 'Ally, Brennan, and Zac are asked a simple question, but is there a correct answer?', +                'release_date': '20200508', +                'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/351e3f24-c4a3-459a-8b79-dc80f1e5b7fd.jpg', +                'series': 'Game Changer', +                'season_number': 2, +                'season': 'Season 2', +                'episode_number': 6, +                'episode': 'Yes or No', +                'duration': 1180, +                'uploader_id': 'user80538407', +                'uploader_url': 'https://vimeo.com/user80538407', +                'uploader': 'OTT Videos' +            }, +            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] +        }, +        { +            'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1/videos/episode-1', +            'note': 'Episode in a series (missing release_date)', +            'md5': '712caf7c191f1c47c8f1879520c2fa5c', +            'info_dict': { +                'id': '320562', +                'display_id': 'episode-1', +                'ext': 'mp4', +                'title': 'The Beginning Begins', +                'description': 'The cast introduces their PCs, including a neurotic elf, a goblin PI, and a corn-worshipping cleric.', +                'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/4421ed0d-f630-4c88-9004-5251b2b8adfa.jpg', +                'series': 'Dimension 20: Fantasy High', +                'season_number': 1, +                'season': 'Season 1', +                'episode_number': 1, +                'episode': 'The Beginning Begins', +                'duration': 6838, +                'uploader_id': 'user80538407', +                'uploader_url': 'https://vimeo.com/user80538407', +                'uploader': 'OTT Videos' +            }, +            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] +        }, +        { +            'url': 'https://www.dropout.tv/videos/misfits-magic-holiday-special', +            'note': 'Episode not in a series', +            'md5': 'c30fa18999c5880d156339f13c953a26', +            'info_dict': { +                'id': '1915774', +                'display_id': 'misfits-magic-holiday-special', +                'ext': 'mp4', +                'title': 'Misfits & Magic Holiday Special', +                'description': 'The magical misfits spend Christmas break at Gowpenny, with an unwelcome visitor.', +                'release_date': '20211215', +                'thumbnail': 'https://vhx.imgix.net/chuncensoredstaging/assets/d91ea8a6-b250-42ed-907e-b30fb1c65176-8e24b8e5.jpg', +                'duration': 11698, +                'uploader_id': 'user80538407', +                'uploader_url': 'https://vimeo.com/user80538407', +                'uploader': 'OTT Videos' +            }, +            'expected_warnings': ['Ignoring subtitle tracks found in the HLS manifest'] +        } +    ] + +    def _get_authenticity_token(self, display_id): +        signin_page = self._download_webpage( +            self._LOGIN_URL, display_id, note='Getting authenticity token') +        return self._html_search_regex( +            r'name=["\']authenticity_token["\'] value=["\'](.+?)["\']', +            signin_page, 'authenticity_token') + +    def _login(self, display_id): +        username, password = self._get_login_info() +        if not (username and password): +            self.raise_login_required(method='password') + +        response = self._download_webpage( +            self._LOGIN_URL, display_id, note='Logging in', data=urlencode_postdata({ +                'email': username, +                'password': password, +                'authenticity_token': self._get_authenticity_token(display_id), +                'utf8': True +            })) + +        user_has_subscription = self._search_regex( +            r'user_has_subscription:\s*["\'](.+?)["\']', response, 'subscription status', default='none') +        if user_has_subscription.lower() == 'true': +            return response +        elif user_has_subscription.lower() == 'false': +            raise ExtractorError('Account is not subscribed') +        else: +            raise ExtractorError('Incorrect username/password') + +    def _real_extract(self, url): +        display_id = self._match_id(url) +        try: +            self._login(display_id) +            webpage = self._download_webpage(url, display_id, note='Downloading video webpage') +        finally: +            self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out') + +        embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') +        thumbnail = self._og_search_thumbnail(webpage) +        watch_info = get_element_by_id('watch-info', webpage) or '' + +        title = clean_html(get_element_by_class('video-title', watch_info)) +        season_episode = get_element_by_class( +            'site-font-secondary-color', get_element_by_class('text', watch_info)) +        episode_number = int_or_none(self._search_regex( +            r'Episode (\d+)', season_episode or '', 'episode', default=None)) + +        return { +            '_type': 'url_transparent', +            'ie_key': VHXEmbedIE.ie_key(), +            'url': embed_url, +            'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'), +            'display_id': display_id, +            'title': title, +            'description': self._html_search_meta('description', webpage, fatal=False), +            'thumbnail': thumbnail.split('?')[0] if thumbnail else None,  # Ignore crop/downscale +            'series': clean_html(get_element_by_class('series-title', watch_info)), +            'episode_number': episode_number, +            'episode': title if episode_number else None, +            'season_number': int_or_none(self._search_regex( +                r'Season (\d+),', season_episode or '', 'season', default=None)), +            'release_date': unified_strdate(self._search_regex( +                r'data-meta-field-name=["\']release_dates["\'] data-meta-field-value=["\'](.+?)["\']', +                watch_info, 'release date', default=None)), +        } + + +class DropoutSeasonIE(InfoExtractor): +    _VALID_URL = r'https?://(?:www\.)?dropout\.tv/(?P<id>[^\/$&?#]+)(?:/?$|/season:[0-9]+/?$)' +    _TESTS = [ +        { +            'url': 'https://www.dropout.tv/dimension-20-fantasy-high/season:1', +            'note': 'Multi-season series with the season in the url', +            'playlist_count': 17, +            'info_dict': { +                'id': 'dimension-20-fantasy-high-season-1', +                'title': 'Dimension 20 Fantasy High - Season 1' +            } +        }, +        { +            'url': 'https://www.dropout.tv/dimension-20-fantasy-high', +            'note': 'Multi-season series with the season not in the url', +            'playlist_count': 17, +            'info_dict': { +                'id': 'dimension-20-fantasy-high-season-1', +                'title': 'Dimension 20 Fantasy High - Season 1' +            } +        }, +        { +            'url': 'https://www.dropout.tv/dimension-20-shriek-week', +            'note': 'Single-season series', +            'playlist_count': 4, +            'info_dict': { +                'id': 'dimension-20-shriek-week-season-1', +                'title': 'Dimension 20 Shriek Week - Season 1' +            } +        } +    ] + +    def _real_extract(self, url): +        season_id = self._match_id(url) +        season_title = season_id.replace('-', ' ').title() +        webpage = self._download_webpage(url, season_id) + +        entries = [ +            self.url_result( +                url=self._search_regex(r'<a href=["\'](.+?)["\'] class=["\']browse-item-link["\']', +                                       item, 'item_url'), +                ie=DropoutIE.ie_key() +            ) for item in get_elements_by_class('js-collection-item', webpage) +        ] + +        seasons = (get_element_by_class('select-dropdown-wrapper', webpage) or '').strip().replace('\n', '') +        current_season = self._search_regex(r'<option[^>]+selected>([^<]+)</option>', +                                            seasons, 'current_season', default='').strip() + +        return { +            '_type': 'playlist', +            'id': join_nonempty(season_id, current_season.lower().replace(' ', '-')), +            'title': join_nonempty(season_title, current_season, delim=' - '), +            'entries': entries +        } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0b359a253..2a907bc57 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -385,6 +385,10 @@ from .disney import DisneyIE  from .dispeak import DigitallySpeakingIE  from .doodstream import DoodStreamIE  from .dropbox import DropboxIE +from .dropout import ( +    DropoutSeasonIE, +    DropoutIE +)  from .dw import (      DWIE,      DWArticleIE, | 
