diff options
author | Unknown <blackjack4494@web.de> | 2020-09-13 01:04:02 +0200 |
---|---|---|
committer | Unknown <blackjack4494@web.de> | 2020-09-13 01:04:02 +0200 |
commit | d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e (patch) | |
tree | 9ba92d0f89b072450e5ce0233c7427946e90c4fa | |
parent | dc6193cb220353a26218da2f690f11bca55c9757 (diff) | |
download | hypervideo-pre-d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e.tar.lz hypervideo-pre-d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e.tar.xz hypervideo-pre-d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e.zip |
[gdcvault] fix extractor
at least when not logged in?
-rw-r--r-- | youtube_dlc/extractor/gdcvault.py | 73 |
1 file changed, 22 insertions, 51 deletions
diff --git a/youtube_dlc/extractor/gdcvault.py b/youtube_dlc/extractor/gdcvault.py
index 2f555c1d4..1220be2d9 100644
--- a/youtube_dlc/extractor/gdcvault.py
+++ b/youtube_dlc/extractor/gdcvault.py
@@ -122,67 +122,38 @@ class GDCVaultIE(InfoExtractor):
         request = sanitized_Request(login_url, urlencode_postdata(login_form))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         self._download_webpage(request, display_id, 'Logging in')
-        start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+        webpage = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
         self._download_webpage(logout_url, display_id, 'Logging out')
 
-        return start_page
+        return webpage
 
     def _real_extract(self, url):
         video_id, name = re.match(self._VALID_URL, url).groups()
         display_id = name or video_id
 
-        webpage_url = 'http://www.gdcvault.com/play/' + video_id
-        start_page = self._download_webpage(webpage_url, display_id)
-
-        direct_url = self._search_regex(
-            r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
-            start_page, 'url', default=None)
-        if direct_url:
-            title = self._html_search_regex(
-                r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
-                start_page, 'title')
-            video_url = 'http://www.gdcvault.com' + direct_url
-            # resolve the url so that we can detect the correct extension
-            video_url = self._request_webpage(
-                HEADRequest(video_url), video_id).geturl()
-
-            return {
-                'id': video_id,
-                'display_id': display_id,
-                'url': video_url,
-                'title': title,
-            }
+        webpage = self._download_webpage(url, display_id)
+
+        title = self._html_search_regex(
+            r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
+            webpage, 'title')
+
+        PLAYER_REGEX = r'<iframe src=\"(?P<manifest_url>.*?)\".*?</iframe>'
+        manifest_url = self._html_search_regex(
+            PLAYER_REGEX, webpage, 'manifest_url')
 
-        embed_url = KalturaIE._extract_url(start_page)
-        if embed_url:
-            embed_url = smuggle_url(embed_url, {'source_url': url})
-            ie_key = 'Kaltura'
-        else:
-            PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
-
-            xml_root = self._html_search_regex(
-                PLAYER_REGEX, start_page, 'xml root', default=None)
-            if xml_root is None:
-                # Probably need to authenticate
-                login_res = self._login(webpage_url, display_id)
-                if login_res is None:
-                    self.report_warning('Could not login.')
-                else:
-                    start_page = login_res
-                    # Grab the url from the authenticated page
-                    xml_root = self._html_search_regex(
-                        PLAYER_REGEX, start_page, 'xml root')
-
-            xml_name = self._html_search_regex(
-                r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
-                start_page, 'xml filename')
-            embed_url = '%s/xml/%s' % (xml_root, xml_name)
-            ie_key = 'DigitallySpeaking'
+        partner_id = self._search_regex(
+            r'/p(?:artner_id)?/(\d+)', manifest_url, 'partner id',
+            default='1670711')
+
+        kaltura_id = self._search_regex(
+            r'entry_id=(?P<id>(?:[^&])+)', manifest_url,
+            'kaltura id', group='id')
 
         return {
             '_type': 'url_transparent',
+            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
+            'ie_key': KalturaIE.ie_key(),
             'id': video_id,
             'display_id': display_id,
-            'url': embed_url,
-            'ie_key': ie_key,
-        }
+            'title': title,
+        }
\ No newline at end of file |