about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Unknown <blackjack4494@web.de> 2020-09-13 01:04:02 +0200
committer Unknown <blackjack4494@web.de> 2020-09-13 01:04:02 +0200
commit d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e (patch)
tree 9ba92d0f89b072450e5ce0233c7427946e90c4fa
parent dc6193cb220353a26218da2f690f11bca55c9757 (diff)
download hypervideo-pre-d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e.tar.lz
hypervideo-pre-d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e.tar.xz
hypervideo-pre-d23f9ec7ebdb11cfa3bb3b3a9eb388a6363a878e.zip
[gdcvault] fix extractor
at least when not logged in?
-rw-r--r-- youtube_dlc/extractor/gdcvault.py 73
1 files changed, 22 insertions, 51 deletions
diff --git a/youtube_dlc/extractor/gdcvault.py b/youtube_dlc/extractor/gdcvault.py
index 2f555c1d4..1220be2d9 100644
--- a/youtube_dlc/extractor/gdcvault.py
+++ b/youtube_dlc/extractor/gdcvault.py
@@ -122,67 +122,38 @@ class GDCVaultIE(InfoExtractor):
request = sanitized_Request(login_url, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
self._download_webpage(request, display_id, 'Logging in')
- start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
+ webpage = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
self._download_webpage(logout_url, display_id, 'Logging out')
- return start_page
+ return webpage
def _real_extract(self, url):
video_id, name = re.match(self._VALID_URL, url).groups()
display_id = name or video_id
- webpage_url = 'http://www.gdcvault.com/play/' + video_id
- start_page = self._download_webpage(webpage_url, display_id)
-
- direct_url = self._search_regex(
- r's1\.addVariable\("file",\s*encodeURIComponent\("(/[^"]+)"\)\);',
- start_page, 'url', default=None)
- if direct_url:
- title = self._html_search_regex(
- r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
- start_page, 'title')
- video_url = 'http://www.gdcvault.com' + direct_url
- # resolve the url so that we can detect the correct extension
- video_url = self._request_webpage(
- HEADRequest(video_url), video_id).geturl()
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'url': video_url,
- 'title': title,
- }
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_regex(
+ r'<td><strong>Session Name:?</strong></td>\s*<td>(.*?)</td>',
+ webpage, 'title')
+
+ PLAYER_REGEX = r'<iframe src=\"(?P<manifest_url>.*?)\".*?</iframe>'
+ manifest_url = self._html_search_regex(
+ PLAYER_REGEX, webpage, 'manifest_url')
- embed_url = KalturaIE._extract_url(start_page)
- if embed_url:
- embed_url = smuggle_url(embed_url, {'source_url': url})
- ie_key = 'Kaltura'
- else:
- PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
-
- xml_root = self._html_search_regex(
- PLAYER_REGEX, start_page, 'xml root', default=None)
- if xml_root is None:
- # Probably need to authenticate
- login_res = self._login(webpage_url, display_id)
- if login_res is None:
- self.report_warning('Could not login.')
- else:
- start_page = login_res
- # Grab the url from the authenticated page
- xml_root = self._html_search_regex(
- PLAYER_REGEX, start_page, 'xml root')
-
- xml_name = self._html_search_regex(
- r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
- start_page, 'xml filename')
- embed_url = '%s/xml/%s' % (xml_root, xml_name)
- ie_key = 'DigitallySpeaking'
+ partner_id = self._search_regex(
+ r'/p(?:artner_id)?/(\d+)', manifest_url, 'partner id',
+ default='1670711')
+
+ kaltura_id = self._search_regex(
+ r'entry_id=(?P<id>(?:[^&])+)', manifest_url,
+ 'kaltura id', group='id')
return {
'_type': 'url_transparent',
+ 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
+ 'ie_key': KalturaIE.ie_key(),
'id': video_id,
'display_id': display_id,
- 'url': embed_url,
- 'ie_key': ie_key,
- }
+ 'title': title,
+ }
\ No newline at end of file