diff options
Diffstat (limited to 'hypervideo_dl/extractor/udemy.py')
-rw-r--r-- | hypervideo_dl/extractor/udemy.py | 35 |
1 files changed, 22 insertions, 13 deletions
diff --git a/hypervideo_dl/extractor/udemy.py b/hypervideo_dl/extractor/udemy.py index 4faad58..117acc7 100644 --- a/hypervideo_dl/extractor/udemy.py +++ b/hypervideo_dl/extractor/udemy.py @@ -1,8 +1,9 @@ import re -import urllib.request from .common import InfoExtractor -from ..compat import compat_HTTPError, compat_str, compat_urlparse +from ..compat import compat_str, compat_urlparse +from ..networking import Request +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, determine_ext, @@ -10,9 +11,10 @@ from ..utils import ( float_or_none, int_or_none, js_to_json, - sanitized_Request, + smuggle_url, try_get, unescapeHTML, + unsmuggle_url, url_or_none, urlencode_postdata, ) @@ -106,7 +108,7 @@ class UdemyIE(InfoExtractor): % (course_id, lecture_id), lecture_id, 'Downloading lecture JSON', query={ 'fields[lecture]': 'title,description,view_html,asset', - 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', + 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed', }) def _handle_error(self, response): @@ -151,11 +153,10 @@ class UdemyIE(InfoExtractor): headers['X-Udemy-Bearer-Token'] = cookie.value headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value - if isinstance(url_or_request, urllib.request.Request): - for header, value in headers.items(): - url_or_request.add_header(header, value) + if isinstance(url_or_request, Request): + url_or_request.headers.update(headers) else: - url_or_request = sanitized_Request(url_or_request, headers=headers) + url_or_request = Request(url_or_request, headers=headers) response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs) self._handle_error(response) @@ -199,16 +200,19 @@ class UdemyIE(InfoExtractor): def _real_extract(self, url): lecture_id = self._match_id(url) + course_id = unsmuggle_url(url, {})[1].get('course_id') - webpage = self._download_webpage(url, lecture_id) - - course_id, _ = self._extract_course_info(webpage, lecture_id) + webpage = None + if not course_id: + webpage = self._download_webpage(url, lecture_id) + course_id, _ = self._extract_course_info(webpage, lecture_id) try: lecture = self._download_lecture(course_id, lecture_id) except ExtractorError as e: # Error could possibly mean we are not enrolled in the course - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: + webpage = webpage or self._download_webpage(url, lecture_id) self._enroll_course(url, webpage, course_id) lecture = self._download_lecture(course_id, lecture_id) else: @@ -391,6 +395,9 @@ class UdemyIE(InfoExtractor): if f.get('url'): formats.append(f) + if not formats and asset.get('course_is_drmed'): + self.report_drm(video_id) + return { 'id': video_id, 'title': title, @@ -449,7 +456,9 @@ class UdemyCourseIE(UdemyIE): # XXX: Do not subclass from concrete IE if lecture_id: entry = { '_type': 'url_transparent', - 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']), + 'url': smuggle_url( + f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}', + {'course_id': course_id}), 'title': entry.get('title'), 'ie_key': UdemyIE.ie_key(), } |