diff options
author | Jesús <heckyel@hyperbola.info> | 2021-10-18 10:04:47 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-10-18 10:04:47 -0500 |
commit | cabfc066cb929d5496d984a0fde895a059dc0648 (patch) | |
tree | cc0ab44a1c6113c6116fa15e809204f98cee1e73 /youtube_dl/extractor/linuxacademy.py | |
parent | d98d94032e3b45d4a1dc21404e2a520964aadedd (diff) | |
download | hypervideo-pre-cabfc066cb929d5496d984a0fde895a059dc0648.tar.lz hypervideo-pre-cabfc066cb929d5496d984a0fde895a059dc0648.tar.xz hypervideo-pre-cabfc066cb929d5496d984a0fde895a059dc0648.zip |
update from upstream
Diffstat (limited to 'youtube_dl/extractor/linuxacademy.py')
-rw-r--r-- | youtube_dl/extractor/linuxacademy.py | 243 |
1 files changed, 0 insertions, 243 deletions
diff --git a/youtube_dl/extractor/linuxacademy.py b/youtube_dl/extractor/linuxacademy.py deleted file mode 100644 index 7ec4a6557..000000000 --- a/youtube_dl/extractor/linuxacademy.py +++ /dev/null @@ -1,243 +0,0 @@ -from __future__ import unicode_literals - -import json -import random -import re - -from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_HTTPError, - compat_str, -) -from ..utils import ( - clean_html, - ExtractorError, - js_to_json, - parse_duration, - try_get, - unified_timestamp, - urlencode_postdata, - urljoin, -) - - -class LinuxAcademyIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - (?:www\.)?linuxacademy\.com/cp/ - (?: - courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)| - modules/view/id/(?P<course_id>\d+) - ) - ''' - _TESTS = [{ - 'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675', - 'info_dict': { - 'id': '7971-2', - 'ext': 'mp4', - 'title': 'What Is Data Science', - 'description': 'md5:c574a3c20607144fb36cb65bdde76c99', - 'timestamp': 1607387907, - 'upload_date': '20201208', - 'duration': 304, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Requires Linux Academy account credentials', - }, { - 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2', - 'only_matching': True, - }, { - 'url': 'https://linuxacademy.com/cp/modules/view/id/154', - 'info_dict': { - 'id': '154', - 'title': 'AWS Certified Cloud Practitioner', - 'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c', - 'duration': 28835, - }, - 'playlist_count': 41, - 'skip': 'Requires Linux Academy account credentials', - }] - - _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize' - _ORIGIN_URL = 'https://linuxacademy.com' - _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx' - _NETRC_MACHINE = 'linuxacademy' - - def _real_initialize(self): - self._login() - - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - def random_string(): - return ''.join([ - random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVXYZabcdefghijklmnopqrstuvwxyz-._~') - for _ in range(32)]) - - webpage, urlh = self._download_webpage_handle( - self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ - 'client_id': self._CLIENT_ID, - 'response_type': 'token id_token', - 'response_mode': 'web_message', - 'redirect_uri': self._ORIGIN_URL, - 'scope': 'openid email user_impersonation profile', - 'audience': self._ORIGIN_URL, - 'state': random_string(), - 'nonce': random_string(), - }) - - login_data = self._parse_json( - self._search_regex( - r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, - 'login info', group='value'), None, - transform_source=lambda x: compat_b64decode(x).decode('utf-8') - )['extraParams'] - - login_data.update({ - 'client_id': self._CLIENT_ID, - 'redirect_uri': self._ORIGIN_URL, - 'tenant': 'lacausers', - 'connection': 'Username-Password-Authentication', - 'username': username, - 'password': password, - 'sso': 'true', - }) - - login_state_url = urlh.geturl() - - try: - login_page = self._download_webpage( - 'https://login.linuxacademy.com/usernamepassword/login', None, - 'Downloading login page', data=json.dumps(login_data).encode(), - headers={ - 'Content-Type': 'application/json', - 'Origin': 'https://login.linuxacademy.com', - 'Referer': login_state_url, - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read(), None) - message = error.get('description') or error['code'] - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, message), expected=True) - raise - - callback_page, urlh = self._download_webpage_handle( - 'https://login.linuxacademy.com/login/callback', None, - 'Downloading callback page', - data=urlencode_postdata(self._hidden_inputs(login_page)), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Origin': 'https://login.linuxacademy.com', - 'Referer': login_state_url, - }) - - access_token = self._search_regex( - r'access_token=([^=&]+)', urlh.geturl(), - 'access token', default=None) - if not access_token: - access_token = self._parse_json( - self._search_regex( - r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page, - 'authorization response'), None, - transform_source=js_to_json)['response']['access_token'] - - self._download_webpage( - 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' - % access_token, None, 'Downloading token validation page') - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id') - item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id) - - webpage = self._download_webpage(url, item_id) - - # course path - if course_id: - module = self._parse_json( - self._search_regex( - r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'), - item_id) - entries = [] - chapter_number = None - chapter = None - chapter_id = None - for item in module['items']: - if not isinstance(item, dict): - continue - - def type_field(key): - return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower() - type_fields = (type_field('name'), type_field('slug')) - # Move to next module section - if 'section' in type_fields: - chapter = item.get('course_name') - chapter_id = item.get('course_module') - chapter_number = 1 if not chapter_number else chapter_number + 1 - continue - # Skip non-lessons - if 'lesson' not in type_fields: - continue - lesson_url = urljoin(url, item.get('url')) - if not lesson_url: - continue - title = item.get('title') or item.get('lesson_name') - description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text')) - entries.append({ - '_type': 'url_transparent', - 'url': lesson_url, - 'ie_key': LinuxAcademyIE.ie_key(), - 'title': title, - 'description': description, - 'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')), - 'duration': parse_duration(item.get('duration')), - 'chapter': chapter, - 'chapter_id': chapter_id, - 'chapter_number': chapter_number, - }) - return { - '_type': 'playlist', - 'entries': entries, - 'id': course_id, - 'title': module.get('title'), - 'description': module.get('md_desc') or clean_html(module.get('desc')), - 'duration': parse_duration(module.get('duration')), - } - - # single video path - m3u8_url = self._parse_json( - self._search_regex( - r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'), - item_id)[0]['file'] - formats = self._extract_m3u8_formats( - m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') - self._sort_formats(formats) - info = { - 'id': item_id, - 'formats': formats, - } - lesson = self._parse_json( - self._search_regex( - (r'window\.lesson\s*=\s*({.+?})\s*;', - r'player\.lesson\s*=\s*({.+?})\s*;'), - webpage, 'lesson', default='{}'), item_id, fatal=False) - if lesson: - info.update({ - 'title': lesson.get('lesson_name'), - 'description': lesson.get('md_desc') or clean_html(lesson.get('desc')), - 'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')), - 'duration': parse_duration(lesson.get('duration')), - }) - if not info.get('title'): - info['title'] = self._search_regex( - (r'>Lecture\s*:\s*(?P<value>[^<]+)', - r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, - 'title', group='value') - return info |