diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-03-21 20:59:03 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-03-21 21:27:33 +0530 |
commit | 037cc66ec8c7cb0dfe9f333a0079201868e44e1b (patch) | |
tree | eefaaa9021d8004a6e39ebdb0af5f6e3225fd247 | |
parent | 9160a0c6a251312917ea34ff60fdc9a22e364f11 (diff) | |
download | hypervideo-pre-037cc66ec8c7cb0dfe9f333a0079201868e44e1b.tar.lz hypervideo-pre-037cc66ec8c7cb0dfe9f333a0079201868e44e1b.tar.xz hypervideo-pre-037cc66ec8c7cb0dfe9f333a0079201868e44e1b.zip |
[linuxacademy] Improve regex
TODO: We need a more robust, standard regex for extracting JS objects from HTML.
-rw-r--r-- | yt_dlp/extractor/linuxacademy.py | 16 |
1 file changed, 13 insertions, 3 deletions
```diff
diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py
index 7ec4a6557..70c84c2c1 100644
--- a/yt_dlp/extractor/linuxacademy.py
+++ b/yt_dlp/extractor/linuxacademy.py
@@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'What Is Data Science',
             'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
-            'timestamp': 1607387907,
-            'upload_date': '20201208',
+            'timestamp': int,  # The timestamp and upload date changes
+            'upload_date': r're:\d+',
             'duration': 304,
         },
         'params': {
@@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
         },
         'playlist_count': 41,
         'skip': 'Requires Linux Academy account credentials',
+    }, {
+        'url': 'https://linuxacademy.com/cp/modules/view/id/39',
+        'info_dict': {
+            'id': '39',
+            'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
+            'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
+            'duration': 89280,
+        },
+        'playlist_count': 73,
+        'skip': 'Requires Linux Academy account credentials',
     }]
 
     _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
@@ -162,7 +172,7 @@ class LinuxAcademyIE(InfoExtractor):
         if course_id:
             module = self._parse_json(
                 self._search_regex(
-                    r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
+                    r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
                 item_id)
             entries = []
             chapter_number = None
```