aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpukkandan <pukkandan.ytdlp@gmail.com>2021-03-21 20:59:03 +0530
committerpukkandan <pukkandan.ytdlp@gmail.com>2021-03-21 21:27:33 +0530
commit037cc66ec8c7cb0dfe9f333a0079201868e44e1b (patch)
treeeefaaa9021d8004a6e39ebdb0af5f6e3225fd247
parent9160a0c6a251312917ea34ff60fdc9a22e364f11 (diff)
downloadhypervideo-pre-037cc66ec8c7cb0dfe9f333a0079201868e44e1b.tar.lz
hypervideo-pre-037cc66ec8c7cb0dfe9f333a0079201868e44e1b.tar.xz
hypervideo-pre-037cc66ec8c7cb0dfe9f333a0079201868e44e1b.zip
[linuxacademy] Improve regex
TODO: We need to make a more robust standard regex for fetching JS objects from HTML
-rw-r--r--yt_dlp/extractor/linuxacademy.py16
1 files changed, 13 insertions, 3 deletions
diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py
index 7ec4a6557..70c84c2c1 100644
--- a/yt_dlp/extractor/linuxacademy.py
+++ b/yt_dlp/extractor/linuxacademy.py
@@ -38,8 +38,8 @@ class LinuxAcademyIE(InfoExtractor):
'ext': 'mp4',
'title': 'What Is Data Science',
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
- 'timestamp': 1607387907,
- 'upload_date': '20201208',
+ 'timestamp': int, # The timestamp and upload date changes
+ 'upload_date': r're:\d+',
'duration': 304,
},
'params': {
@@ -59,6 +59,16 @@ class LinuxAcademyIE(InfoExtractor):
},
'playlist_count': 41,
'skip': 'Requires Linux Academy account credentials',
+ }, {
+ 'url': 'https://linuxacademy.com/cp/modules/view/id/39',
+ 'info_dict': {
+ 'id': '39',
+ 'title': 'Red Hat Certified Systems Administrator - RHCSA (EX200) Exam Prep (legacy)',
+ 'description': 'md5:0f1d3369e90c3fb14a79813b863c902f',
+ 'duration': 89280,
+ },
+ 'playlist_count': 73,
+ 'skip': 'Requires Linux Academy account credentials',
}]
_AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
@@ -162,7 +172,7 @@ class LinuxAcademyIE(InfoExtractor):
if course_id:
module = self._parse_json(
self._search_regex(
- r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
+ r'window\.module\s*=\s*({(?:(?!};)[^"]|"([^"]|\\")*")+})\s*;', webpage, 'module'),
item_id)
entries = []
chapter_number = None