about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp
diff options
context:
space:
mode:
author: Bepis <36346617+bbepis@users.noreply.github.com> 2022-02-19 23:00:51 +1100
committer: GitHub <noreply@github.com> 2022-02-19 04:00:51 -0800
commit: febff4c1194de0528c087274bc17e3a8be3296ba (patch)
tree: b23a151c69ebccdee292339d6d3973d9c150b0d2 /yt_dlp
parent: ed66a17ef0b18159dda901f0122520c25ea95d6b (diff)
download: hypervideo-pre-febff4c1194de0528c087274bc17e3a8be3296ba.tar.lz
hypervideo-pre-febff4c1194de0528c087274bc17e3a8be3296ba.tar.xz
hypervideo-pre-febff4c1194de0528c087274bc17e3a8be3296ba.zip
[tubitv] Fix/improve TV series extraction (#2829)
Authored by: bbepis
Diffstat (limited to 'yt_dlp')
-rw-r--r-- yt_dlp/extractor/tubitv.py 12
-rw-r--r-- yt_dlp/utils.py 2
2 files changed, 12 insertions, 2 deletions
diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py
index 2e9b325ba..e9b66ec77 100644
--- a/yt_dlp/extractor/tubitv.py
+++ b/yt_dlp/extractor/tubitv.py
@@ -107,6 +107,9 @@ class TubiTvIE(InfoExtractor):
'url': self._proto_relative_url(sub_url),
})
+ season_number, episode_number, episode_title = self._search_regex(
+ r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
+
return {
'id': video_id,
'title': title,
@@ -117,6 +120,9 @@ class TubiTvIE(InfoExtractor):
'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'),
'release_year': int_or_none(video_data.get('year')),
+ 'season_number': int_or_none(season_number),
+ 'episode_number': int_or_none(episode_number),
+ 'episode_title': episode_title
}
@@ -132,9 +138,11 @@ class TubiTvShowIE(InfoExtractor):
def _entries(self, show_url, show_name):
show_webpage = self._download_webpage(show_url, show_name)
+
show_json = self._parse_json(self._search_regex(
- r"window\.__data\s*=\s*({.+?});\s*</script>",
- show_webpage, 'data',), show_name, transform_source=js_to_json)['video']
+ r'window\.__data\s*=\s*({[^<]+});\s*</script>',
+ show_webpage, 'data'), show_name, transform_source=js_to_json)['video']
+
for episode_id in show_json['fullContentById'].keys():
yield self.url_result(
'tubitv:%s' % episode_id,
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index c5489d494..f5cad0e54 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3143,6 +3143,8 @@ def js_to_json(code, vars={}):
return '"%s"' % v
+ code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|