about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- yt_dlp/extractor/bilibili.py 7
-rw-r--r-- yt_dlp/extractor/generic.py 10
-rw-r--r-- yt_dlp/extractor/tubitv.py 12
-rw-r--r-- yt_dlp/extractor/twitcasting.py 3
-rw-r--r-- yt_dlp/extractor/youtube.py 2
-rw-r--r-- yt_dlp/extractor/zee5.py 10
-rw-r--r-- yt_dlp/utils.py 2
7 files changed, 34 insertions, 12 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index a775aa97f..1bbf7ca1c 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -225,10 +225,6 @@ class BiliBiliIE(InfoExtractor):
'quality': -2 if 'hd.mp4' in backup_url else -3,
})
- for a_format in formats:
- a_format.setdefault('http_headers', {}).update({
- 'Referer': url,
- })
for audio in audios:
formats.append({
'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'),
@@ -252,6 +248,9 @@ class BiliBiliIE(InfoExtractor):
'id': video_id,
'duration': float_or_none(durl.get('length'), 1000),
'formats': formats,
+ 'http_headers': {
+ 'Referer': url,
+ },
})
break
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index baedd7948..d975e4bdb 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -3991,12 +3991,16 @@ class GenericIE(InfoExtractor):
# here's a fun little line of code for you:
video_id = os.path.splitext(video_id)[0]
+ headers = {
+ 'referer': full_response.geturl()
+ }
entry_info_dict = {
'id': video_id,
'uploader': video_uploader,
'title': video_title,
'age_limit': age_limit,
+ 'http_headers': headers,
}
if RtmpIE.suitable(video_url):
@@ -4014,11 +4018,11 @@ class GenericIE(InfoExtractor):
elif ext == 'xspf':
return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
elif ext == 'm3u8':
- entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4')
+ entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
elif ext == 'mpd':
- entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id)
+ entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
elif ext == 'f4m':
- entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
# Just matching .ism/manifest is not enough to be reliably sure
# whether it's actually an ISM manifest or some other streaming
diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py
index 2e9b325ba..e9b66ec77 100644
--- a/yt_dlp/extractor/tubitv.py
+++ b/yt_dlp/extractor/tubitv.py
@@ -107,6 +107,9 @@ class TubiTvIE(InfoExtractor):
'url': self._proto_relative_url(sub_url),
})
+ season_number, episode_number, episode_title = self._search_regex(
+ r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
+
return {
'id': video_id,
'title': title,
@@ -117,6 +120,9 @@ class TubiTvIE(InfoExtractor):
'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'),
'release_year': int_or_none(video_data.get('year')),
+ 'season_number': int_or_none(season_number),
+ 'episode_number': int_or_none(episode_number),
+ 'episode_title': episode_title
}
@@ -132,9 +138,11 @@ class TubiTvShowIE(InfoExtractor):
def _entries(self, show_url, show_name):
show_webpage = self._download_webpage(show_url, show_name)
+
show_json = self._parse_json(self._search_regex(
- r"window\.__data\s*=\s*({.+?});\s*</script>",
- show_webpage, 'data',), show_name, transform_source=js_to_json)['video']
+ r'window\.__data\s*=\s*({[^<]+});\s*</script>',
+ show_webpage, 'data'), show_name, transform_source=js_to_json)['video']
+
for episode_id in show_json['fullContentById'].keys():
yield self.url_result(
'tubitv:%s' % episode_id,
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 08222df95..af911de98 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -94,6 +94,9 @@ class TwitCastingIE(InfoExtractor):
urlh.geturl(), video_id, data=request_data,
headers={'Origin': 'https://twitcasting.tv'},
note='Retrying authentication')
+ # has to check here as the first request can contain password input form even if the password is correct
+ if re.search(r'<form\s+method="POST">\s*<input\s+[^>]+?name="password"', webpage):
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
title = (clean_html(get_element_by_id('movietitle', webpage))
or self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True))
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index c03637f5f..4d9815eb3 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2418,7 +2418,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not idx:
return nfunc
return json.loads(js_to_json(self._search_regex(
- rf'var {nfunc}\s*=\s*(\[.+?\]);', jscode,
+ rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
def _extract_n_function(self, video_id, player_url):
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index 55c225d85..ebe393ec7 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -23,7 +23,7 @@ class Zee5IE(InfoExtractor):
zee5:|
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
(?:
- (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){3}
+ (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3}
|movies/[^#/?]+
)/(?P<display_id>[^#/?]+)/
)
@@ -82,6 +82,9 @@ class Zee5IE(InfoExtractor):
}, {
'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730',
'only_matching': True
+ }, {
+ 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412',
+ 'only_matching': True
}]
_DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false'
_DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb'
@@ -179,7 +182,7 @@ class Zee5SeriesIE(InfoExtractor):
(?:
zee5:series:|
https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
- (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){2}/
+ (?:tv-shows|web-series|kids|zee5originals)(?:/[^#/?]+){2}/
)
(?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
'''
@@ -216,6 +219,9 @@ class Zee5SeriesIE(InfoExtractor):
}, {
'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
'only_matching': True,
+ }, {
+ 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408',
+ 'only_matching': True,
}]
def _entries(self, show_id):
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 51931f164..b3f2a0625 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3017,6 +3017,8 @@ def js_to_json(code, vars={}):
return '"%s"' % v
+ code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
+
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|