diff options
-rw-r--r-- | yt_dlp/extractor/bilibili.py | 7 | ||||
-rw-r--r-- | yt_dlp/extractor/generic.py | 10 | ||||
-rw-r--r-- | yt_dlp/extractor/tubitv.py | 12 | ||||
-rw-r--r-- | yt_dlp/extractor/twitcasting.py | 3 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/zee5.py | 10 | ||||
-rw-r--r-- | yt_dlp/utils.py | 2 |
7 files changed, 34 insertions, 12 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a775aa97f..1bbf7ca1c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -225,10 +225,6 @@ class BiliBiliIE(InfoExtractor): 'quality': -2 if 'hd.mp4' in backup_url else -3, }) - for a_format in formats: - a_format.setdefault('http_headers', {}).update({ - 'Referer': url, - }) for audio in audios: formats.append({ 'url': audio.get('baseUrl') or audio.get('base_url') or audio.get('url'), @@ -252,6 +248,9 @@ class BiliBiliIE(InfoExtractor): 'id': video_id, 'duration': float_or_none(durl.get('length'), 1000), 'formats': formats, + 'http_headers': { + 'Referer': url, + }, }) break diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index baedd7948..d975e4bdb 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -3991,12 +3991,16 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] + headers = { + 'referer': full_response.geturl() + } entry_info_dict = { 'id': video_id, 'uploader': video_uploader, 'title': video_title, 'age_limit': age_limit, + 'http_headers': headers, } if RtmpIE.suitable(video_url): @@ -4014,11 +4018,11 @@ class GenericIE(InfoExtractor): elif ext == 'xspf': return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) elif ext == 'm3u8': - entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4') + entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers) elif ext == 'mpd': - entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id) + entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers) elif ext == 'f4m': - entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id) + entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers) elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: # Just matching .ism/manifest is not enough to be reliably sure # whether it's actually an ISM manifest or some other streaming diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 2e9b325ba..e9b66ec77 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -107,6 +107,9 @@ class TubiTvIE(InfoExtractor): 'url': self._proto_relative_url(sub_url), }) + season_number, episode_number, episode_title = self._search_regex( + r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None)) + return { 'id': video_id, 'title': title, @@ -117,6 +120,9 @@ class TubiTvIE(InfoExtractor): 'duration': int_or_none(video_data.get('duration')), 'uploader_id': video_data.get('publisher_id'), 'release_year': int_or_none(video_data.get('year')), + 'season_number': int_or_none(season_number), + 'episode_number': int_or_none(episode_number), + 'episode_title': episode_title } @@ -132,9 +138,11 @@ class TubiTvShowIE(InfoExtractor): def _entries(self, show_url, show_name): show_webpage = self._download_webpage(show_url, show_name) + show_json = self._parse_json(self._search_regex( - r"window\.__data\s*=\s*({.+?});\s*</script>", - show_webpage, 'data',), show_name, transform_source=js_to_json)['video'] + r'window\.__data\s*=\s*({[^<]+});\s*</script>', + show_webpage, 'data'), show_name, transform_source=js_to_json)['video'] + for episode_id in show_json['fullContentById'].keys(): yield self.url_result( 'tubitv:%s' % episode_id, diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 08222df95..af911de98 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -94,6 +94,9 @@ class TwitCastingIE(InfoExtractor): urlh.geturl(), video_id, data=request_data, headers={'Origin': 'https://twitcasting.tv'}, note='Retrying authentication') + # has to check here as the first request can contain password input form even if the password is correct + if re.search(r'<form\s+method="POST">\s*<input\s+[^>]+?name="password"', webpage): + raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) title = (clean_html(get_element_by_id('movietitle', webpage)) or self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True)) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c03637f5f..4d9815eb3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2418,7 +2418,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not idx: return nfunc return json.loads(js_to_json(self._search_regex( - rf'var {nfunc}\s*=\s*(\[.+?\]);', jscode, + rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode, f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)] def _extract_n_function(self, video_id, player_url): diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 55c225d85..ebe393ec7 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -23,7 +23,7 @@ class Zee5IE(InfoExtractor): zee5:| https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? (?: - (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){3} + (?:tv-shows|kids|web-series|zee5originals)(?:/[^#/?]+){3} |movies/[^#/?]+ )/(?P<display_id>[^#/?]+)/ ) @@ -82,6 +82,9 @@ class Zee5IE(InfoExtractor): }, { 'url': 'https://www.zee5.com/global/hi/tv-shows/details/kundali-bhagya/0-6-366/kundali-bhagya-march-08-2021/0-1-manual_7g9jv1os7730', 'only_matching': True + }, { + 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412', + 'only_matching': True }] _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false' _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb' @@ -179,7 +182,7 @@ class Zee5SeriesIE(InfoExtractor): (?: zee5:series:| https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? - (?:tv-shows|kids|zee5originals)(?:/[^#/?]+){2}/ + (?:tv-shows|web-series|kids|zee5originals)(?:/[^#/?]+){2}/ ) (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#]) ''' @@ -216,6 +219,9 @@ class Zee5SeriesIE(InfoExtractor): }, { 'url': 'https://www.zee5.com/tv-shows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes', 'only_matching': True, + }, { + 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408', + 'only_matching': True, }] def _entries(self, show_id): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 51931f164..b3f2a0625 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3017,6 +3017,8 @@ def js_to_json(code, vars={}): return '"%s"' % v + code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) + return re.sub(r'''(?sx) "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| |