about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author pukkandan <pukkandan.ytdlp@gmail.com> 2022-03-16 19:46:19 +0530
committer pukkandan <pukkandan.ytdlp@gmail.com> 2022-03-16 19:46:29 +0530
commit a1b2d84360f625701eb1ae67452ea8a3c7e276e8 (patch)
tree 5b7c21bc0c060600f0d0a239baea546c993a5f98
parent 5dbc77df267b4b3b91af0559d2e72f616551f445 (diff)
download hypervideo-pre-a1b2d84360f625701eb1ae67452ea8a3c7e276e8.tar.lz
hypervideo-pre-a1b2d84360f625701eb1ae67452ea8a3c7e276e8.tar.xz
hypervideo-pre-a1b2d84360f625701eb1ae67452ea8a3c7e276e8.zip
[youtube] Avoid false positives when detecting damaged formats
Closes #3083
-rw-r--r-- yt_dlp/extractor/youtube.py 24
1 files changed, 13 insertions, 11 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 199225985..4143116ef 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1297,7 +1297,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'expected_warnings': [
'DASH manifest missing',
- 'Some formats are possibly damaged'
]
},
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
@@ -3013,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.report_warning(last_error)
return prs, player_url
- def _extract_formats(self, streaming_data, video_id, player_url, is_live):
+ def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration):
itags, stream_ids = {}, []
itag_qualities, res_qualities = {}, {}
q = qualities([
@@ -3024,7 +3023,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
- approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None
for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@@ -3091,7 +3089,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
- is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
+ # Make sure to avoid false positives with small duration differences.
+ # Eg: __2ABJjxzNo, ySuUZEjARPY
+ is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
if is_damaged:
self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
dct = {
@@ -3227,14 +3227,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return webpage, master_ytcfg, player_responses, player_url
- def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
+ def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
is_live = get_first(video_details, 'isLive')
if is_live is None:
is_live = get_first(live_broadcast_details, 'isLiveNow')
streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
- formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
+ formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration))
return live_broadcast_details, is_live, streaming_data, formats
@@ -3315,7 +3315,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self.playlist_result(
entries, video_id, video_title, video_description)
- live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
+ duration = int_or_none(
+ get_first(video_details, 'lengthSeconds')
+ or get_first(microformats, 'lengthSeconds')
+ or parse_duration(search_meta('duration'))) or None
+
+ live_broadcast_details, is_live, streaming_data, formats = self._list_formats(
+ video_id, microformats, video_details, player_responses, player_url, duration)
if not formats:
if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
@@ -3387,10 +3393,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
get_first(video_details, 'channelId')
or get_first(microformats, 'externalChannelId')
or search_meta('channelId'))
- duration = int_or_none(
- get_first(video_details, 'lengthSeconds')
- or get_first(microformats, 'lengthSeconds')
- or parse_duration(search_meta('duration'))) or None
owner_profile_url = get_first(microformats, 'ownerProfileUrl')
live_content = get_first(video_details, 'isLiveContent')