diff options
| author | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-10-31 13:26:44 +0530 | 
|---|---|---|
| committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-10-31 13:26:44 +0530 | 
| commit | a0bb6ce58db5b3124962037ca12e78cbd348f56c (patch) | |
| tree | 5e19e47b8eaa9849dc1fb6413e55d429a754d7ac | |
| parent | da4832007574a60b397dff11f26cc20cace685de (diff) | |
| download | hypervideo-pre-a0bb6ce58db5b3124962037ca12e78cbd348f56c.tar.lz hypervideo-pre-a0bb6ce58db5b3124962037ca12e78cbd348f56c.tar.xz hypervideo-pre-a0bb6ce58db5b3124962037ca12e78cbd348f56c.zip | |
[youtube] refactor itag processing
| -rw-r--r-- | yt_dlp/extractor/youtube.py | 56 | 
1 files changed, 23 insertions, 33 deletions
| diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56cd2ed8d..64475edec 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2434,7 +2434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          return prs, player_url      def _extract_formats(self, streaming_data, video_id, player_url, is_live): -        itags, stream_ids = [], [] +        itags, stream_ids = {}, []          itag_qualities, res_qualities = {}, {}          q = qualities([              # Normally tiny is the smallest video-only formats. But @@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                      throttled = True              if itag: -                itags.append(itag) +                itags[itag] = 'https'                  stream_ids.append(stream_id)              tbr = float_or_none( @@ -2548,46 +2548,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))          get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) -        def guess_quality(f): -            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)): -                if val in qdict: -                    return q(qdict[val]) -            return -1 +        def process_manifest_format(f, proto, itag): +            if itag in itags: +                if itags[itag] == proto or f'{itag}-{proto}' in itags: +                    return False +                itag = f'{itag}-{proto}' +            if itag: +                f['format_id'] = itag +                itags[itag] = proto + +            f['quality'] = next(( +                q(qdict[val]) +                for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) +                if val in qdict), -1) +            return True          for sd in streaming_data:              hls_manifest_url = get_hls and sd.get('hlsManifestUrl')              if hls_manifest_url:                  for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False): -                    itag = self._search_regex( -                        r'/itag/(\d+)', f['url'], 'itag', default=None) -                    if itag in itags: -                        itag += '-hls' -                        if itag in itags: -                            continue -                    if itag: -                        f['format_id'] = itag -                        itags.append(itag) -                    f['quality'] = guess_quality(f) -                    yield f +                    if process_manifest_format(f, 'hls', self._search_regex( +                            r'/itag/(\d+)', f['url'], 'itag', default=None)): +                        yield f              dash_manifest_url = get_dash and sd.get('dashManifestUrl')              if dash_manifest_url:                  for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): -                    itag = f['format_id'] -                    if itag in itags: -                        itag += '-dash' -                        if itag in itags: -                            continue -                    if itag: -                        f['format_id'] = itag -                        itags.append(itag) -                    f['quality'] = guess_quality(f) -                    filesize = int_or_none(self._search_regex( -                        r'/clen/(\d+)', f.get('fragment_base_url') -                        or f['url'], 'file size', default=None)) -                    if filesize: -                        f['filesize'] = filesize -                    yield f +                    if process_manifest_format(f, 'dash', f['format_id']): +                        f['filesize'] = int_or_none(self._search_regex( +                            r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) +                        yield f      def _real_extract(self, url):          url, smuggled_data = unsmuggle_url(url, {}) | 
