3 files changed, 62 insertions, 6 deletions
diff --git a/youtube/watch.py b/youtube/watch.py
index 7494b95..80885f9 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -56,7 +56,10 @@ def get_video_sources(info, target_resolution):
             continue
 
         # audio source
-        if fmt['acodec'] and not fmt['vcodec'] and fmt['audio_bitrate']:
+        if fmt['acodec'] and not fmt['vcodec'] and (
+                fmt['audio_bitrate'] or fmt['bitrate']):
+            if fmt['bitrate']:  # prefer this one, more accurate right now
+                fmt['audio_bitrate'] = int(fmt['bitrate']/1000)
             source = {
                 'type': 'audio/' + fmt['ext'],
                 'bitrate': fmt['audio_bitrate'],
@@ -77,15 +80,51 @@ def get_video_sources(info, target_resolution):
                                     + source['vcodec'] + '"')
             video_only_sources.append(source)
 
+    # Remove alternative mp4 codecs from video sources
+    def codec_name(vcodec):
+        if vcodec.startswith('avc'):
+            return 'h.264'
+        elif vcodec.startswith('av01'):
+            return 'av1'
+        else:
+            return 'unknown'
+    quality_to_codecs = {}
+    for src in video_only_sources:
+        if src['quality'] in quality_to_codecs:
+            quality_to_codecs[src['quality']].add(codec_name(src['vcodec']))
+        else:
+            quality_to_codecs[src['quality']] = {codec_name(src['vcodec'])}
+    i = 0
+    while i < len(video_only_sources):
+        src = video_only_sources[i]
+        codecs_for_quality = quality_to_codecs[src['quality']]
+        have_both = ('h.264' in codecs_for_quality
+                     and 'av1' in codecs_for_quality)
+        have_one = ('h.264' in codecs_for_quality
+                    or 'av1' in codecs_for_quality)
+        name = codec_name(src['vcodec'])
+        if name == 'unknown' and have_one:
+            del video_only_sources[i]
+            continue
+        if not have_both:
+            i += 1
+            continue
+        if name == 'av1' and settings.preferred_video_codec == 0:
+            del video_only_sources[i]
+        elif name == 'h.264' and settings.preferred_video_codec == 1:
+            del video_only_sources[i]
+        else:
+            i += 1
+
     audio_sources.sort(key=lambda source: source['audio_bitrate'])
     video_only_sources.sort(key=lambda src: src['quality'])
     uni_sources.sort(key=lambda src: src['quality'])
 
     for source in video_only_sources:
         # choose an audio source to go with it
-        # 0.15 is semiarbitrary empirical constant to spread audio sources
+        # 0.5 is semiarbitrary empirical constant to spread audio sources
         # between 144p and 1080p. Use something better eventually.
-        target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.15
+        target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.5
         compat_audios = [a for a in audio_sources if a['ext'] == source['ext']]
         if compat_audios:
             closest_audio_source = compat_audios[0]
@@ -421,7 +460,13 @@ def video_quality_string(format):
 def short_video_quality_string(fmt):
     result = str(fmt['quality'] or '?') + 'p'
     if fmt['fps']:
-        result += ' ' + str(fmt['fps']) + 'fps'
+        result += str(fmt['fps'])
+    if fmt['vcodec'].startswith('av01'):
+        result += ' AV1'
+    elif fmt['vcodec'].startswith('avc'):
+        result += ' h264'
+    else:
+        result += ' ' + fmt['vcodec']
     return result
 
 
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index ca999ba..f97597c 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -166,14 +166,17 @@ def extract_formatted_text(node):
         return [{'text': node['simpleText']}]
     return []
 
-def extract_int(string, default=None):
+def extract_int(string, default=None, whole_word=True):
     if isinstance(string, int):
         return string
     if not isinstance(string, str):
         string = extract_str(string)
     if not string:
         return default
-    match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+    if whole_word:
+        match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+    else:
+        match = re.search(r'(\d+)', string.replace(',', ''))
     if match is None:
         return default
     try:
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 43be909..e0af28e 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -445,6 +445,14 @@ def _extract_formats(info, player_response):
         for key, value in hardcoded_itag_info.items():
             conservative_update(fmt, key, value) # prefer info from YouTube
         fmt['quality'] = hardcoded_itag_info.get('height')
+        conservative_update(
+            fmt, 'quality',
+            extract_int(yt_fmt.get('quality'), whole_word=False)
+        )
+        conservative_update(
+            fmt, 'quality',
+            extract_int(yt_fmt.get('qualityLabel'), whole_word=False)
+        )
 
         info['formats'].append(fmt)