Support more audio and video qualities

Adds support for AV1-encoded videos, which includes any videos above 1080p. These were not being included because they had no quality entry in the format table at the top of watch_extraction.py, so the quality is now taken from the format's quality labels when it is missing from the table. Because YouTube often provides BOTH AV1 and H.264 (AVC) for each quality, including them results in far too many quality options, so the code needs to choose which one to use. The choice is somewhat hard: AV1 is encoded in fewer bytes than H.264 and is patent-free; however, it has less hardware support, so it might be more difficult to play. For instance, on my system, AV1 does not work at 1080p, but H.264 does. Adds a setting for which codec to prefer, with H.264 as the default. Also adds support for the lower-quality mp4 audio format, which now gets used at 144p to save network bandwidth. For similar reasons, it was not being included because it had no audio_bitrate entry in the table. The format's bitrate is now preferred over the table's audio_bitrate when determining audio quality. Signed-off-by: Jesús <heckyel@hyperbola.info>
author: James Taylor <user234683@users.noreply.github.com> 2021-08-31 13:38:28 -0700
committer: Jesús <heckyel@hyperbola.info> 2021-08-31 16:40:19 -0500
commit: 7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62 (patch)
tree: fb56107188cda2871799c15571cb98e21cfff286
parent: 30e59081b14c98b49f718a1bc131ac46d09c84bf (diff)
download: yt-local-7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62.tar.lz
yt-local-7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62.tar.xz
yt-local-7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62.zip
4 files changed, 73 insertions, 6 deletions
diff --git a/settings.py b/settings.py
index c97e3d8..fdaebc7 100644
--- a/settings.py
+++ b/settings.py
@@ -168,6 +168,17 @@ For security reasons, enabling this is not recommended.''',
         'category': 'playback',
     }),
 
+    ('preferred_video_codec', {
+        'type': int,
+        'default': 0,
+        'comment': '',
+        'options': [
+            (0, 'h.264'),
+            (1, 'AV1'),
+        ],
+        'category': 'playback',
+    }),
+
     ('prefer_uni_sources', {
         'label': 'Prefer integrated sources',
         'type': bool,
diff --git a/youtube/watch.py b/youtube/watch.py
index 7494b95..80885f9 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -56,7 +56,10 @@ def get_video_sources(info, target_resolution):
             continue
 
         # audio source
-        if fmt['acodec'] and not fmt['vcodec'] and fmt['audio_bitrate']:
+        if fmt['acodec'] and not fmt['vcodec'] and (
+                fmt['audio_bitrate'] or fmt['bitrate']):
+            if fmt['bitrate']:  # prefer this one, more accurate right now
+                fmt['audio_bitrate'] = int(fmt['bitrate']/1000)
             source = {
                 'type': 'audio/' + fmt['ext'],
                 'bitrate': fmt['audio_bitrate'],
@@ -77,15 +80,51 @@ def get_video_sources(info, target_resolution):
                                     + source['vcodec'] + '"')
             video_only_sources.append(source)
 
+    # Remove alternative mp4 codecs from video sources
+    def codec_name(vcodec):
+        if vcodec.startswith('avc'):
+            return 'h.264'
+        elif vcodec.startswith('av01'):
+            return 'av1'
+        else:
+            return 'unknown'
+    quality_to_codecs = {}
+    for src in video_only_sources:
+        if src['quality'] in quality_to_codecs:
+            quality_to_codecs[src['quality']].add(codec_name(src['vcodec']))
+        else:
+            quality_to_codecs[src['quality']] = {codec_name(src['vcodec'])}
+    i = 0
+    while i < len(video_only_sources):
+        src = video_only_sources[i]
+        codecs_for_quality = quality_to_codecs[src['quality']]
+        have_both = ('h.264' in codecs_for_quality
+                     and 'av1' in codecs_for_quality)
+        have_one = ('h.264' in codecs_for_quality
+                    or 'av1' in codecs_for_quality)
+        name = codec_name(src['vcodec'])
+        if name == 'unknown' and have_one:
+            del video_only_sources[i]
+            continue
+        if not have_both:
+            i += 1
+            continue
+        if name == 'av1' and settings.preferred_video_codec == 0:
+            del video_only_sources[i]
+        elif name == 'h.264' and settings.preferred_video_codec == 1:
+            del video_only_sources[i]
+        else:
+            i += 1
+
     audio_sources.sort(key=lambda source: source['audio_bitrate'])
     video_only_sources.sort(key=lambda src: src['quality'])
     uni_sources.sort(key=lambda src: src['quality'])
 
     for source in video_only_sources:
         # choose an audio source to go with it
-        # 0.15 is semiarbitrary empirical constant to spread audio sources
+        # 0.5 is semiarbitrary empirical constant to spread audio sources
         # between 144p and 1080p. Use something better eventually.
-        target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.15
+        target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.5
         compat_audios = [a for a in audio_sources if a['ext'] == source['ext']]
         if compat_audios:
             closest_audio_source = compat_audios[0]
@@ -421,7 +460,13 @@ def video_quality_string(format):
 def short_video_quality_string(fmt):
     result = str(fmt['quality'] or '?') + 'p'
     if fmt['fps']:
-        result += ' ' + str(fmt['fps']) + 'fps'
+        result += str(fmt['fps'])
+    if fmt['vcodec'].startswith('av01'):
+        result += ' AV1'
+    elif fmt['vcodec'].startswith('avc'):
+        result += ' h264'
+    else:
+        result += ' ' + fmt['vcodec']
     return result
 
 
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index ca999ba..f97597c 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -166,14 +166,17 @@ def extract_formatted_text(node):
         return [{'text': node['simpleText']}]
     return []
 
-def extract_int(string, default=None):
+def extract_int(string, default=None, whole_word=True):
     if isinstance(string, int):
         return string
     if not isinstance(string, str):
         string = extract_str(string)
     if not string:
         return default
-    match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+    if whole_word:
+        match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+    else:
+        match = re.search(r'(\d+)', string.replace(',', ''))
     if match is None:
         return default
     try:
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 43be909..e0af28e 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -445,6 +445,14 @@ def _extract_formats(info, player_response):
         for key, value in hardcoded_itag_info.items():
             conservative_update(fmt, key, value) # prefer info from YouTube
         fmt['quality'] = hardcoded_itag_info.get('height')
+        conservative_update(
+            fmt, 'quality',
+            extract_int(yt_fmt.get('quality'), whole_word=False)
+        )
+        conservative_update(
+            fmt, 'quality',
+            extract_int(yt_fmt.get('qualityLabel'), whole_word=False)
+        )
 
         info['formats'].append(fmt)
author	James Taylor <user234683@users.noreply.github.com>	2021-08-31 13:38:28 -0700
committer	Jesús <heckyel@hyperbola.info>	2021-08-31 16:40:19 -0500
commit	7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62 (patch)
tree	fb56107188cda2871799c15571cb98e21cfff286
parent	30e59081b14c98b49f718a1bc131ac46d09c84bf (diff)
download	yt-local-7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62.tar.lz yt-local-7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62.tar.xz yt-local-7c79f530a53e9ff4a9fc61d6b7adde6e9c241c62.zip