about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  settings.py  11
-rw-r--r--  youtube/watch.py  53
-rw-r--r--  youtube/yt_data_extract/common.py  7
-rw-r--r--  youtube/yt_data_extract/watch_extraction.py  8
4 files changed, 73 insertions, 6 deletions
diff --git a/settings.py b/settings.py
index c97e3d8..fdaebc7 100644
--- a/settings.py
+++ b/settings.py
@@ -168,6 +168,17 @@ For security reasons, enabling this is not recommended.''',
'category': 'playback',
}),
+ ('preferred_video_codec', {
+ 'type': int,
+ 'default': 0,
+ 'comment': '',
+ 'options': [
+ (0, 'h.264'),
+ (1, 'AV1'),
+ ],
+ 'category': 'playback',
+ }),
+
('prefer_uni_sources', {
'label': 'Prefer integrated sources',
'type': bool,
diff --git a/youtube/watch.py b/youtube/watch.py
index 7494b95..80885f9 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -56,7 +56,10 @@ def get_video_sources(info, target_resolution):
continue
# audio source
- if fmt['acodec'] and not fmt['vcodec'] and fmt['audio_bitrate']:
+ if fmt['acodec'] and not fmt['vcodec'] and (
+ fmt['audio_bitrate'] or fmt['bitrate']):
+ if fmt['bitrate']: # prefer this one, more accurate right now
+ fmt['audio_bitrate'] = int(fmt['bitrate']/1000)
source = {
'type': 'audio/' + fmt['ext'],
'bitrate': fmt['audio_bitrate'],
@@ -77,15 +80,51 @@ def get_video_sources(info, target_resolution):
+ source['vcodec'] + '"')
video_only_sources.append(source)
+ # Remove alternative mp4 codecs from video sources
+ def codec_name(vcodec):
+ if vcodec.startswith('avc'):
+ return 'h.264'
+ elif vcodec.startswith('av01'):
+ return 'av1'
+ else:
+ return 'unknown'
+ quality_to_codecs = {}
+ for src in video_only_sources:
+ if src['quality'] in quality_to_codecs:
+ quality_to_codecs[src['quality']].add(codec_name(src['vcodec']))
+ else:
+ quality_to_codecs[src['quality']] = {codec_name(src['vcodec'])}
+ i = 0
+ while i < len(video_only_sources):
+ src = video_only_sources[i]
+ codecs_for_quality = quality_to_codecs[src['quality']]
+ have_both = ('h.264' in codecs_for_quality
+ and 'av1' in codecs_for_quality)
+ have_one = ('h.264' in codecs_for_quality
+ or 'av1' in codecs_for_quality)
+ name = codec_name(src['vcodec'])
+ if name == 'unknown' and have_one:
+ del video_only_sources[i]
+ continue
+ if not have_both:
+ i += 1
+ continue
+ if name == 'av1' and settings.preferred_video_codec == 0:
+ del video_only_sources[i]
+ elif name == 'h.264' and settings.preferred_video_codec == 1:
+ del video_only_sources[i]
+ else:
+ i += 1
+
audio_sources.sort(key=lambda source: source['audio_bitrate'])
video_only_sources.sort(key=lambda src: src['quality'])
uni_sources.sort(key=lambda src: src['quality'])
for source in video_only_sources:
# choose an audio source to go with it
- # 0.15 is semiarbitrary empirical constant to spread audio sources
+ # 0.5 is semiarbitrary empirical constant to spread audio sources
# between 144p and 1080p. Use something better eventually.
- target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.15
+ target_audio_bitrate = source['quality']*source.get('fps', 30)/30*0.5
compat_audios = [a for a in audio_sources if a['ext'] == source['ext']]
if compat_audios:
closest_audio_source = compat_audios[0]
@@ -421,7 +460,13 @@ def video_quality_string(format):
def short_video_quality_string(fmt):
result = str(fmt['quality'] or '?') + 'p'
if fmt['fps']:
- result += ' ' + str(fmt['fps']) + 'fps'
+ result += str(fmt['fps'])
+ if fmt['vcodec'].startswith('av01'):
+ result += ' AV1'
+ elif fmt['vcodec'].startswith('avc'):
+ result += ' h264'
+ else:
+ result += ' ' + fmt['vcodec']
return result
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index ca999ba..f97597c 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -166,14 +166,17 @@ def extract_formatted_text(node):
return [{'text': node['simpleText']}]
return []
-def extract_int(string, default=None):
+def extract_int(string, default=None, whole_word=True):
if isinstance(string, int):
return string
if not isinstance(string, str):
string = extract_str(string)
if not string:
return default
- match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+ if whole_word:
+ match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+ else:
+ match = re.search(r'(\d+)', string.replace(',', ''))
if match is None:
return default
try:
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 43be909..e0af28e 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -445,6 +445,14 @@ def _extract_formats(info, player_response):
for key, value in hardcoded_itag_info.items():
conservative_update(fmt, key, value) # prefer info from YouTube
fmt['quality'] = hardcoded_itag_info.get('height')
+ conservative_update(
+ fmt, 'quality',
+ extract_int(yt_fmt.get('quality'), whole_word=False)
+ )
+ conservative_update(
+ fmt, 'quality',
+ extract_int(yt_fmt.get('qualityLabel'), whole_word=False)
+ )
info['formats'].append(fmt)