about | summary | refs | log | tree | commit | diff | stats
path: root/youtube/yt_data_extract
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- youtube/yt_data_extract/common.py 7
-rw-r--r-- youtube/yt_data_extract/watch_extraction.py 8
2 files changed, 13 insertions, 2 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index ca999ba..f97597c 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -166,14 +166,17 @@ def extract_formatted_text(node):
return [{'text': node['simpleText']}]
return []
-def extract_int(string, default=None):
+def extract_int(string, default=None, whole_word=True):
if isinstance(string, int):
return string
if not isinstance(string, str):
string = extract_str(string)
if not string:
return default
- match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+ if whole_word:
+ match = re.search(r'\b(\d+)\b', string.replace(',', ''))
+ else:
+ match = re.search(r'(\d+)', string.replace(',', ''))
if match is None:
return default
try:
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 43be909..e0af28e 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -445,6 +445,14 @@ def _extract_formats(info, player_response):
for key, value in hardcoded_itag_info.items():
conservative_update(fmt, key, value) # prefer info from YouTube
fmt['quality'] = hardcoded_itag_info.get('height')
+ conservative_update(
+ fmt, 'quality',
+ extract_int(yt_fmt.get('quality'), whole_word=False)
+ )
+ conservative_update(
+ fmt, 'quality',
+ extract_int(yt_fmt.get('qualityLabel'), whole_word=False)
+ )
info['formats'].append(fmt)