diff options
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/common.py | 7 | ||||
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 8 |
2 files changed, 13 insertions, 2 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index ca999ba..f97597c 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -166,14 +166,17 @@ def extract_formatted_text(node): return [{'text': node['simpleText']}] return [] -def extract_int(string, default=None): +def extract_int(string, default=None, whole_word=True): if isinstance(string, int): return string if not isinstance(string, str): string = extract_str(string) if not string: return default - match = re.search(r'\b(\d+)\b', string.replace(',', '')) + if whole_word: + match = re.search(r'\b(\d+)\b', string.replace(',', '')) + else: + match = re.search(r'(\d+)', string.replace(',', '')) if match is None: return default try: diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py index 43be909..e0af28e 100644 --- a/youtube/yt_data_extract/watch_extraction.py +++ b/youtube/yt_data_extract/watch_extraction.py @@ -445,6 +445,14 @@ def _extract_formats(info, player_response): for key, value in hardcoded_itag_info.items(): conservative_update(fmt, key, value) # prefer info from YouTube fmt['quality'] = hardcoded_itag_info.get('height') + conservative_update( + fmt, 'quality', + extract_int(yt_fmt.get('quality'), whole_word=False) + ) + conservative_update( + fmt, 'quality', + extract_int(yt_fmt.get('qualityLabel'), whole_word=False) + ) info['formats'].append(fmt) |