From a9f67d4630319b9644af5802e7a908abcf9449be Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 20 Dec 2019 18:48:40 -0800 Subject: Fix regression: date extraction broken. Move constants to correct file in yt_data_extract --- youtube/yt_data_extract/common.py | 3 ++- youtube/yt_data_extract/watch_extraction.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube') diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 4681a86..dfeae64 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -152,6 +152,7 @@ def extract_approx_int(string): return None return match.group(1) +MONTH_ABBREVIATIONS = {'jan':'1', 'feb':'2', 'mar':'3', 'apr':'4', 'may':'5', 'jun':'6', 'jul':'7', 'aug':'8', 'sep':'9', 'oct':'10', 'nov':'11', 'dec':'12'} def extract_date(date_text): '''Input: "Mar 9, 2019". Output: "2019-3-9"''' if date_text is None: @@ -161,7 +162,7 @@ def extract_date(date_text): parts = date_text.split() if len(parts) >= 3: month, day, year = parts[-3:] - month = month_abbreviations.get(month[0:3]) # slicing in case they start writing out the full month name + month = MONTH_ABBREVIATIONS.get(month[0:3]) # slicing in case they start writing out the full month name if month and (re.fullmatch(r'\d\d?', day) is not None) and (re.fullmatch(r'\d{4}', year) is not None): return year + '-' + month + '-' + day diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py index 09abbe3..3e6dcdf 100644 --- a/youtube/yt_data_extract/watch_extraction.py +++ b/youtube/yt_data_extract/watch_extraction.py @@ -228,7 +228,6 @@ def _extract_watch_info_mobile(top_level): return info -month_abbreviations = {'jan':'1', 'feb':'2', 'mar':'3', 'apr':'4', 'may':'5', 'jun':'6', 'jul':'7', 'aug':'8', 'sep':'9', 'oct':'10', 'nov':'11', 'dec':'12'} def _extract_watch_info_desktop(top_level): info = { 'comment_count': None, -- cgit v1.2.3