diff options
Diffstat (limited to 'python/atoma/utils.py')
-rw-r--r-- | python/atoma/utils.py | 84 |
1 files changed, 0 insertions, 84 deletions
diff --git a/python/atoma/utils.py b/python/atoma/utils.py deleted file mode 100644 index 4dc1ab5..0000000 --- a/python/atoma/utils.py +++ /dev/null @@ -1,84 +0,0 @@ -from datetime import datetime, timezone -from xml.etree.ElementTree import Element -from typing import Optional - -import dateutil.parser -from defusedxml.ElementTree import parse as defused_xml_parse, ParseError - -from .exceptions import FeedXMLError, FeedParseError - -ns = { - 'content': 'http://purl.org/rss/1.0/modules/content/', - 'feed': 'http://www.w3.org/2005/Atom' -} - - -def parse_xml(xml_content): - try: - return defused_xml_parse(xml_content) - except ParseError: - raise FeedXMLError('Not a valid XML document') - - -def get_child(element: Element, name, - optional: bool=True) -> Optional[Element]: - child = element.find(name, namespaces=ns) - - if child is None and not optional: - raise FeedParseError( - 'Could not parse feed: "{}" does not have a "{}"' - .format(element.tag, name) - ) - - elif child is None: - return None - - return child - - -def get_text(element: Element, name, optional: bool=True) -> Optional[str]: - child = get_child(element, name, optional) - if child is None: - return None - - if child.text is None: - if optional: - return None - - raise FeedParseError( - 'Could not parse feed: "{}" text is required but is empty' - .format(name) - ) - - return child.text.strip() - - -def get_int(element: Element, name, optional: bool=True) -> Optional[int]: - text = get_text(element, name, optional) - if text is None: - return None - - return int(text) - - -def get_datetime(element: Element, name, - optional: bool=True) -> Optional[datetime]: - text = get_text(element, name, optional) - if text is None: - return None - - return try_parse_date(text) - - -def try_parse_date(date_str: str) -> Optional[datetime]: - try: - date = dateutil.parser.parse(date_str, fuzzy=True) - except (ValueError, OverflowError): - return None - - if date.tzinfo is None: - # TZ naive datetime, make it a TZ aware datetime by assuming it - # contains UTC time - date = date.replace(tzinfo=timezone.utc) - - return date |