# Provenance (recovered from the patch header this file was extracted from):
#   commit  3905e7e64059b45479894ba1fdfb0ef9cef64475
#   From:   James Taylor
#   Date:   Sat, 16 Feb 2019 23:41:52 -0800
#   Subject: basic subscriptions system
#   Adds new file python/atoma/opml.py (cgit v1.2.3)
"""Parse OPML documents and extract the feed URLs they list."""
from datetime import datetime
from io import BytesIO
from typing import Optional, List
from xml.etree.ElementTree import Element

import attr

from .utils import parse_xml, get_text, get_int, get_datetime


@attr.s
class OPMLOutline:
    """One ``<outline>`` element together with the outlines nested in it."""

    # Each value below is read from the XML attribute of the same meaning
    # (``text``, ``type``, ``xmlUrl``, ``description``, ``htmlUrl``,
    # ``language``, ``title``, ``version``); a missing attribute is None.
    text: Optional[str] = attr.ib()
    type: Optional[str] = attr.ib()
    xml_url: Optional[str] = attr.ib()
    description: Optional[str] = attr.ib()
    html_url: Optional[str] = attr.ib()
    language: Optional[str] = attr.ib()
    title: Optional[str] = attr.ib()
    version: Optional[str] = attr.ib()

    # Direct ``<outline>`` children of this element, in document order.
    outlines: List['OPMLOutline'] = attr.ib()


@attr.s
class OPML:
    """A parsed OPML document: ``<head>`` metadata plus the outline tree."""

    # Read from the ``<head>`` children title, ownerName, ownerEmail,
    # dateCreated, dateModified and expansionState respectively.
    title: Optional[str] = attr.ib()
    owner_name: Optional[str] = attr.ib()
    owner_email: Optional[str] = attr.ib()
    date_created: Optional[datetime] = attr.ib()
    date_modified: Optional[datetime] = attr.ib()
    expansion_state: Optional[str] = attr.ib()

    # Read from vertScrollState, windowTop, windowLeft, windowBottom and
    # windowRight respectively.
    vertical_scroll_state: Optional[int] = attr.ib()
    window_top: Optional[int] = attr.ib()
    window_left: Optional[int] = attr.ib()
    window_bottom: Optional[int] = attr.ib()
    window_right: Optional[int] = attr.ib()

    # Top-level ``<outline>`` elements found directly under ``<body>``.
    outlines: List[OPMLOutline] = attr.ib()


def _get_outlines(element: Optional[Element]) -> List[OPMLOutline]:
    """Build the outline tree found directly under *element*.

    Only direct ``<outline>`` children are matched at each level; deeper
    levels are collected by recursing into each child.

    Args:
        element: The parent element, or ``None`` when the parent is absent
            from the document (e.g. there is no ``<body>``).

    Returns:
        The outlines in document order; an empty list when *element* is
        ``None`` or has no ``<outline>`` children.
    """
    if element is None:
        return []

    # Keyword arguments keep each XML attribute tied to its field even if
    # the field order of OPMLOutline changes later.
    return [
        OPMLOutline(
            text=node.attrib.get('text'),
            type=node.attrib.get('type'),
            xml_url=node.attrib.get('xmlUrl'),
            description=node.attrib.get('description'),
            html_url=node.attrib.get('htmlUrl'),
            language=node.attrib.get('language'),
            title=node.attrib.get('title'),
            version=node.attrib.get('version'),
            outlines=_get_outlines(node),
        )
        for node in element.findall('outline')
    ]


def _parse_opml(root: Element) -> OPML:
    """Convert the root element of an OPML document into an ``OPML`` object.

    A document lacking ``<head>`` or ``<body>`` is tolerated: the metadata
    lookups then run against an empty element and the outline list is empty.
    """
    head = root.find('head')
    if head is None:
        # Compare with None explicitly: an Element without children is falsy.
        # NOTE(review): assumes the get_* helpers return None for a child
        # that is not present, as the Optional fields suggest - confirm
        # against .utils.
        head = Element('head')

    return OPML(
        title=get_text(head, 'title'),
        owner_name=get_text(head, 'ownerName'),
        owner_email=get_text(head, 'ownerEmail'),
        date_created=get_datetime(head, 'dateCreated'),
        date_modified=get_datetime(head, 'dateModified'),
        expansion_state=get_text(head, 'expansionState'),
        vertical_scroll_state=get_int(head, 'vertScrollState'),
        window_top=get_int(head, 'windowTop'),
        window_left=get_int(head, 'windowLeft'),
        window_bottom=get_int(head, 'windowBottom'),
        window_right=get_int(head, 'windowRight'),
        # _get_outlines accepts None, so a missing <body> gives no outlines.
        outlines=_get_outlines(root.find('body')),
    )


def parse_opml_file(filename: str) -> OPML:
    """Parse an OPML document from a local XML file."""
    root = parse_xml(filename).getroot()
    return _parse_opml(root)


def parse_opml_bytes(data: bytes) -> OPML:
    """Parse an OPML document from a byte-string containing XML data."""
    root = parse_xml(BytesIO(data)).getroot()
    return _parse_opml(root)


def get_feed_list(opml_obj: OPML) -> List[str]:
    """Walk an OPML document to extract the list of feeds it contains.

    Outlines are visited depth-first in document order. An outline
    contributes its ``xml_url`` when its ``type`` is exactly ``'rss'`` and
    the URL is non-empty.

    Args:
        opml_obj: The parsed document to walk.

    Returns:
        The feed URLs in the order they were encountered.
    """
    feeds: List[str] = []

    def collect(obj):
        for outline in obj.outlines:
            if outline.type == 'rss' and outline.xml_url:
                feeds.append(outline.xml_url)

            # Descend regardless of whether this outline was itself a feed.
            if outline.outlines:
                collect(outline)

    collect(opml_obj)
    return feeds