aboutsummaryrefslogtreecommitdiffstats
path: root/python/atoma/opml.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/atoma/opml.py')
-rw-r--r--python/atoma/opml.py107
1 files changed, 107 insertions, 0 deletions
diff --git a/python/atoma/opml.py b/python/atoma/opml.py
new file mode 100644
index 0000000..a73105e
--- /dev/null
+++ b/python/atoma/opml.py
@@ -0,0 +1,107 @@
+from datetime import datetime
+from io import BytesIO
+from typing import Optional, List
+from xml.etree.ElementTree import Element
+
+import attr
+
+from .utils import parse_xml, get_text, get_int, get_datetime
+
+
+@attr.s
+class OPMLOutline:
+ text: Optional[str] = attr.ib()
+ type: Optional[str] = attr.ib()
+ xml_url: Optional[str] = attr.ib()
+ description: Optional[str] = attr.ib()
+ html_url: Optional[str] = attr.ib()
+ language: Optional[str] = attr.ib()
+ title: Optional[str] = attr.ib()
+ version: Optional[str] = attr.ib()
+
+ outlines: List['OPMLOutline'] = attr.ib()
+
+
+@attr.s
+class OPML:
+ title: Optional[str] = attr.ib()
+ owner_name: Optional[str] = attr.ib()
+ owner_email: Optional[str] = attr.ib()
+ date_created: Optional[datetime] = attr.ib()
+ date_modified: Optional[datetime] = attr.ib()
+ expansion_state: Optional[str] = attr.ib()
+
+ vertical_scroll_state: Optional[int] = attr.ib()
+ window_top: Optional[int] = attr.ib()
+ window_left: Optional[int] = attr.ib()
+ window_bottom: Optional[int] = attr.ib()
+ window_right: Optional[int] = attr.ib()
+
+ outlines: List[OPMLOutline] = attr.ib()
+
+
+def _get_outlines(element: Element) -> List[OPMLOutline]:
+ rv = list()
+
+ for outline in element.findall('outline'):
+ rv.append(OPMLOutline(
+ outline.attrib.get('text'),
+ outline.attrib.get('type'),
+ outline.attrib.get('xmlUrl'),
+ outline.attrib.get('description'),
+ outline.attrib.get('htmlUrl'),
+ outline.attrib.get('language'),
+ outline.attrib.get('title'),
+ outline.attrib.get('version'),
+ _get_outlines(outline)
+ ))
+
+ return rv
+
+
+def _parse_opml(root: Element) -> OPML:
+ head = root.find('head')
+ body = root.find('body')
+
+ return OPML(
+ get_text(head, 'title'),
+ get_text(head, 'ownerName'),
+ get_text(head, 'ownerEmail'),
+ get_datetime(head, 'dateCreated'),
+ get_datetime(head, 'dateModified'),
+ get_text(head, 'expansionState'),
+ get_int(head, 'vertScrollState'),
+ get_int(head, 'windowTop'),
+ get_int(head, 'windowLeft'),
+ get_int(head, 'windowBottom'),
+ get_int(head, 'windowRight'),
+ outlines=_get_outlines(body)
+ )
+
+
+def parse_opml_file(filename: str) -> OPML:
+ """Parse an OPML document from a local XML file."""
+ root = parse_xml(filename).getroot()
+ return _parse_opml(root)
+
+
+def parse_opml_bytes(data: bytes) -> OPML:
+ """Parse an OPML document from a byte-string containing XML data."""
+ root = parse_xml(BytesIO(data)).getroot()
+ return _parse_opml(root)
+
+
+def get_feed_list(opml_obj: OPML) -> List[str]:
+ """Walk an OPML document to extract the list of feed it contains."""
+ rv = list()
+
+ def collect(obj):
+ for outline in obj.outlines:
+ if outline.type == 'rss' and outline.xml_url:
+ rv.append(outline.xml_url)
+
+ if outline.outlines:
+ collect(outline)
+
+ collect(opml_obj)
+ return rv