diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-09-06 15:45:01 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-09-06 15:45:01 -0700 |
commit | ac32b24b2a011292b704a3f27e8fd08a7ae9424b (patch) | |
tree | 0d6e021519dee62089733e20880c65cdb85d8841 /python/atoma/opml.py | |
parent | 7a93acabb3f5a8dd95ec0d56ae57cc34eb57c1b8 (diff) | |
parent | c393031ac54af959561214c8b1d6b22647a81b89 (diff) | |
download | yt-local-ac32b24b2a011292b704a3f27e8fd08a7ae9424b.tar.lz yt-local-ac32b24b2a011292b704a3f27e8fd08a7ae9424b.tar.xz yt-local-ac32b24b2a011292b704a3f27e8fd08a7ae9424b.zip |
Merge subscriptions into master
Diffstat (limited to 'python/atoma/opml.py')
-rw-r--r-- | python/atoma/opml.py | 107 |
1 files changed, 107 insertions, 0 deletions
# python/atoma/opml.py (new file, mode 100644, index 0000000..a73105e)
"""Parse OPML documents into attrs classes and list the feeds they contain."""
from datetime import datetime
from io import BytesIO
from typing import Optional, List
from xml.etree.ElementTree import Element

import attr

from .utils import parse_xml, get_text, get_int, get_datetime


@attr.s
class OPMLOutline:
    """One ``<outline>`` element together with its nested outlines.

    Every scalar field is copied from the XML attribute of the same
    (camelCase) name and is ``None`` when that attribute is absent.
    """

    text: Optional[str] = attr.ib()
    type: Optional[str] = attr.ib()
    xml_url: Optional[str] = attr.ib()
    description: Optional[str] = attr.ib()
    html_url: Optional[str] = attr.ib()
    language: Optional[str] = attr.ib()
    title: Optional[str] = attr.ib()
    version: Optional[str] = attr.ib()

    # Direct ``<outline>`` children, in document order.
    outlines: List['OPMLOutline'] = attr.ib()


@attr.s
class OPML:
    """An OPML document: the ``<head>`` fields plus the ``<body>`` outlines."""

    title: Optional[str] = attr.ib()
    owner_name: Optional[str] = attr.ib()
    owner_email: Optional[str] = attr.ib()
    date_created: Optional[datetime] = attr.ib()
    date_modified: Optional[datetime] = attr.ib()
    expansion_state: Optional[str] = attr.ib()

    vertical_scroll_state: Optional[int] = attr.ib()
    window_top: Optional[int] = attr.ib()
    window_left: Optional[int] = attr.ib()
    window_bottom: Optional[int] = attr.ib()
    window_right: Optional[int] = attr.ib()

    # Top-level ``<outline>`` elements found directly under ``<body>``.
    outlines: List[OPMLOutline] = attr.ib()


def _get_outlines(element: Optional[Element]) -> List[OPMLOutline]:
    """Recursively convert the ``<outline>`` children of *element*.

    Args:
        element: The parent element (``<body>`` or an ``<outline>``), or
            ``None`` when the parent is missing from the document.

    Returns:
        One ``OPMLOutline`` per direct ``<outline>`` child, in document
        order; an empty list when *element* is ``None`` or has none.
    """
    # ``Element.find`` returns None for a missing <body>; treat that as
    # "no outlines" instead of failing on ``None.findall``.
    if element is None:
        return []

    return [
        OPMLOutline(
            text=node.attrib.get('text'),
            type=node.attrib.get('type'),
            xml_url=node.attrib.get('xmlUrl'),
            description=node.attrib.get('description'),
            html_url=node.attrib.get('htmlUrl'),
            language=node.attrib.get('language'),
            title=node.attrib.get('title'),
            version=node.attrib.get('version'),
            outlines=_get_outlines(node),
        )
        for node in element.findall('outline')
    ]


def _parse_opml(root: Element) -> OPML:
    """Build an ``OPML`` object from the root element of a parsed document.

    Args:
        root: Root element of the XML tree; its ``<head>`` and ``<body>``
            children are read.

    Returns:
        The populated ``OPML`` object. A missing ``<body>`` yields an empty
        ``outlines`` list, and a missing ``<head>`` is handled exactly like
        an empty ``<head/>`` element.
    """
    head = root.find('head')
    if head is None:
        # Stand in an empty <head/> so the field helpers see the same input
        # they would for a document whose head has no children.
        head = Element('head')
    body = root.find('body')

    return OPML(
        get_text(head, 'title'),
        get_text(head, 'ownerName'),
        get_text(head, 'ownerEmail'),
        get_datetime(head, 'dateCreated'),
        get_datetime(head, 'dateModified'),
        get_text(head, 'expansionState'),
        get_int(head, 'vertScrollState'),
        get_int(head, 'windowTop'),
        get_int(head, 'windowLeft'),
        get_int(head, 'windowBottom'),
        get_int(head, 'windowRight'),
        outlines=_get_outlines(body)
    )


def parse_opml_file(filename: str) -> OPML:
    """Parse an OPML document from a local XML file."""
    root = parse_xml(filename).getroot()
    return _parse_opml(root)


def parse_opml_bytes(data: bytes) -> OPML:
    """Parse an OPML document from a byte-string containing XML data."""
    root = parse_xml(BytesIO(data)).getroot()
    return _parse_opml(root)


def get_feed_list(opml_obj: OPML) -> List[str]:
    """Walk an OPML document to extract the list of feeds it contains.

    Args:
        opml_obj: A parsed OPML document.

    Returns:
        The ``xml_url`` of every outline, at any nesting depth, whose
        ``type`` is exactly ``'rss'`` and whose ``xml_url`` is non-empty.
        URLs appear in document order (a parent before its children) and
        duplicates are kept.
    """
    feeds: List[str] = []

    def collect(outlines: List[OPMLOutline]) -> None:
        for outline in outlines:
            if outline.type == 'rss' and outline.xml_url:
                feeds.append(outline.xml_url)
            # Recursing into an empty list is a no-op, so no guard needed.
            collect(outline.outlines)

    collect(opml_obj.outlines)
    return feeds