aboutsummaryrefslogtreecommitdiffstats
path: root/python/atoma/opml.py
blob: a73105e044a7f7b77b56c4e05f07fd4557378932 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from datetime import datetime
from io import BytesIO
from typing import Optional, List
from xml.etree.ElementTree import Element

import attr

from .utils import parse_xml, get_text, get_int, get_datetime


@attr.s
class OPMLOutline:
    """One ``<outline>`` element of an OPML document.

    Instances built by ``_get_outlines`` take each string field from the XML
    attribute named in the comment next to it; a field is ``None`` when the
    element does not carry that attribute. Field order matters: instances are
    constructed positionally.
    """

    # ``text`` XML attribute.
    text: Optional[str] = attr.ib()
    # ``type`` XML attribute (``get_feed_list`` looks for the value 'rss').
    type: Optional[str] = attr.ib()
    # ``xmlUrl`` XML attribute.
    xml_url: Optional[str] = attr.ib()
    # ``description`` XML attribute.
    description: Optional[str] = attr.ib()
    # ``htmlUrl`` XML attribute.
    html_url: Optional[str] = attr.ib()
    # ``language`` XML attribute.
    language: Optional[str] = attr.ib()
    # ``title`` XML attribute.
    title: Optional[str] = attr.ib()
    # ``version`` XML attribute.
    version: Optional[str] = attr.ib()

    # Nested ``<outline>`` children of this element, in document order.
    outlines: List['OPMLOutline'] = attr.ib()


@attr.s
class OPML:
    """Parsed representation of an OPML document.

    Instances built by ``_parse_opml`` take every field except ``outlines``
    from the child of ``<head>`` named in the comment next to it; ``outlines``
    comes from the ``<outline>`` elements directly under ``<body>``.
    """

    # ``<title>``
    title: Optional[str] = attr.ib()
    # ``<ownerName>``
    owner_name: Optional[str] = attr.ib()
    # ``<ownerEmail>``
    owner_email: Optional[str] = attr.ib()
    # ``<dateCreated>``
    date_created: Optional[datetime] = attr.ib()
    # ``<dateModified>``
    date_modified: Optional[datetime] = attr.ib()
    # ``<expansionState>``, kept as the raw text of the element.
    expansion_state: Optional[str] = attr.ib()

    # ``<vertScrollState>``, ``<windowTop>``, ``<windowLeft>``,
    # ``<windowBottom>`` and ``<windowRight>``, each read as an integer.
    vertical_scroll_state: Optional[int] = attr.ib()
    window_top: Optional[int] = attr.ib()
    window_left: Optional[int] = attr.ib()
    window_bottom: Optional[int] = attr.ib()
    window_right: Optional[int] = attr.ib()

    # Top-level outlines of the document; each may contain nested outlines.
    outlines: List[OPMLOutline] = attr.ib()


def _get_outlines(element: Element) -> List[OPMLOutline]:
    """Convert the ``<outline>`` children of *element* into a tree.

    Only direct children are looked up at each level; their own children are
    converted recursively and stored in the ``outlines`` field. An XML
    attribute that is absent from an element yields ``None`` for the matching
    field.
    """
    def build(node: Element) -> OPMLOutline:
        # Map the camelCase XML attribute names onto the snake_case fields.
        attributes = node.attrib
        return OPMLOutline(
            text=attributes.get('text'),
            type=attributes.get('type'),
            xml_url=attributes.get('xmlUrl'),
            description=attributes.get('description'),
            html_url=attributes.get('htmlUrl'),
            language=attributes.get('language'),
            title=attributes.get('title'),
            version=attributes.get('version'),
            outlines=_get_outlines(node),
        )

    return [build(child) for child in element.findall('outline')]


def _parse_opml(root: Element) -> OPML:
    """Build an ``OPML`` object from the root element of a parsed document.

    Metadata is read from the children of ``<head>`` and the outline tree
    from ``<body>``.

    NOTE(review): ``Element.find`` returns ``None`` when the child is absent.
    A missing ``<body>`` makes ``_get_outlines`` fail with ``AttributeError``;
    how the ``get_*`` helpers react to a missing ``<head>`` is not visible
    from this module.
    """
    head = root.find('head')
    body = root.find('body')

    return OPML(
        title=get_text(head, 'title'),
        owner_name=get_text(head, 'ownerName'),
        owner_email=get_text(head, 'ownerEmail'),
        date_created=get_datetime(head, 'dateCreated'),
        date_modified=get_datetime(head, 'dateModified'),
        expansion_state=get_text(head, 'expansionState'),
        vertical_scroll_state=get_int(head, 'vertScrollState'),
        window_top=get_int(head, 'windowTop'),
        window_left=get_int(head, 'windowLeft'),
        window_bottom=get_int(head, 'windowBottom'),
        window_right=get_int(head, 'windowRight'),
        outlines=_get_outlines(body),
    )


def parse_opml_file(filename: str) -> OPML:
    """Read the XML file at *filename* and return it as an ``OPML`` object."""
    tree = parse_xml(filename)
    return _parse_opml(tree.getroot())


def parse_opml_bytes(data: bytes) -> OPML:
    """Return the ``OPML`` object described by the XML held in *data*."""
    # Wrap the bytes in a file-like object before handing them to parse_xml.
    stream = BytesIO(data)
    document_root = parse_xml(stream).getroot()
    return _parse_opml(document_root)


def get_feed_list(opml_obj: OPML) -> List[str]:
    """Return the URLs of all the feeds referenced by an OPML document.

    The outline tree is walked depth-first in document order. An outline
    contributes its ``xml_url`` when its ``type`` is exactly ``'rss'`` and the
    URL is non-empty; its nested outlines are visited either way.
    """
    def iter_feed_urls(node):
        for child in node.outlines:
            if child.type == 'rss' and child.xml_url:
                yield child.xml_url

            # Descend before moving on to the next sibling (pre-order).
            if child.outlines:
                yield from iter_feed_urls(child)

    return list(iter_feed_urls(opml_obj))