about summary refs log tree commit diff stats
path: root/python/atoma/rss.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/atoma/rss.py')
-rw-r--r-- python/atoma/rss.py 221
1 files changed, 0 insertions, 221 deletions
diff --git a/python/atoma/rss.py b/python/atoma/rss.py
deleted file mode 100644
index f447a2f..0000000
--- a/python/atoma/rss.py
+++ /dev/null
@@ -1,221 +0,0 @@
-from datetime import datetime
-from io import BytesIO
-from typing import Optional, List
-from xml.etree.ElementTree import Element
-
-import attr
-
-from .utils import (
- parse_xml, get_child, get_text, get_int, get_datetime, FeedParseError
-)
-
-
@attr.attrs
class RSSImage:
    """Image attached to an RSS channel.

    Instances are built positionally by ``_get_image``, so the field order
    below is part of the constructor contract and must not change.
    """

    # Text of the <url> child (looked up as non-optional by ``_get_image``).
    url: str = attr.attrib()
    # Text of the <title> child, if any.
    title: Optional[str] = attr.attrib()
    # Text of the <link> child (looked up as non-optional by ``_get_image``).
    link: str = attr.attrib()
    # Integer <width>; ``_get_image`` substitutes 88 when no value is found.
    width: int = attr.attrib()
    # Integer <height>; ``_get_image`` substitutes 31 when no value is found.
    height: int = attr.attrib()
    # Text of the <description> child, if any.
    description: Optional[str] = attr.attrib()
-
-
@attr.attrs
class RSSEnclosure:
    """Attributes of a single ``<enclosure>`` element of an item.

    Field order matters: instances are created with positional arguments.
    """

    # Value of the ``url`` attribute.
    url: str = attr.attrib()
    # ``length`` attribute as an integer, or None when it is absent or not
    # parseable as an integer.
    length: Optional[int] = attr.attrib()
    # Value of the ``type`` attribute, if present.
    type: Optional[str] = attr.attrib()
-
-
@attr.attrs
class RSSSource:
    """Content of an item's ``<source>`` element.

    Field order matters: instances are created with positional arguments.
    """

    # Whitespace-stripped text of the <source> element.
    title: str = attr.attrib()
    # Value of the ``url`` attribute, if present.
    url: Optional[str] = attr.attrib()
-
-
@attr.attrs
class RSSItem:
    """One ``<item>`` of an RSS channel, as produced by ``_get_item``.

    Field order matters: instances are created with positional arguments.
    """

    # Text of <title>, if any.
    title: Optional[str] = attr.attrib()
    # Text of <link>; ``_get_link`` may substitute the GUID when the item has
    # no <link> and the GUID is flagged as a permalink.
    link: Optional[str] = attr.attrib()
    # Text of <description>, if any.
    description: Optional[str] = attr.attrib()
    # Text of <author>, if any.
    author: Optional[str] = attr.attrib()
    # Text of every <category> child, in document order.
    categories: List[str] = attr.attrib()
    # Text of <comments>, if any.
    comments: Optional[str] = attr.attrib()
    # One entry per <enclosure> child, in document order.
    enclosures: List[RSSEnclosure] = attr.attrib()
    # Text of <guid>, if any.
    guid: Optional[str] = attr.attrib()
    # Parsed <pubDate>, if any.
    pub_date: Optional[datetime] = attr.attrib()
    # Parsed <source>, if any.
    source: Optional[RSSSource] = attr.attrib()

    # Extension: text of <content:encoded>, if any.
    content_encoded: Optional[str] = attr.attrib()
-
-
@attr.attrs
class RSSChannel:
    """Parsed ``<channel>`` of an RSS feed, as produced by ``_parse_rss``.

    Field order matters: instances are created with positional arguments.
    """

    # Text of <title>, if any.
    title: Optional[str] = attr.attrib()
    # Text of <link>, if any.
    link: Optional[str] = attr.attrib()
    # Text of <description>, if any.
    description: Optional[str] = attr.attrib()
    # Text of <language>, if any.
    language: Optional[str] = attr.attrib()
    # Text of <copyright>, if any.
    copyright: Optional[str] = attr.attrib()
    # Text of <managingEditor>, if any.
    managing_editor: Optional[str] = attr.attrib()
    # Text of <webMaster>, if any.
    web_master: Optional[str] = attr.attrib()
    # Parsed <pubDate>, if any.
    pub_date: Optional[datetime] = attr.attrib()
    # Parsed <lastBuildDate>, if any.
    last_build_date: Optional[datetime] = attr.attrib()
    # Text of every <category> child, in document order.
    categories: List[str] = attr.attrib()
    # Text of <generator>, if any.
    generator: Optional[str] = attr.attrib()
    # Text of <docs>, if any.
    docs: Optional[str] = attr.attrib()
    # Integer value of <ttl>, if any.
    ttl: Optional[int] = attr.attrib()
    # Parsed <image>, if any.
    image: Optional[RSSImage] = attr.attrib()

    # One entry per <item> child, in document order.
    items: List[RSSItem] = attr.attrib()

    # Extension: text of <content:encoded>, if any.
    content_encoded: Optional[str] = attr.attrib()
-
-
def _get_image(element: Element, name,
               optional: bool = True) -> Optional[RSSImage]:
    """Build an RSSImage from the child of *element* called *name*.

    The child is looked up with ``get_child(element, name, optional)``.
    Returns None when that lookup yields None. ``url`` and ``link`` are
    requested as non-optional; ``title`` and ``description`` are optional.
    """
    image_node = get_child(element, name, optional)
    if image_node is None:
        return None

    return RSSImage(
        url=get_text(image_node, 'url', optional=False),
        title=get_text(image_node, 'title'),
        link=get_text(image_node, 'link', optional=False),
        # Fall back to 88x31 whenever get_int gives a falsy result.
        width=get_int(image_node, 'width') or 88,
        height=get_int(image_node, 'height') or 31,
        description=get_text(image_node, 'description'),
    )
-
-
def _get_source(element: Element, name,
                optional: bool = True) -> Optional[RSSSource]:
    """Build an RSSSource from the child of *element* called *name*.

    The child is looked up with ``get_child(element, name, optional)``.
    Returns None when that lookup yields None. Otherwise the source title is
    the element's text with surrounding whitespace removed, and the source
    URL is the element's ``url`` attribute (None when absent).
    """
    child = get_child(element, name, optional)
    if child is None:
        return None

    # ElementTree sets ``text`` to None for an element without character
    # data (e.g. ``<source url="..."/>``); use an empty title in that case
    # rather than failing on ``None.strip()``.
    title = (child.text or '').strip()

    return RSSSource(
        title,
        child.attrib.get('url'),
    )
-
-
def _get_enclosure(element: Element) -> RSSEnclosure:
    """Build an RSSEnclosure from the attributes of an ``<enclosure>`` node.

    ``length`` becomes None when the attribute is missing or cannot be
    converted with ``int()``. ``type`` is None when absent. A missing ``url``
    attribute raises KeyError.
    """
    attributes = element.attrib

    try:
        size = int(attributes.get('length'))
    except (TypeError, ValueError):
        # TypeError: attribute absent (None); ValueError: not an integer.
        size = None

    return RSSEnclosure(
        url=attributes['url'],
        length=size,
        type=attributes.get('type'),
    )
-
-
def _get_link(element: Element) -> Optional[str]:
    """Return the link of an item, or None when none can be determined.

    The ``<link>`` text wins when present. Otherwise the ``<guid>`` text is
    used, but only if the guid element carries ``isPermaLink="true"``
    explicitly.
    """
    direct_link = get_text(element, 'link')
    if direct_link is None:
        guid_node = get_child(element, 'guid')
        is_permalink = (
            guid_node is not None
            and guid_node.attrib.get('isPermaLink') == 'true'
        )
        return get_text(element, 'guid') if is_permalink else None

    return direct_link
-
-
def _get_item(element: Element) -> RSSItem:
    """Convert one ``<item>`` element into an RSSItem.

    Single-valued fields are read through the ``get_text`` / ``get_datetime``
    helpers; ``<category>`` and ``<enclosure>`` may repeat and are collected
    into lists in document order. Categories hold each element's raw
    ``text`` value.
    """
    # Keyword arguments are evaluated top to bottom, which keeps the lookups
    # in the same order as the RSSItem fields.
    return RSSItem(
        title=get_text(element, 'title'),
        link=_get_link(element),
        description=get_text(element, 'description'),
        author=get_text(element, 'author'),
        categories=[node.text for node in element.findall('category')],
        comments=get_text(element, 'comments'),
        enclosures=[
            _get_enclosure(node) for node in element.findall('enclosure')
        ],
        guid=get_text(element, 'guid'),
        pub_date=get_datetime(element, 'pubDate'),
        source=_get_source(element, 'source'),
        # Extension
        content_encoded=get_text(element, 'content:encoded'),
    )
-
-
def _parse_rss(root: Element) -> RSSChannel:
    """Convert the root element of an RSS document into an RSSChannel.

    Only documents whose root ``version`` attribute is exactly ``"2.0"`` are
    accepted. All channel data is read from the root's ``<channel>`` child.

    Raises:
        FeedParseError: if the version is not ``"2.0"`` or the document has
            no ``<channel>`` element.
    """
    rss_version = root.get('version')
    if rss_version != '2.0':
        raise FeedParseError('Cannot process RSS feed version "{}"'
                             .format(rss_version))

    # ``Element.find`` returns None when there is no match; report that as a
    # parse error instead of handing None to the lookup helpers below.
    channel = root.find('channel')
    if channel is None:
        raise FeedParseError('RSS feed has no "channel" element')

    # Keyword arguments are evaluated top to bottom, which keeps the lookups
    # in the same order as the RSSChannel fields.
    return RSSChannel(
        title=get_text(channel, 'title'),
        link=get_text(channel, 'link'),
        description=get_text(channel, 'description'),
        language=get_text(channel, 'language'),
        copyright=get_text(channel, 'copyright'),
        managing_editor=get_text(channel, 'managingEditor'),
        web_master=get_text(channel, 'webMaster'),
        pub_date=get_datetime(channel, 'pubDate'),
        last_build_date=get_datetime(channel, 'lastBuildDate'),
        categories=[node.text for node in channel.findall('category')],
        generator=get_text(channel, 'generator'),
        docs=get_text(channel, 'docs'),
        ttl=get_int(channel, 'ttl'),
        image=_get_image(channel, 'image'),
        items=[_get_item(node) for node in channel.findall('item')],
        # Extension
        content_encoded=get_text(channel, 'content:encoded'),
    )
-
-
def parse_rss_file(filename: str) -> RSSChannel:
    """Read the XML file at *filename* and return its RSS channel.

    The file is loaded with ``parse_xml`` and its root element is handed to
    ``_parse_rss``.
    """
    document = parse_xml(filename)
    return _parse_rss(document.getroot())
-
-
def parse_rss_bytes(data: bytes) -> RSSChannel:
    """Return the RSS channel described by the XML held in *data*.

    The bytes are wrapped in an in-memory stream so they can be loaded with
    ``parse_xml``; the resulting root element is handed to ``_parse_rss``.
    """
    stream = BytesIO(data)
    document = parse_xml(stream)
    return _parse_rss(document.getroot())