aboutsummaryrefslogtreecommitdiffstats
path: root/python/atoma/simple.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/atoma/simple.py')
-rw-r--r--python/atoma/simple.py224
1 files changed, 0 insertions, 224 deletions
diff --git a/python/atoma/simple.py b/python/atoma/simple.py
deleted file mode 100644
index 98bb3e1..0000000
--- a/python/atoma/simple.py
+++ /dev/null
@@ -1,224 +0,0 @@
-"""Simple API that abstracts away the differences between feed types."""
-
-from datetime import datetime, timedelta
-import html
-import os
-from typing import Optional, List, Tuple
-import urllib.parse
-
-import attr
-
-from . import atom, rss, json_feed
-from .exceptions import (
- FeedParseError, FeedDocumentError, FeedXMLError, FeedJSONError
-)
-
-
-@attr.s
-class Attachment:
- link: str = attr.ib()
- mime_type: Optional[str] = attr.ib()
- title: Optional[str] = attr.ib()
- size_in_bytes: Optional[int] = attr.ib()
- duration: Optional[timedelta] = attr.ib()
-
-
-@attr.s
-class Article:
- id: str = attr.ib()
- title: Optional[str] = attr.ib()
- link: Optional[str] = attr.ib()
- content: str = attr.ib()
- published_at: Optional[datetime] = attr.ib()
- updated_at: Optional[datetime] = attr.ib()
- attachments: List[Attachment] = attr.ib()
-
-
-@attr.s
-class Feed:
- title: str = attr.ib()
- subtitle: Optional[str] = attr.ib()
- link: Optional[str] = attr.ib()
- updated_at: Optional[datetime] = attr.ib()
- articles: List[Article] = attr.ib()
-
-
-def _adapt_atom_feed(atom_feed: atom.AtomFeed) -> Feed:
- articles = list()
- for entry in atom_feed.entries:
- if entry.content is not None:
- content = entry.content.value
- elif entry.summary is not None:
- content = entry.summary.value
- else:
- content = ''
- published_at, updated_at = _get_article_dates(entry.published,
- entry.updated)
- # Find article link and attachments
- article_link = None
- attachments = list()
- for candidate_link in entry.links:
- if candidate_link.rel in ('alternate', None):
- article_link = candidate_link.href
- elif candidate_link.rel == 'enclosure':
- attachments.append(Attachment(
- title=_get_attachment_title(candidate_link.title,
- candidate_link.href),
- link=candidate_link.href,
- mime_type=candidate_link.type_,
- size_in_bytes=candidate_link.length,
- duration=None
- ))
-
- if entry.title is None:
- entry_title = None
- elif entry.title.text_type in (atom.AtomTextType.html,
- atom.AtomTextType.xhtml):
- entry_title = html.unescape(entry.title.value).strip()
- else:
- entry_title = entry.title.value
-
- articles.append(Article(
- entry.id_,
- entry_title,
- article_link,
- content,
- published_at,
- updated_at,
- attachments
- ))
-
- # Find feed link
- link = None
- for candidate_link in atom_feed.links:
- if candidate_link.rel == 'self':
- link = candidate_link.href
- break
-
- return Feed(
- atom_feed.title.value if atom_feed.title else atom_feed.id_,
- atom_feed.subtitle.value if atom_feed.subtitle else None,
- link,
- atom_feed.updated,
- articles
- )
-
-
-def _adapt_rss_channel(rss_channel: rss.RSSChannel) -> Feed:
- articles = list()
- for item in rss_channel.items:
- attachments = [
- Attachment(link=e.url, mime_type=e.type, size_in_bytes=e.length,
- title=_get_attachment_title(None, e.url), duration=None)
- for e in item.enclosures
- ]
- articles.append(Article(
- item.guid or item.link,
- item.title,
- item.link,
- item.content_encoded or item.description or '',
- item.pub_date,
- None,
- attachments
- ))
-
- if rss_channel.title is None and rss_channel.link is None:
- raise FeedParseError('RSS feed does not have a title nor a link')
-
- return Feed(
- rss_channel.title if rss_channel.title else rss_channel.link,
- rss_channel.description,
- rss_channel.link,
- rss_channel.pub_date,
- articles
- )
-
-
-def _adapt_json_feed(json_feed: json_feed.JSONFeed) -> Feed:
- articles = list()
- for item in json_feed.items:
- attachments = [
- Attachment(a.url, a.mime_type,
- _get_attachment_title(a.title, a.url),
- a.size_in_bytes, a.duration)
- for a in item.attachments
- ]
- articles.append(Article(
- item.id_,
- item.title,
- item.url,
- item.content_html or item.content_text or '',
- item.date_published,
- item.date_modified,
- attachments
- ))
-
- return Feed(
- json_feed.title,
- json_feed.description,
- json_feed.feed_url,
- None,
- articles
- )
-
-
-def _get_article_dates(published_at: Optional[datetime],
- updated_at: Optional[datetime]
- ) -> Tuple[Optional[datetime], Optional[datetime]]:
- if published_at and updated_at:
- return published_at, updated_at
-
- if updated_at:
- return updated_at, None
-
- if published_at:
- return published_at, None
-
- raise FeedParseError('Article does not have proper dates')
-
-
-def _get_attachment_title(attachment_title: Optional[str], link: str) -> str:
- if attachment_title:
- return attachment_title
-
- parsed_link = urllib.parse.urlparse(link)
- return os.path.basename(parsed_link.path)
-
-
-def _simple_parse(pairs, content) -> Feed:
- is_xml = True
- is_json = True
- for parser, adapter in pairs:
- try:
- return adapter(parser(content))
- except FeedXMLError:
- is_xml = False
- except FeedJSONError:
- is_json = False
- except FeedParseError:
- continue
-
- if not is_xml and not is_json:
- raise FeedDocumentError('File is not a supported feed type')
-
- raise FeedParseError('File is not a valid supported feed')
-
-
-def simple_parse_file(filename: str) -> Feed:
- """Parse an Atom, RSS or JSON feed from a local file."""
- pairs = (
- (rss.parse_rss_file, _adapt_rss_channel),
- (atom.parse_atom_file, _adapt_atom_feed),
- (json_feed.parse_json_feed_file, _adapt_json_feed)
- )
- return _simple_parse(pairs, filename)
-
-
-def simple_parse_bytes(data: bytes) -> Feed:
- """Parse an Atom, RSS or JSON feed from a byte-string containing data."""
- pairs = (
- (rss.parse_rss_bytes, _adapt_rss_channel),
- (atom.parse_atom_bytes, _adapt_atom_feed),
- (json_feed.parse_json_feed_bytes, _adapt_json_feed)
- )
- return _simple_parse(pairs, data)