aboutsummaryrefslogtreecommitdiffstats
path: root/plugins/pelican_comments/wp2comments
blob: 365cfff64919772c526b351107de64dbb8b3ea2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/python

from codecs import open
import sys
import os
from lxml import etree

# Directory that receives the generated comment files. It is a relative
# path, so it is resolved against the current working directory, and
# wp2comments() creates it if it does not exist yet.
COMMENT_DIR = 'comments'


def _child_text(element, tag, nsmap):
    """Return the text of the first child *tag* of *element*, or ''.

    An absent child and a child without any text are both reported as the
    empty string, so callers never end up formatting ``None`` into output.
    """
    child = element.find(tag, namespaces=nsmap)
    if child is None:
        return ''
    return child.text or ''


def wp2comments(xml):
    """Write one Markdown file per approved comment found in *xml*.

    *xml* is anything ``etree.parse`` accepts (typically the path of an
    XML export that uses the ``wp:`` namespace prefix, as WordPress
    exports do).  For every ``item`` whose ``wp:status`` is ``publish``,
    each ``wp:comment`` whose ``wp:comment_approved`` is ``'1'`` is written
    to ``COMMENT_DIR/<post_name>-<n>.md``, where ``n`` is a counter that
    runs across all posts in the file.  Each file holds a small metadata
    header followed by a blank line and the comment body.

    ``COMMENT_DIR`` is created if missing.  If it exists but is not a
    directory, a message is printed and the process exits with status 1.
    """
    tree = etree.parse(xml)
    root = tree.getroot()
    nsmap = root.nsmap

    if not os.path.exists(COMMENT_DIR):
        os.makedirs(COMMENT_DIR)
    elif not os.path.isdir(COMMENT_DIR):
        print('"%s" exists but is not a directory!' % COMMENT_DIR)
        sys.exit(1)

    # Running number over every comment written, used to keep file names
    # unique even when several comments belong to the same post.
    n = 0

    for item in root.findall('.//item'):
        title = item.find('title')
        if title is None:
            continue

        # Only fetch comments from published posts.
        status = item.find('wp:status', namespaces=nsmap)
        if status is None or status.text != 'publish':
            continue

        # The slug becomes both the file name prefix and the post_id, so a
        # missing or empty one cannot produce a usable comment file.
        slug = _child_text(item, 'wp:post_name', nsmap)
        if not slug:
            print('WARNING: skipping "%s" with no post_name' % (title.text, ))
            continue

        for comment in item.findall('wp:comment', namespaces=nsmap):
            # Skip comments that are pending, spam or otherwise unapproved.
            if _child_text(comment, 'wp:comment_approved', nsmap) != '1':
                continue

            author = _child_text(comment, 'wp:comment_author', nsmap)
            ip = _child_text(comment, 'wp:comment_author_IP', nsmap)
            date = _child_text(comment, 'wp:comment_date_gmt', nsmap)
            email = _child_text(comment, 'wp:comment_author_email', nsmap)
            url = _child_text(comment, 'wp:comment_author_url', nsmap)
            content = _child_text(comment, 'wp:comment_content', nsmap)

            n += 1
            path = os.path.join(COMMENT_DIR, '%s-%d.md' % (slug, n))
            # The with-block closes the file even if a write fails.
            with open(path, encoding='utf-8', mode='w') as f:
                f.write(u'post_id: %s\n' % (slug, ))
                f.write(u'Author: %s\n' % (author, ))
                f.write(u'Date: %s\n' % (date, ))
                f.write(u'Author_Email: %s\n' % (email, ))
                f.write(u'Author_IP: %s\n' % (ip, ))
                f.write(u'Web: %s\n' % (url, ))
                f.write(u'\n%s\n' % (content, ))


if __name__ == '__main__':
    # Fail with a short usage message instead of an IndexError traceback
    # when the export file argument is missing.  Passing a string to
    # sys.exit() prints it to stderr and exits with status 1.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <export.xml>' % (sys.argv[0], ))
    wp2comments(sys.argv[1])