mediagoblin/util.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568

# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011 Free Software Foundation, Inc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division

from email.MIMEText import MIMEText
import gettext
import pkg_resources
import smtplib
import sys
import re
import urllib
from math import ceil, floor
import copy
import wtforms

from babel.localedata import exists
import jinja2
import translitcodec
from webob import Response, exc
from lxml.html.clean import Cleaner
import markdown

from mediagoblin import mg_globals
from mediagoblin import messages
from mediagoblin.db.util import ObjectId

from itertools import izip, count

DISPLAY_IMAGE_FETCHING_ORDER = [u'medium', u'original', u'thumb']

TESTS_ENABLED = False
def _activate_testing():
    """
    Call this to activate testing in util.py
    """
    global TESTS_ENABLED
    TESTS_ENABLED = True


def clear_test_buckets():
    """
    We store some things for testing purposes that should be cleared
    when we want a "clean slate" of information for our next round of
    tests.  Call this function to wipe all that stuff clean.

    Also wipes out some other things we might redefine during testing,
    like the jinja envs.
    """
    global SETUP_JINJA_ENVS
    SETUP_JINJA_ENVS = {}

    global EMAIL_TEST_INBOX
    global EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX = []
    EMAIL_TEST_MBOX_INBOX = []

    clear_test_template_context()


SETUP_JINJA_ENVS = {}


def get_jinja_env(template_loader, locale):
    """
    Set up the Jinja environment, 

    (In the future we may have another system for providing theming;
    for now this is good enough.)
    """
    setup_gettext(locale)

    # If we have a jinja environment set up with this locale, just
    # return that one.
    if SETUP_JINJA_ENVS.has_key(locale):
        return SETUP_JINJA_ENVS[locale]

    template_env = jinja2.Environment(
        loader=template_loader, autoescape=True,
        extensions=['jinja2.ext.i18n', 'jinja2.ext.autoescape'])

    template_env.install_gettext_callables(
        mg_globals.translations.gettext,
        mg_globals.translations.ngettext)

    # All templates will know how to ...
    # ... fetch all waiting messages and remove them from the queue
    template_env.globals['fetch_messages'] = messages.fetch_messages

    if exists(locale):
        SETUP_JINJA_ENVS[locale] = template_env

    return template_env


# We'll store context information here when doing unit tests
TEMPLATE_TEST_CONTEXT = {}


def render_template(request, template_path, context):
    """
    Render a template with context.

    Always inserts the request into the context, so you don't have to.
    Also stores the context if we're doing unit tests.  Helpful!
    """
    template = request.template_env.get_template(
        template_path)
    context['request'] = request
    rendered = template.render(context)

    if TESTS_ENABLED:
        TEMPLATE_TEST_CONTEXT[template_path] = context

    return rendered


def clear_test_template_context():
    global TEMPLATE_TEST_CONTEXT
    TEMPLATE_TEST_CONTEXT = {}


def render_to_response(request, template, context):
    """Much like Django's shortcut.render()"""
    return Response(render_template(request, template, context))


def redirect(request, *args, **kwargs):
    """Returns a HTTPFound(), takes a request and then urlgen params"""
    
    querystring = None
    if kwargs.get('querystring'):
        querystring = kwargs.get('querystring')
        del kwargs['querystring']

    return exc.HTTPFound(
        location=''.join([
                request.urlgen(*args, **kwargs),
                querystring if querystring else '']))


def setup_user_in_request(request):
    """
    Examine a request and tack on a request.user parameter if that's
    appropriate.
    """
    if not request.session.has_key('user_id'):
        request.user = None
        return

    user = None
    user = request.app.db.User.one(
        {'_id': ObjectId(request.session['user_id'])})

    if not user:
        # Something's wrong... this user doesn't exist?  Invalidate
        # this session.
        request.session.invalidate()

    request.user = user


def import_component(import_string):
    """
    Import a module component defined by STRING.  Probably a method,
    class, or global variable.

    Args:
     - import_string: a string that defines what to import.  Written
       in the format of "module1.module2:component"
    """
    module_name, func_name = import_string.split(':', 1)
    __import__(module_name)
    module = sys.modules[module_name]
    func = getattr(module, func_name)
    return func

_punct_re = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+')

def slugify(text, delim=u'-'):
    """
    Generates an ASCII-only slug. Taken from http://flask.pocoo.org/snippets/5/
    """
    result = []
    for word in _punct_re.split(text.lower()):
        word = word.encode('translit/long')
        if word:
            result.append(word)
    return unicode(delim.join(result))

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### Special email test stuff begins HERE
### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# We have two "test inboxes" here:
# 
# EMAIL_TEST_INBOX:
# ----------------
#   If you're writing test views, you'll probably want to check this.
#   It contains a list of MIMEText messages.
#
# EMAIL_TEST_MBOX_INBOX:
# ----------------------
#   This collects the messages from the FakeMhost inbox.  It's reslly
#   just here for testing the send_email method itself.
#
#   Anyway this contains:
#    - from
#    - to: a list of email recipient addresses
#    - message: not just the body, but the whole message, including
#      headers, etc.
#
# ***IMPORTANT!***
# ----------------
# Before running tests that call functions which send email, you should
# always call _clear_test_inboxes() to "wipe" the inboxes clean. 

EMAIL_TEST_INBOX = []
EMAIL_TEST_MBOX_INBOX = []


class FakeMhost(object):
    """
    Just a fake mail host so we can capture and test messages
    from send_email
    """
    def connect(self):
        pass

    def sendmail(self, from_addr, to_addrs, message):
        EMAIL_TEST_MBOX_INBOX.append(
            {'from': from_addr,
             'to': to_addrs,
             'message': message})

def _clear_test_inboxes():
    global EMAIL_TEST_INBOX
    global EMAIL_TEST_MBOX_INBOX
    EMAIL_TEST_INBOX = []
    EMAIL_TEST_MBOX_INBOX = []

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
### </Special email test stuff>
### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

def send_email(from_addr, to_addrs, subject, message_body):
    """
    Simple email sending wrapper, use this so we can capture messages
    for unit testing purposes.

    Args:
     - from_addr: address you're sending the email from
     - to_addrs: list of recipient email addresses
     - subject: subject of the email
     - message_body: email body text
    """
    # TODO: make a mock mhost if testing is enabled
    if TESTS_ENABLED or mg_globals.app_config['email_debug_mode']:
        mhost = FakeMhost()
    elif not mg_globals.app_config['email_debug_mode']:
        mhost = smtplib.SMTP()

    mhost.connect()

    message = MIMEText(message_body.encode('utf-8'), 'plain', 'utf-8')
    message['Subject'] = subject
    message['From'] = from_addr
    message['To'] = ', '.join(to_addrs)

    if TESTS_ENABLED:
        EMAIL_TEST_INBOX.append(message)

    if mg_globals.app_config['email_debug_mode']:
        print u"===== Email ====="
        print u"From address: %s" % message['From']
        print u"To addresses: %s" % message['To']
        print u"Subject: %s" % message['Subject']
        print u"-- Body: --"
        print message.get_payload(decode=True)

    return mhost.sendmail(from_addr, to_addrs, message.as_string())


###################
# Translation tools
###################


TRANSLATIONS_PATH = pkg_resources.resource_filename(
    'mediagoblin', 'translations')


def locale_to_lower_upper(locale):
    """
    Take a locale, regardless of style, and format it like "en-us"
    """
    if '-' in locale:
        lang, country = locale.split('-', 1)
        return '%s_%s' % (lang.lower(), country.upper())
    elif '_' in locale:
        lang, country = locale.split('_', 1)
        return '%s_%s' % (lang.lower(), country.upper())
    else:
        return locale.lower()


def locale_to_lower_lower(locale):
    """
    Take a locale, regardless of style, and format it like "en_US"
    """
    if '_' in locale:
        lang, country = locale.split('_', 1)
        return '%s-%s' % (lang.lower(), country.lower())
    else:
        return locale.lower()


def get_locale_from_request(request):
    """
    Figure out what target language is most appropriate based on the
    request
    """
    request_form = request.GET or request.POST

    if request_form.has_key('lang'):
        return locale_to_lower_upper(request_form['lang'])

    accept_lang_matches = request.accept_language.best_matches()

    # Your routing can explicitly specify a target language
    if request.matchdict.has_key('locale'):
        target_lang = request.matchdict['locale']
    elif request.session.has_key('target_lang'):
        target_lang = request.session['target_lang']
    # Pull the first acceptable language
    elif accept_lang_matches:
        target_lang = accept_lang_matches[0]
    # Fall back to English
    else:
        target_lang = 'en'

    return locale_to_lower_upper(target_lang)


# A super strict version of the lxml.html cleaner class
HTML_CLEANER = Cleaner(
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    page_structure=True,
    processing_instructions=True,
    embedded=True,
    frames=True,
    forms=True,
    annoying_tags=True,
    allow_tags=[
        'div', 'b', 'i', 'em', 'strong', 'p', 'ul', 'ol', 'li', 'a', 'br'],
    remove_unknown_tags=False, # can't be used with allow_tags
    safe_attrs_only=True,
    add_nofollow=True, # for now
    host_whitelist=(),
    whitelist_tags=set([]))


def clean_html(html):
    # clean_html barfs on an empty string
    if not html:
        return u''

    return HTML_CLEANER.clean_html(html)


def convert_to_tag_list_of_dicts(tag_string):
    """
    Filter input from incoming string containing user tags,

    Strips trailing, leading, and internal whitespace, and also converts
    the "tags" text into an array of tags
    """
    taglist = []
    if tag_string:

        # Strip out internal, trailing, and leading whitespace
        stripped_tag_string = u' '.join(tag_string.strip().split())

        # Split the tag string into a list of tags
        for tag in stripped_tag_string.split(
                                       mg_globals.app_config['tags_delimiter']):

            # Ignore empty or duplicate tags
            if tag.strip() and tag.strip() not in [t['name'] for t in taglist]:

                taglist.append({'name': tag.strip(),
                                'slug': slugify(tag.strip())})
    return taglist


def media_tags_as_string(media_entry_tags):
    """
    Generate a string from a media item's tags, stored as a list of dicts

    This is the opposite of convert_to_tag_list_of_dicts
    """
    media_tag_string = ''
    if media_entry_tags:
        media_tag_string = mg_globals.app_config['tags_delimiter'].join(
                                      [tag['name'] for tag in media_entry_tags])
    return media_tag_string

TOO_LONG_TAG_WARNING = \
    u'Tags must be shorter than %s characters.  Tags that are too long: %s'

def tag_length_validator(form, field):
    """
    Make sure tags do not exceed the maximum tag length.
    """
    tags = convert_to_tag_list_of_dicts(field.data)
    too_long_tags = [
        tag['name'] for tag in tags
        if len(tag['name']) > mg_globals.app_config['tags_max_length']]

    if too_long_tags:
        raise wtforms.ValidationError(
            TOO_LONG_TAG_WARNING % (mg_globals.app_config['tags_max_length'], \
                                    ', '.join(too_long_tags)))


MARKDOWN_INSTANCE = markdown.Markdown(safe_mode='escape')

def cleaned_markdown_conversion(text):
    """
    Take a block of text, run it through MarkDown, and clean its HTML.
    """
    # Markdown will do nothing with and clean_html can do nothing with
    # an empty string :)
    if not text:
        return u''

    return clean_html(MARKDOWN_INSTANCE.convert(text))


SETUP_GETTEXTS = {}

def setup_gettext(locale):
    """
    Setup the gettext instance based on this locale
    """
    # Later on when we have plugins we may want to enable the
    # multi-translations system they have so we can handle plugin
    # translations too

    # TODO: fallback nicely on translations from pt_PT to pt if not
    # available, etc.
    if SETUP_GETTEXTS.has_key(locale):
        this_gettext = SETUP_GETTEXTS[locale]
    else:
        this_gettext = gettext.translation(
            'mediagoblin', TRANSLATIONS_PATH, [locale], fallback=True)
        if exists(locale):
            SETUP_GETTEXTS[locale] = this_gettext

    mg_globals.setup_globals(
        translations=this_gettext)


PAGINATION_DEFAULT_PER_PAGE = 30

class Pagination(object):
    """
    Pagination class for mongodb queries.

    Initialization through __init__(self, cursor, page=1, per_page=2),
    get actual data slice through __call__().
    """

    def __init__(self, page, cursor, per_page=PAGINATION_DEFAULT_PER_PAGE,
                 jump_to_id=False):
        """
        Initializes Pagination

        Args:
         - page: requested page
         - per_page: number of objects per page
         - cursor: db cursor 
         - jump_to_id: ObjectId, sets the page to the page containing the object
           with _id == jump_to_id.
        """
        self.page = page
        self.per_page = per_page
        self.cursor = cursor
        self.total_count = self.cursor.count()
        self.active_id = None

        if jump_to_id:
            cursor = copy.copy(self.cursor)

            for (doc, increment) in izip(cursor, count(0)):
                if doc['_id'] == jump_to_id:
                    self.page = 1 + int(floor(increment / self.per_page))

                    self.active_id = jump_to_id
                    break


    def __call__(self):
        """
        Returns slice of objects for the requested page
        """
        return self.cursor.skip(
            (self.page - 1) * self.per_page).limit(self.per_page)

    @property
    def pages(self):
        return int(ceil(self.total_count / float(self.per_page)))

    @property
    def has_prev(self):
        return self.page > 1

    @property
    def has_next(self):
        return self.page < self.pages

    def iter_pages(self, left_edge=2, left_current=2,
                   right_current=5, right_edge=2):
        last = 0
        for num in xrange(1, self.pages + 1):
            if num <= left_edge or \
               (num > self.page - left_current - 1 and \
                num < self.page + right_current) or \
               num > self.pages - right_edge:
                if last + 1 != num:
                    yield None
                yield num
                last = num

    def get_page_url_explicit(self, base_url, get_params, page_no):
        """ 
        Get a page url by adding a page= parameter to the base url
        """ 
        new_get_params = copy.copy(get_params or {})
        new_get_params['page'] = page_no
        return "%s?%s" % (
            base_url, urllib.urlencode(new_get_params))

    def get_page_url(self, request, page_no):
        """ 
        Get a new page url based of the request, and the new page number.

        This is a nice wrapper around get_page_url_explicit()
        """ 
        return self.get_page_url_explicit(
            request.path_info, request.GET, page_no)