aboutsummaryrefslogtreecommitdiffstats
path: root/mediagoblin/plugins/trim_whitespace/__init__.py
blob: 713d13d509a67179e8771e65c8101956d8a67d76 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# GNU MediaGoblin -- federated, autonomous media hosting
# Copyright (C) 2011, 2012 MediaGoblin contributors.  See AUTHORS.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import logging
import re

from mediagoblin import meddleware

_log = logging.getLogger(__name__)

class TrimWhiteSpaceMeddleware(meddleware.BaseMeddleware):
    _setup_plugin_called = 0
    RE_MULTI_WHITESPACE = re.compile(br'(\s)\s+', re.M)

    def process_response(self, request, response):
        """Perform very naive html tidying by removing multiple whitespaces"""
        # werkzeug.BaseResponse has no content_type attr, this comes via
        # werkzeug.wrappers.CommonRequestDescriptorsMixin (part of
        # wrappers.Response)
        if getattr(response ,'content_type', None) != 'text/html':
            return

        # This is a tad more complex than needed to be able to handle
        # response.data and response.body, depending on whether we have
        # a werkzeug Resonse or a webob one. Let's kill webob soon!
        if hasattr(response, 'body') and not hasattr(response, 'data'):
            # Old-style webob Response object.
            # TODO: Remove this once we transition away from webob
            resp_attr = 'body'
        else:
            resp_attr = 'data'
            # Don't flatten iterator to list when we fudge the response body
            # (see werkzeug.Response documentation)
            response.implicit_sequence_conversion = False

        # Set the tidied text. Very naive tidying for now, just strip all
        # subsequent whitespaces (this preserves most newlines)
        setattr(response, resp_attr, re.sub(
                TrimWhiteSpaceMeddleware.RE_MULTI_WHITESPACE, br'\1',
                getattr(response, resp_attr)))

    @classmethod
    def setup_plugin(cls):
        """Set up this meddleware as a plugin during 'setup' hook"""
        global _log
        if cls._setup_plugin_called:
            _log.info('Trim whitespace plugin was already set up.')
            return

        _log.debug('Trim whitespace plugin set up.')
        cls._setup_plugin_called += 1

        # Append ourselves to the list of enabled Meddlewares
        meddleware.ENABLED_MEDDLEWARE.append(
            f'{cls.__module__}:{cls.__name__}')


hooks = {
    'setup': TrimWhiteSpaceMeddleware.setup_plugin
    }