aboutsummaryrefslogtreecommitdiffstats
path: root/python/werkzeug/middleware/lint.py
blob: 98f958177efa7bc6a840afb6a18461418cbd6202 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
"""
WSGI Protocol Linter
====================

This module provides a middleware that performs sanity checks on the
behavior of the WSGI server and application. It checks that the
:pep:`3333` WSGI spec is properly implemented. It also warns on some
common HTTP errors such as non-empty responses for 304 status codes.

.. autoclass:: LintMiddleware

:copyright: 2007 Pallets
:license: BSD-3-Clause
"""
from warnings import warn

from .._compat import implements_iterator
from .._compat import PY2
from .._compat import string_types
from ..datastructures import Headers
from ..http import is_entity_header
from ..wsgi import FileWrapper

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse


class WSGIWarning(Warning):
    """Warning category used by this module for WSGI-related problems."""


class HTTPWarning(Warning):
    """Warning category used by this module for HTTP-related problems."""


def check_string(context, obj, stacklevel=3):
    """Emit a :class:`WSGIWarning` when *obj* is not exactly a ``str``.

    :param context: Name of the value or call being checked; it is
        interpolated into the warning message.
    :param obj: The object whose type is inspected.
    :param stacklevel: Forwarded to :func:`warnings.warn` so the warning
        is attributed to a frame above this helper instead of to the
        helper itself.
    """
    # Exact type comparison: instances of ``str`` subclasses are
    # reported as well.
    if type(obj) is not str:
        warn(
            "'%s' requires strings, got '%s'" % (context, type(obj).__name__),
            WSGIWarning,
            stacklevel=stacklevel,
        )


class InputStream(object):
    """Proxy around ``wsgi.input`` that warns about questionable usage
    before delegating every call to the wrapped stream.
    """

    def __init__(self, stream):
        self._stream = stream

    def read(self, *args):
        """Delegate to the wrapped ``read``; warn unless exactly one
        positional argument was given.
        """
        argc = len(args)

        if argc == 0:
            warn(
                "WSGI does not guarantee an EOF marker on the input stream, thus making"
                " calls to 'wsgi.input.read()' unsafe. Conforming servers may never"
                " return from this call.",
                WSGIWarning,
                stacklevel=2,
            )
        elif argc > 1:
            warn(
                "Too many parameters passed to 'wsgi.input.read()'.",
                WSGIWarning,
                stacklevel=2,
            )

        return self._stream.read(*args)

    def readline(self, *args):
        """Delegate to the wrapped ``readline`` after warning about the
        call; more than one argument raises :exc:`TypeError`.
        """
        if len(args) > 1:
            raise TypeError("Too many arguments passed to 'wsgi.input.readline()'.")

        if args:
            message = (
                "'wsgi.input.readline()' was called with a size hint. WSGI does not"
                " support this, although it's available on all major servers."
            )
        else:
            message = (
                "Calls to 'wsgi.input.readline()' without arguments are unsafe. Use"
                " 'wsgi.input.read()' instead."
            )

        warn(message, WSGIWarning, stacklevel=2)
        return self._stream.readline(*args)

    def __iter__(self):
        """Iterate the wrapped stream, or warn and yield nothing when the
        stream does not support iteration.
        """
        try:
            iterator = iter(self._stream)
        except TypeError:
            warn("'wsgi.input' is not iterable.", WSGIWarning, stacklevel=2)
            iterator = iter(())
        return iterator

    def close(self):
        """Warn that the application closed the stream, then close it."""
        warn("The application closed the input stream!", WSGIWarning, stacklevel=2)
        self._stream.close()


class ErrorStream(object):
    """Proxy around ``wsgi.errors`` that type-checks written data and
    warns when the application closes the stream.
    """

    def __init__(self, stream):
        self._stream = stream

    def write(self, s):
        """Check the type of *s*, then write it to the wrapped stream."""
        check_string("wsgi.error.write()", s)
        self._stream.write(s)

    def flush(self):
        """Flush the wrapped stream."""
        self._stream.flush()

    def writelines(self, seq):
        """Write every item of *seq* through :meth:`write`, so each one
        is type-checked individually.
        """
        for chunk in seq:
            self.write(chunk)

    def close(self):
        """Warn that the application closed the stream, then close it."""
        warn("The application closed the error stream!", WSGIWarning, stacklevel=2)
        self._stream.close()


class GuardedWrite(object):
    """Wrapper for the ``write`` callable handed back by the server's
    ``start_response``.

    Each call type-checks the data, forwards it to the real callable and
    records its length in the shared *chunks* list.

    :param write: The callable returned by ``start_response``.
    :param chunks: List that collects the length of every chunk written.
    """

    def __init__(self, write, chunks):
        self._write = write
        self._chunks = chunks

    def __call__(self, s):
        check_string("write()", s)
        # ``self._write`` is the write callable itself, not a file-like
        # object, so it is invoked directly rather than via ``.write``.
        self._write(s)
        self._chunks.append(len(s))


@implements_iterator
class GuardedIterator(object):
    """Iterator wrapper that lints the application's response iterable.

    :param iterator: The iterable returned by the wrapped application.
    :param headers_set: List that is empty until the response has been
        started and afterwards unpacks to ``(status_code, headers)``.
    :param chunks: List collecting the length of every chunk produced.
    """

    def __init__(self, iterator, headers_set, chunks):
        self._iterator = iterator
        if PY2:
            self._next = iter(iterator).next
        else:
            self._next = iter(iterator).__next__
        self.closed = False
        self.headers_set = headers_set
        self.chunks = chunks

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next chunk, warning about use after ``close()``,
        about chunks produced before the response was started, and about
        chunks of the wrong type.
        """
        if self.closed:
            warn("Iterated over closed 'app_iter'.", WSGIWarning, stacklevel=2)

        rv = self._next()

        if not self.headers_set:
            warn(
                "The application returned before it started the response.",
                WSGIWarning,
                stacklevel=2,
            )

        check_string("application iterator items", rv)
        self.chunks.append(len(rv))
        return rv

    def close(self):
        """Close the wrapped iterable (when it has ``close``) and compare
        what was sent with the status code and the response headers.
        """
        self.closed = True

        if hasattr(self._iterator, "close"):
            self._iterator.close()

        # Nothing to compare against when the response was never started.
        if self.headers_set:
            status_code, headers = self.headers_set
            bytes_sent = sum(self.chunks)
            content_length = headers.get("content-length", type=int)

            if status_code == 304:
                for key, _value in headers:
                    key = key.lower()
                    if key not in ("expires", "content-location") and is_entity_header(
                        key
                    ):
                        warn(
                            "Entity header %r found in 304 response." % key, HTTPWarning
                        )
                if bytes_sent:
                    warn("304 responses must not have a body.", HTTPWarning)
            elif 100 <= status_code < 200 or status_code == 204:
                # NOTE(review): this also fires when ``content_length``
                # is None, which presumably means the header is absent -
                # confirm that this is intended.
                if content_length != 0:
                    warn(
                        "%r responses must have an empty content length." % status_code,
                        HTTPWarning,
                    )
                if bytes_sent:
                    warn(
                        "%r responses must not have a body." % status_code, HTTPWarning
                    )
            elif content_length is not None and content_length != bytes_sent:
                warn(
                    "Content-Length and the number of bytes sent to the client do not"
                    " match.",
                    WSGIWarning,
                )

    def __del__(self):
        # ``__init__`` can raise (for example when the application did
        # not return an iterable) before ``closed`` is assigned; treat
        # such a half-built instance as closed instead of failing with
        # an AttributeError inside the finalizer.
        if not getattr(self, "closed", True):
            try:
                warn(
                    "Iterator was garbage collected before it was closed.", WSGIWarning
                )
            except Exception:
                # Best effort only: a finalizer must not raise.
                pass

class LintMiddleware(object):
    """Warns about common errors in the WSGI and HTTP behavior of the
    server and wrapped application. Some of the issues it checks are:

    -   invalid status codes
    -   non-bytestrings sent to the WSGI server
    -   strings returned from the WSGI application
    -   non-empty conditional responses
    -   unquoted etags
    -   relative URLs in the Location header
    -   unsafe calls to wsgi.input
    -   unclosed iterators

    Error information is emitted using the :mod:`warnings` module.

    :param app: The WSGI application to wrap.

    .. code-block:: python

        from werkzeug.middleware.lint import LintMiddleware
        app = LintMiddleware(app)
    """

    def __init__(self, app):
        self.app = app

    def check_environ(self, environ):
        """Warn about problems in the WSGI environ passed by the server.

        Checks the environ type, the presence of the required keys, the
        WSGI version and the leading slash of ``SCRIPT_NAME`` and
        ``PATH_INFO``.

        :param environ: The WSGI environ to inspect.
        """
        if type(environ) is not dict:
            warn(
                "WSGI environment is not a standard Python dict.",
                WSGIWarning,
                stacklevel=4,
            )
        for key in (
            "REQUEST_METHOD",
            "SERVER_NAME",
            "SERVER_PORT",
            "wsgi.version",
            "wsgi.input",
            "wsgi.errors",
            "wsgi.multithread",
            "wsgi.multiprocess",
            "wsgi.run_once",
        ):
            if key not in environ:
                warn(
                    "Required environment key %r not found" % key,
                    WSGIWarning,
                    stacklevel=3,
                )
        # ``get`` instead of indexing: a missing key was already reported
        # by the loop above and must not abort the lint with a KeyError.
        if environ.get("wsgi.version") != (1, 0):
            warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3)

        script_name = environ.get("SCRIPT_NAME", "")
        path_info = environ.get("PATH_INFO", "")

        if script_name and script_name[0] != "/":
            warn(
                "'SCRIPT_NAME' does not start with a slash: %r" % script_name,
                WSGIWarning,
                stacklevel=3,
            )

        if path_info and path_info[0] != "/":
            warn(
                "'PATH_INFO' does not start with a slash: %r" % path_info,
                WSGIWarning,
                stacklevel=3,
            )

    def check_start_response(self, status, headers, exc_info):
        """Lint the arguments the application passed to ``start_response``.

        :param status: The status line, e.g. ``"200 OK"``.
        :param headers: The list of ``(name, value)`` header tuples.
        :param exc_info: ``None`` or an exception info tuple.
        :return: A ``(status_code, headers)`` tuple where ``status_code``
            is an ``int`` and ``headers`` is a :class:`Headers` object
            built from the given list.
        :raises ValueError: If the first token of *status* cannot be
            converted with :func:`int`.
        """
        check_string("status", status)
        status_code = status.split(None, 1)[0]

        if len(status_code) != 3 or not status_code.isdigit():
            warn(WSGIWarning("Status code must be three digits"), stacklevel=3)

        if len(status) < 4 or status[3] != " ":
            warn(
                WSGIWarning(
                    "Invalid value for status %r.  Valid "
                    "status strings are three digits, a space "
                    "and a status explanation" % (status,)
                ),
                stacklevel=3,
            )

        status_code = int(status_code)

        if status_code < 100:
            warn(WSGIWarning("status code < 100 detected"), stacklevel=3)

        if type(headers) is not list:
            warn(WSGIWarning("header list is not a list"), stacklevel=3)

        for item in headers:
            if type(item) is not tuple or len(item) != 2:
                warn(WSGIWarning("Headers must be 2-item tuples"), stacklevel=3)
            name, value = item
            if type(name) is not str or type(value) is not str:
                warn(WSGIWarning("header items must be strings"), stacklevel=3)
            if name.lower() == "status":
                warn(
                    WSGIWarning(
                        "The status header is not supported due to "
                        "conflicts with the CGI spec."
                    ),
                    stacklevel=3,
                )

        if exc_info is not None and not isinstance(exc_info, tuple):
            warn(WSGIWarning("invalid value for exc_info"), stacklevel=3)

        headers = Headers(headers)
        self.check_headers(headers)

        return status_code, headers

    def check_headers(self, headers):
        """Warn about a malformed ``ETag`` and a ``Location`` header that
        has no network location.

        :param headers: Header container supporting ``get(name)``.
        """
        etag = headers.get("etag")

        if etag is not None:
            if etag.startswith(("W/", "w/")):
                if etag.startswith("w/"):
                    warn(
                        HTTPWarning("weak etag indicator should be upcase."),
                        stacklevel=4,
                    )

                # Strip the weak indicator before checking the quoting.
                etag = etag[2:]

            if not (etag[:1] == etag[-1:] == '"'):
                warn(HTTPWarning("unquoted etag emitted."), stacklevel=4)

        location = headers.get("location")

        if location is not None:
            if not urlparse(location).netloc:
                warn(
                    HTTPWarning("absolute URLs required for location header"),
                    stacklevel=4,
                )

    def check_iterator(self, app_iter):
        """Warn when the application returned a string as its response
        iterable.

        :param app_iter: The object returned by the wrapped application.
        """
        if isinstance(app_iter, string_types):
            warn(
                "The application returned a string. The response will send one character"
                " at a time to the client, which will kill performance. Return a list"
                " or iterable instead.",
                WSGIWarning,
                stacklevel=3,
            )

    def __call__(self, *args, **kwargs):
        """Run the wrapped application with linting proxies installed.

        Accepts ``*args``/``**kwargs`` so that a server calling the
        application with the wrong signature is warned about. Exactly
        two positional arguments (``environ``, ``start_response``) are
        still required for the call to proceed.

        :return: A :class:`GuardedIterator` around the application's
            response iterable.
        """
        if len(args) != 2:
            warn("A WSGI app takes two arguments.", WSGIWarning, stacklevel=2)

        if kwargs:
            warn(
                "A WSGI app does not take keyword arguments.", WSGIWarning, stacklevel=2
            )

        environ, start_response = args

        self.check_environ(environ)

        # Only wrap streams the server actually provided; a missing one
        # was already reported by ``check_environ`` and must not turn
        # into a KeyError here.
        if "wsgi.input" in environ:
            environ["wsgi.input"] = InputStream(environ["wsgi.input"])
        if "wsgi.errors" in environ:
            environ["wsgi.errors"] = ErrorStream(environ["wsgi.errors"])

        # Hook our own file wrapper in so that applications will always
        # iterate to the end and we can check the content length.
        environ["wsgi.file_wrapper"] = FileWrapper

        # Shared with the GuardedIterator: filled in once the application
        # starts the response, and appended to for every chunk sent.
        headers_set = []
        chunks = []

        def checking_start_response(*args, **kwargs):
            if len(args) not in (2, 3):
                warn(
                    "Invalid number of arguments: %s, expected 2 or 3." % len(args),
                    WSGIWarning,
                    stacklevel=2,
                )

            if kwargs:
                warn(
                    "'start_response' does not take keyword arguments.",
                    WSGIWarning,
                    stacklevel=2,
                )

            status, headers = args[:2]

            if len(args) == 3:
                exc_info = args[2]
            else:
                exc_info = None

            headers_set[:] = self.check_start_response(status, headers, exc_info)
            return GuardedWrite(start_response(status, headers, exc_info), chunks)

        app_iter = self.app(environ, checking_start_response)
        self.check_iterator(app_iter)
        return GuardedIterator(app_iter, headers_set, chunks)