aboutsummaryrefslogtreecommitdiffstats
path: root/python/werkzeug/wsgi.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/werkzeug/wsgi.py')
-rw-r--r--python/werkzeug/wsgi.py1067
1 files changed, 1067 insertions, 0 deletions
diff --git a/python/werkzeug/wsgi.py b/python/werkzeug/wsgi.py
new file mode 100644
index 0000000..f069f2d
--- /dev/null
+++ b/python/werkzeug/wsgi.py
@@ -0,0 +1,1067 @@
+# -*- coding: utf-8 -*-
+"""
+ werkzeug.wsgi
+ ~~~~~~~~~~~~~
+
+ This module implements WSGI related helpers.
+
+ :copyright: 2007 Pallets
+ :license: BSD-3-Clause
+"""
+import io
+import re
+import warnings
+from functools import partial
+from functools import update_wrapper
+from itertools import chain
+
+from ._compat import BytesIO
+from ._compat import implements_iterator
+from ._compat import make_literal_wrapper
+from ._compat import string_types
+from ._compat import text_type
+from ._compat import to_bytes
+from ._compat import to_unicode
+from ._compat import try_coerce_native
+from ._compat import wsgi_get_bytes
+from ._internal import _encode_idna
+from .urls import uri_to_iri
+from .urls import url_join
+from .urls import url_parse
+from .urls import url_quote
+
+
+def responder(f):
+ """Marks a function as responder. Decorate a function with it and it
+ will automatically call the return value as WSGI application.
+
+ Example::
+
+ @responder
+ def application(environ, start_response):
+ return Response('Hello World!')
+ """
+ return update_wrapper(lambda *a: f(*a)(*a[-2:]), f)
+
+
+def get_current_url(
+ environ,
+ root_only=False,
+ strip_querystring=False,
+ host_only=False,
+ trusted_hosts=None,
+):
+ """A handy helper function that recreates the full URL as IRI for the
+ current request or parts of it. Here's an example:
+
+ >>> from werkzeug.test import create_environ
+ >>> env = create_environ("/?param=foo", "http://localhost/script")
+ >>> get_current_url(env)
+ 'http://localhost/script/?param=foo'
+ >>> get_current_url(env, root_only=True)
+ 'http://localhost/script/'
+ >>> get_current_url(env, host_only=True)
+ 'http://localhost/'
+ >>> get_current_url(env, strip_querystring=True)
+ 'http://localhost/script/'
+
+ This optionally it verifies that the host is in a list of trusted hosts.
+ If the host is not in there it will raise a
+ :exc:`~werkzeug.exceptions.SecurityError`.
+
+ Note that the string returned might contain unicode characters as the
+ representation is an IRI not an URI. If you need an ASCII only
+ representation you can use the :func:`~werkzeug.urls.iri_to_uri`
+ function:
+
+ >>> from werkzeug.urls import iri_to_uri
+ >>> iri_to_uri(get_current_url(env))
+ 'http://localhost/script/?param=foo'
+
+ :param environ: the WSGI environment to get the current URL from.
+ :param root_only: set `True` if you only want the root URL.
+ :param strip_querystring: set to `True` if you don't want the querystring.
+ :param host_only: set to `True` if the host URL should be returned.
+ :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted`
+ for more information.
+ """
+ tmp = [environ["wsgi.url_scheme"], "://", get_host(environ, trusted_hosts)]
+ cat = tmp.append
+ if host_only:
+ return uri_to_iri("".join(tmp) + "/")
+ cat(url_quote(wsgi_get_bytes(environ.get("SCRIPT_NAME", ""))).rstrip("/"))
+ cat("/")
+ if not root_only:
+ cat(url_quote(wsgi_get_bytes(environ.get("PATH_INFO", "")).lstrip(b"/")))
+ if not strip_querystring:
+ qs = get_query_string(environ)
+ if qs:
+ cat("?" + qs)
+ return uri_to_iri("".join(tmp))
+
+
+def host_is_trusted(hostname, trusted_list):
+ """Checks if a host is trusted against a list. This also takes care
+ of port normalization.
+
+ .. versionadded:: 0.9
+
+ :param hostname: the hostname to check
+ :param trusted_list: a list of hostnames to check against. If a
+ hostname starts with a dot it will match against
+ all subdomains as well.
+ """
+ if not hostname:
+ return False
+
+ if isinstance(trusted_list, string_types):
+ trusted_list = [trusted_list]
+
+ def _normalize(hostname):
+ if ":" in hostname:
+ hostname = hostname.rsplit(":", 1)[0]
+ return _encode_idna(hostname)
+
+ try:
+ hostname = _normalize(hostname)
+ except UnicodeError:
+ return False
+ for ref in trusted_list:
+ if ref.startswith("."):
+ ref = ref[1:]
+ suffix_match = True
+ else:
+ suffix_match = False
+ try:
+ ref = _normalize(ref)
+ except UnicodeError:
+ return False
+ if ref == hostname:
+ return True
+ if suffix_match and hostname.endswith(b"." + ref):
+ return True
+ return False
+
+
+def get_host(environ, trusted_hosts=None):
+ """Return the host for the given WSGI environment. This first checks
+ the ``Host`` header. If it's not present, then ``SERVER_NAME`` and
+ ``SERVER_PORT`` are used. The host will only contain the port if it
+ is different than the standard port for the protocol.
+
+ Optionally, verify that the host is trusted using
+ :func:`host_is_trusted` and raise a
+ :exc:`~werkzeug.exceptions.SecurityError` if it is not.
+
+ :param environ: The WSGI environment to get the host from.
+ :param trusted_hosts: A list of trusted hosts.
+ :return: Host, with port if necessary.
+ :raise ~werkzeug.exceptions.SecurityError: If the host is not
+ trusted.
+ """
+ if "HTTP_HOST" in environ:
+ rv = environ["HTTP_HOST"]
+ if environ["wsgi.url_scheme"] == "http" and rv.endswith(":80"):
+ rv = rv[:-3]
+ elif environ["wsgi.url_scheme"] == "https" and rv.endswith(":443"):
+ rv = rv[:-4]
+ else:
+ rv = environ["SERVER_NAME"]
+ if (environ["wsgi.url_scheme"], environ["SERVER_PORT"]) not in (
+ ("https", "443"),
+ ("http", "80"),
+ ):
+ rv += ":" + environ["SERVER_PORT"]
+ if trusted_hosts is not None:
+ if not host_is_trusted(rv, trusted_hosts):
+ from .exceptions import SecurityError
+
+ raise SecurityError('Host "%s" is not trusted' % rv)
+ return rv
+
+
+def get_content_length(environ):
+ """Returns the content length from the WSGI environment as
+ integer. If it's not available or chunked transfer encoding is used,
+ ``None`` is returned.
+
+ .. versionadded:: 0.9
+
+ :param environ: the WSGI environ to fetch the content length from.
+ """
+ if environ.get("HTTP_TRANSFER_ENCODING", "") == "chunked":
+ return None
+
+ content_length = environ.get("CONTENT_LENGTH")
+ if content_length is not None:
+ try:
+ return max(0, int(content_length))
+ except (ValueError, TypeError):
+ pass
+
+
+def get_input_stream(environ, safe_fallback=True):
+ """Returns the input stream from the WSGI environment and wraps it
+ in the most sensible way possible. The stream returned is not the
+ raw WSGI stream in most cases but one that is safe to read from
+ without taking into account the content length.
+
+ If content length is not set, the stream will be empty for safety reasons.
+ If the WSGI server supports chunked or infinite streams, it should set
+ the ``wsgi.input_terminated`` value in the WSGI environ to indicate that.
+
+ .. versionadded:: 0.9
+
+ :param environ: the WSGI environ to fetch the stream from.
+ :param safe_fallback: use an empty stream as a safe fallback when the
+ content length is not set. Disabling this allows infinite streams,
+ which can be a denial-of-service risk.
+ """
+ stream = environ["wsgi.input"]
+ content_length = get_content_length(environ)
+
+ # A wsgi extension that tells us if the input is terminated. In
+ # that case we return the stream unchanged as we know we can safely
+ # read it until the end.
+ if environ.get("wsgi.input_terminated"):
+ return stream
+
+ # If the request doesn't specify a content length, returning the stream is
+ # potentially dangerous because it could be infinite, malicious or not. If
+ # safe_fallback is true, return an empty stream instead for safety.
+ if content_length is None:
+ return BytesIO() if safe_fallback else stream
+
+ # Otherwise limit the stream to the content length
+ return LimitedStream(stream, content_length)
+
+
+def get_query_string(environ):
+ """Returns the `QUERY_STRING` from the WSGI environment. This also takes
+ care about the WSGI decoding dance on Python 3 environments as a
+ native string. The string returned will be restricted to ASCII
+ characters.
+
+ .. versionadded:: 0.9
+
+ :param environ: the WSGI environment object to get the query string from.
+ """
+ qs = wsgi_get_bytes(environ.get("QUERY_STRING", ""))
+ # QUERY_STRING really should be ascii safe but some browsers
+ # will send us some unicode stuff (I am looking at you IE).
+ # In that case we want to urllib quote it badly.
+ return try_coerce_native(url_quote(qs, safe=":&%=+$!*'(),"))
+
+
+def get_path_info(environ, charset="utf-8", errors="replace"):
+ """Returns the `PATH_INFO` from the WSGI environment and properly
+ decodes it. This also takes care about the WSGI decoding dance
+ on Python 3 environments. if the `charset` is set to `None` a
+ bytestring is returned.
+
+ .. versionadded:: 0.9
+
+ :param environ: the WSGI environment object to get the path from.
+ :param charset: the charset for the path info, or `None` if no
+ decoding should be performed.
+ :param errors: the decoding error handling.
+ """
+ path = wsgi_get_bytes(environ.get("PATH_INFO", ""))
+ return to_unicode(path, charset, errors, allow_none_charset=True)
+
+
+def get_script_name(environ, charset="utf-8", errors="replace"):
+ """Returns the `SCRIPT_NAME` from the WSGI environment and properly
+ decodes it. This also takes care about the WSGI decoding dance
+ on Python 3 environments. if the `charset` is set to `None` a
+ bytestring is returned.
+
+ .. versionadded:: 0.9
+
+ :param environ: the WSGI environment object to get the path from.
+ :param charset: the charset for the path, or `None` if no
+ decoding should be performed.
+ :param errors: the decoding error handling.
+ """
+ path = wsgi_get_bytes(environ.get("SCRIPT_NAME", ""))
+ return to_unicode(path, charset, errors, allow_none_charset=True)
+
+
+def pop_path_info(environ, charset="utf-8", errors="replace"):
+ """Removes and returns the next segment of `PATH_INFO`, pushing it onto
+ `SCRIPT_NAME`. Returns `None` if there is nothing left on `PATH_INFO`.
+
+ If the `charset` is set to `None` a bytestring is returned.
+
+ If there are empty segments (``'/foo//bar``) these are ignored but
+ properly pushed to the `SCRIPT_NAME`:
+
+ >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
+ >>> pop_path_info(env)
+ 'a'
+ >>> env['SCRIPT_NAME']
+ '/foo/a'
+ >>> pop_path_info(env)
+ 'b'
+ >>> env['SCRIPT_NAME']
+ '/foo/a/b'
+
+ .. versionadded:: 0.5
+
+ .. versionchanged:: 0.9
+ The path is now decoded and a charset and encoding
+ parameter can be provided.
+
+ :param environ: the WSGI environment that is modified.
+ """
+ path = environ.get("PATH_INFO")
+ if not path:
+ return None
+
+ script_name = environ.get("SCRIPT_NAME", "")
+
+ # shift multiple leading slashes over
+ old_path = path
+ path = path.lstrip("/")
+ if path != old_path:
+ script_name += "/" * (len(old_path) - len(path))
+
+ if "/" not in path:
+ environ["PATH_INFO"] = ""
+ environ["SCRIPT_NAME"] = script_name + path
+ rv = wsgi_get_bytes(path)
+ else:
+ segment, path = path.split("/", 1)
+ environ["PATH_INFO"] = "/" + path
+ environ["SCRIPT_NAME"] = script_name + segment
+ rv = wsgi_get_bytes(segment)
+
+ return to_unicode(rv, charset, errors, allow_none_charset=True)
+
+
+def peek_path_info(environ, charset="utf-8", errors="replace"):
+ """Returns the next segment on the `PATH_INFO` or `None` if there
+ is none. Works like :func:`pop_path_info` without modifying the
+ environment:
+
+ >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'}
+ >>> peek_path_info(env)
+ 'a'
+ >>> peek_path_info(env)
+ 'a'
+
+ If the `charset` is set to `None` a bytestring is returned.
+
+ .. versionadded:: 0.5
+
+ .. versionchanged:: 0.9
+ The path is now decoded and a charset and encoding
+ parameter can be provided.
+
+ :param environ: the WSGI environment that is checked.
+ """
+ segments = environ.get("PATH_INFO", "").lstrip("/").split("/", 1)
+ if segments:
+ return to_unicode(
+ wsgi_get_bytes(segments[0]), charset, errors, allow_none_charset=True
+ )
+
+
+def extract_path_info(
+ environ_or_baseurl,
+ path_or_url,
+ charset="utf-8",
+ errors="werkzeug.url_quote",
+ collapse_http_schemes=True,
+):
+ """Extracts the path info from the given URL (or WSGI environment) and
+ path. The path info returned is a unicode string, not a bytestring
+ suitable for a WSGI environment. The URLs might also be IRIs.
+
+ If the path info could not be determined, `None` is returned.
+
+ Some examples:
+
+ >>> extract_path_info('http://example.com/app', '/app/hello')
+ u'/hello'
+ >>> extract_path_info('http://example.com/app',
+ ... 'https://example.com/app/hello')
+ u'/hello'
+ >>> extract_path_info('http://example.com/app',
+ ... 'https://example.com/app/hello',
+ ... collapse_http_schemes=False) is None
+ True
+
+ Instead of providing a base URL you can also pass a WSGI environment.
+
+ :param environ_or_baseurl: a WSGI environment dict, a base URL or
+ base IRI. This is the root of the
+ application.
+ :param path_or_url: an absolute path from the server root, a
+ relative path (in which case it's the path info)
+ or a full URL. Also accepts IRIs and unicode
+ parameters.
+ :param charset: the charset for byte data in URLs
+ :param errors: the error handling on decode
+ :param collapse_http_schemes: if set to `False` the algorithm does
+ not assume that http and https on the
+ same server point to the same
+ resource.
+
+ .. versionchanged:: 0.15
+ The ``errors`` parameter defaults to leaving invalid bytes
+ quoted instead of replacing them.
+
+ .. versionadded:: 0.6
+ """
+
+ def _normalize_netloc(scheme, netloc):
+ parts = netloc.split(u"@", 1)[-1].split(u":", 1)
+ if len(parts) == 2:
+ netloc, port = parts
+ if (scheme == u"http" and port == u"80") or (
+ scheme == u"https" and port == u"443"
+ ):
+ port = None
+ else:
+ netloc = parts[0]
+ port = None
+ if port is not None:
+ netloc += u":" + port
+ return netloc
+
+ # make sure whatever we are working on is a IRI and parse it
+ path = uri_to_iri(path_or_url, charset, errors)
+ if isinstance(environ_or_baseurl, dict):
+ environ_or_baseurl = get_current_url(environ_or_baseurl, root_only=True)
+ base_iri = uri_to_iri(environ_or_baseurl, charset, errors)
+ base_scheme, base_netloc, base_path = url_parse(base_iri)[:3]
+ cur_scheme, cur_netloc, cur_path, = url_parse(url_join(base_iri, path))[:3]
+
+ # normalize the network location
+ base_netloc = _normalize_netloc(base_scheme, base_netloc)
+ cur_netloc = _normalize_netloc(cur_scheme, cur_netloc)
+
+ # is that IRI even on a known HTTP scheme?
+ if collapse_http_schemes:
+ for scheme in base_scheme, cur_scheme:
+ if scheme not in (u"http", u"https"):
+ return None
+ else:
+ if not (base_scheme in (u"http", u"https") and base_scheme == cur_scheme):
+ return None
+
+ # are the netlocs compatible?
+ if base_netloc != cur_netloc:
+ return None
+
+ # are we below the application path?
+ base_path = base_path.rstrip(u"/")
+ if not cur_path.startswith(base_path):
+ return None
+
+ return u"/" + cur_path[len(base_path) :].lstrip(u"/")
+
+
+@implements_iterator
+class ClosingIterator(object):
+ """The WSGI specification requires that all middlewares and gateways
+ respect the `close` callback of the iterable returned by the application.
+ Because it is useful to add another close action to a returned iterable
+ and adding a custom iterable is a boring task this class can be used for
+ that::
+
+ return ClosingIterator(app(environ, start_response), [cleanup_session,
+ cleanup_locals])
+
+ If there is just one close function it can be passed instead of the list.
+
+ A closing iterator is not needed if the application uses response objects
+ and finishes the processing if the response is started::
+
+ try:
+ return response(environ, start_response)
+ finally:
+ cleanup_session()
+ cleanup_locals()
+ """
+
+ def __init__(self, iterable, callbacks=None):
+ iterator = iter(iterable)
+ self._next = partial(next, iterator)
+ if callbacks is None:
+ callbacks = []
+ elif callable(callbacks):
+ callbacks = [callbacks]
+ else:
+ callbacks = list(callbacks)
+ iterable_close = getattr(iterable, "close", None)
+ if iterable_close:
+ callbacks.insert(0, iterable_close)
+ self._callbacks = callbacks
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ return self._next()
+
+ def close(self):
+ for callback in self._callbacks:
+ callback()
+
+
+def wrap_file(environ, file, buffer_size=8192):
+ """Wraps a file. This uses the WSGI server's file wrapper if available
+ or otherwise the generic :class:`FileWrapper`.
+
+ .. versionadded:: 0.5
+
+ If the file wrapper from the WSGI server is used it's important to not
+ iterate over it from inside the application but to pass it through
+ unchanged. If you want to pass out a file wrapper inside a response
+ object you have to set :attr:`~BaseResponse.direct_passthrough` to `True`.
+
+ More information about file wrappers are available in :pep:`333`.
+
+ :param file: a :class:`file`-like object with a :meth:`~file.read` method.
+ :param buffer_size: number of bytes for one iteration.
+ """
+ return environ.get("wsgi.file_wrapper", FileWrapper)(file, buffer_size)
+
+
+@implements_iterator
+class FileWrapper(object):
+ """This class can be used to convert a :class:`file`-like object into
+ an iterable. It yields `buffer_size` blocks until the file is fully
+ read.
+
+ You should not use this class directly but rather use the
+ :func:`wrap_file` function that uses the WSGI server's file wrapper
+ support if it's available.
+
+ .. versionadded:: 0.5
+
+ If you're using this object together with a :class:`BaseResponse` you have
+ to use the `direct_passthrough` mode.
+
+ :param file: a :class:`file`-like object with a :meth:`~file.read` method.
+ :param buffer_size: number of bytes for one iteration.
+ """
+
+ def __init__(self, file, buffer_size=8192):
+ self.file = file
+ self.buffer_size = buffer_size
+
+ def close(self):
+ if hasattr(self.file, "close"):
+ self.file.close()
+
+ def seekable(self):
+ if hasattr(self.file, "seekable"):
+ return self.file.seekable()
+ if hasattr(self.file, "seek"):
+ return True
+ return False
+
+ def seek(self, *args):
+ if hasattr(self.file, "seek"):
+ self.file.seek(*args)
+
+ def tell(self):
+ if hasattr(self.file, "tell"):
+ return self.file.tell()
+ return None
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ data = self.file.read(self.buffer_size)
+ if data:
+ return data
+ raise StopIteration()
+
+
+@implements_iterator
+class _RangeWrapper(object):
+ # private for now, but should we make it public in the future ?
+
+ """This class can be used to convert an iterable object into
+ an iterable that will only yield a piece of the underlying content.
+ It yields blocks until the underlying stream range is fully read.
+ The yielded blocks will have a size that can't exceed the original
+ iterator defined block size, but that can be smaller.
+
+ If you're using this object together with a :class:`BaseResponse` you have
+ to use the `direct_passthrough` mode.
+
+ :param iterable: an iterable object with a :meth:`__next__` method.
+ :param start_byte: byte from which read will start.
+ :param byte_range: how many bytes to read.
+ """
+
+ def __init__(self, iterable, start_byte=0, byte_range=None):
+ self.iterable = iter(iterable)
+ self.byte_range = byte_range
+ self.start_byte = start_byte
+ self.end_byte = None
+ if byte_range is not None:
+ self.end_byte = self.start_byte + self.byte_range
+ self.read_length = 0
+ self.seekable = hasattr(iterable, "seekable") and iterable.seekable()
+ self.end_reached = False
+
+ def __iter__(self):
+ return self
+
+ def _next_chunk(self):
+ try:
+ chunk = next(self.iterable)
+ self.read_length += len(chunk)
+ return chunk
+ except StopIteration:
+ self.end_reached = True
+ raise
+
+ def _first_iteration(self):
+ chunk = None
+ if self.seekable:
+ self.iterable.seek(self.start_byte)
+ self.read_length = self.iterable.tell()
+ contextual_read_length = self.read_length
+ else:
+ while self.read_length <= self.start_byte:
+ chunk = self._next_chunk()
+ if chunk is not None:
+ chunk = chunk[self.start_byte - self.read_length :]
+ contextual_read_length = self.start_byte
+ return chunk, contextual_read_length
+
+ def _next(self):
+ if self.end_reached:
+ raise StopIteration()
+ chunk = None
+ contextual_read_length = self.read_length
+ if self.read_length == 0:
+ chunk, contextual_read_length = self._first_iteration()
+ if chunk is None:
+ chunk = self._next_chunk()
+ if self.end_byte is not None and self.read_length >= self.end_byte:
+ self.end_reached = True
+ return chunk[: self.end_byte - contextual_read_length]
+ return chunk
+
+ def __next__(self):
+ chunk = self._next()
+ if chunk:
+ return chunk
+ self.end_reached = True
+ raise StopIteration()
+
+ def close(self):
+ if hasattr(self.iterable, "close"):
+ self.iterable.close()
+
+
+def _make_chunk_iter(stream, limit, buffer_size):
+ """Helper for the line and chunk iter functions."""
+ if isinstance(stream, (bytes, bytearray, text_type)):
+ raise TypeError(
+ "Passed a string or byte object instead of true iterator or stream."
+ )
+ if not hasattr(stream, "read"):
+ for item in stream:
+ if item:
+ yield item
+ return
+ if not isinstance(stream, LimitedStream) and limit is not None:
+ stream = LimitedStream(stream, limit)
+ _read = stream.read
+ while 1:
+ item = _read(buffer_size)
+ if not item:
+ break
+ yield item
+
+
+def make_line_iter(stream, limit=None, buffer_size=10 * 1024, cap_at_buffer=False):
+ """Safely iterates line-based over an input stream. If the input stream
+ is not a :class:`LimitedStream` the `limit` parameter is mandatory.
+
+ This uses the stream's :meth:`~file.read` method internally as opposite
+ to the :meth:`~file.readline` method that is unsafe and can only be used
+ in violation of the WSGI specification. The same problem applies to the
+ `__iter__` function of the input stream which calls :meth:`~file.readline`
+ without arguments.
+
+ If you need line-by-line processing it's strongly recommended to iterate
+ over the input stream using this helper function.
+
+ .. versionchanged:: 0.8
+ This function now ensures that the limit was reached.
+
+ .. versionadded:: 0.9
+ added support for iterators as input stream.
+
+ .. versionadded:: 0.11.10
+ added support for the `cap_at_buffer` parameter.
+
+ :param stream: the stream or iterate to iterate over.
+ :param limit: the limit in bytes for the stream. (Usually
+ content length. Not necessary if the `stream`
+ is a :class:`LimitedStream`.
+ :param buffer_size: The optional buffer size.
+ :param cap_at_buffer: if this is set chunks are split if they are longer
+ than the buffer size. Internally this is implemented
+ that the buffer size might be exhausted by a factor
+ of two however.
+ """
+ _iter = _make_chunk_iter(stream, limit, buffer_size)
+
+ first_item = next(_iter, "")
+ if not first_item:
+ return
+
+ s = make_literal_wrapper(first_item)
+ empty = s("")
+ cr = s("\r")
+ lf = s("\n")
+ crlf = s("\r\n")
+
+ _iter = chain((first_item,), _iter)
+
+ def _iter_basic_lines():
+ _join = empty.join
+ buffer = []
+ while 1:
+ new_data = next(_iter, "")
+ if not new_data:
+ break
+ new_buf = []
+ buf_size = 0
+ for item in chain(buffer, new_data.splitlines(True)):
+ new_buf.append(item)
+ buf_size += len(item)
+ if item and item[-1:] in crlf:
+ yield _join(new_buf)
+ new_buf = []
+ elif cap_at_buffer and buf_size >= buffer_size:
+ rv = _join(new_buf)
+ while len(rv) >= buffer_size:
+ yield rv[:buffer_size]
+ rv = rv[buffer_size:]
+ new_buf = [rv]
+ buffer = new_buf
+ if buffer:
+ yield _join(buffer)
+
+ # This hackery is necessary to merge 'foo\r' and '\n' into one item
+ # of 'foo\r\n' if we were unlucky and we hit a chunk boundary.
+ previous = empty
+ for item in _iter_basic_lines():
+ if item == lf and previous[-1:] == cr:
+ previous += item
+ item = empty
+ if previous:
+ yield previous
+ previous = item
+ if previous:
+ yield previous
+
+
+def make_chunk_iter(
+ stream, separator, limit=None, buffer_size=10 * 1024, cap_at_buffer=False
+):
+ """Works like :func:`make_line_iter` but accepts a separator
+ which divides chunks. If you want newline based processing
+ you should use :func:`make_line_iter` instead as it
+ supports arbitrary newline markers.
+
+ .. versionadded:: 0.8
+
+ .. versionadded:: 0.9
+ added support for iterators as input stream.
+
+ .. versionadded:: 0.11.10
+ added support for the `cap_at_buffer` parameter.
+
+ :param stream: the stream or iterate to iterate over.
+ :param separator: the separator that divides chunks.
+ :param limit: the limit in bytes for the stream. (Usually
+ content length. Not necessary if the `stream`
+ is otherwise already limited).
+ :param buffer_size: The optional buffer size.
+ :param cap_at_buffer: if this is set chunks are split if they are longer
+ than the buffer size. Internally this is implemented
+ that the buffer size might be exhausted by a factor
+ of two however.
+ """
+ _iter = _make_chunk_iter(stream, limit, buffer_size)
+
+ first_item = next(_iter, "")
+ if not first_item:
+ return
+
+ _iter = chain((first_item,), _iter)
+ if isinstance(first_item, text_type):
+ separator = to_unicode(separator)
+ _split = re.compile(r"(%s)" % re.escape(separator)).split
+ _join = u"".join
+ else:
+ separator = to_bytes(separator)
+ _split = re.compile(b"(" + re.escape(separator) + b")").split
+ _join = b"".join
+
+ buffer = []
+ while 1:
+ new_data = next(_iter, "")
+ if not new_data:
+ break
+ chunks = _split(new_data)
+ new_buf = []
+ buf_size = 0
+ for item in chain(buffer, chunks):
+ if item == separator:
+ yield _join(new_buf)
+ new_buf = []
+ buf_size = 0
+ else:
+ buf_size += len(item)
+ new_buf.append(item)
+
+ if cap_at_buffer and buf_size >= buffer_size:
+ rv = _join(new_buf)
+ while len(rv) >= buffer_size:
+ yield rv[:buffer_size]
+ rv = rv[buffer_size:]
+ new_buf = [rv]
+ buf_size = len(rv)
+
+ buffer = new_buf
+ if buffer:
+ yield _join(buffer)
+
+
+@implements_iterator
+class LimitedStream(io.IOBase):
+ """Wraps a stream so that it doesn't read more than n bytes. If the
+ stream is exhausted and the caller tries to get more bytes from it
+ :func:`on_exhausted` is called which by default returns an empty
+ string. The return value of that function is forwarded
+ to the reader function. So if it returns an empty string
+ :meth:`read` will return an empty string as well.
+
+ The limit however must never be higher than what the stream can
+ output. Otherwise :meth:`readlines` will try to read past the
+ limit.
+
+ .. admonition:: Note on WSGI compliance
+
+ calls to :meth:`readline` and :meth:`readlines` are not
+ WSGI compliant because it passes a size argument to the
+ readline methods. Unfortunately the WSGI PEP is not safely
+ implementable without a size argument to :meth:`readline`
+ because there is no EOF marker in the stream. As a result
+ of that the use of :meth:`readline` is discouraged.
+
+ For the same reason iterating over the :class:`LimitedStream`
+ is not portable. It internally calls :meth:`readline`.
+
+ We strongly suggest using :meth:`read` only or using the
+ :func:`make_line_iter` which safely iterates line-based
+ over a WSGI input stream.
+
+ :param stream: the stream to wrap.
+ :param limit: the limit for the stream, must not be longer than
+ what the string can provide if the stream does not
+ end with `EOF` (like `wsgi.input`)
+ """
+
+ def __init__(self, stream, limit):
+ self._read = stream.read
+ self._readline = stream.readline
+ self._pos = 0
+ self.limit = limit
+
+ def __iter__(self):
+ return self
+
+ @property
+ def is_exhausted(self):
+ """If the stream is exhausted this attribute is `True`."""
+ return self._pos >= self.limit
+
+ def on_exhausted(self):
+ """This is called when the stream tries to read past the limit.
+ The return value of this function is returned from the reading
+ function.
+ """
+ # Read null bytes from the stream so that we get the
+ # correct end of stream marker.
+ return self._read(0)
+
+ def on_disconnect(self):
+ """What should happen if a disconnect is detected? The return
+ value of this function is returned from read functions in case
+ the client went away. By default a
+ :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised.
+ """
+ from .exceptions import ClientDisconnected
+
+ raise ClientDisconnected()
+
+ def exhaust(self, chunk_size=1024 * 64):
+ """Exhaust the stream. This consumes all the data left until the
+ limit is reached.
+
+ :param chunk_size: the size for a chunk. It will read the chunk
+ until the stream is exhausted and throw away
+ the results.
+ """
+ to_read = self.limit - self._pos
+ chunk = chunk_size
+ while to_read > 0:
+ chunk = min(to_read, chunk)
+ self.read(chunk)
+ to_read -= chunk
+
+ def read(self, size=None):
+ """Read `size` bytes or if size is not provided everything is read.
+
+ :param size: the number of bytes read.
+ """
+ if self._pos >= self.limit:
+ return self.on_exhausted()
+ if size is None or size == -1: # -1 is for consistence with file
+ size = self.limit
+ to_read = min(self.limit - self._pos, size)
+ try:
+ read = self._read(to_read)
+ except (IOError, ValueError):
+ return self.on_disconnect()
+ if to_read and len(read) != to_read:
+ return self.on_disconnect()
+ self._pos += len(read)
+ return read
+
+ def readline(self, size=None):
+ """Reads one line from the stream."""
+ if self._pos >= self.limit:
+ return self.on_exhausted()
+ if size is None:
+ size = self.limit - self._pos
+ else:
+ size = min(size, self.limit - self._pos)
+ try:
+ line = self._readline(size)
+ except (ValueError, IOError):
+ return self.on_disconnect()
+ if size and not line:
+ return self.on_disconnect()
+ self._pos += len(line)
+ return line
+
+ def readlines(self, size=None):
+ """Reads a file into a list of strings. It calls :meth:`readline`
+ until the file is read to the end. It does support the optional
+ `size` argument if the underlaying stream supports it for
+ `readline`.
+ """
+ last_pos = self._pos
+ result = []
+ if size is not None:
+ end = min(self.limit, last_pos + size)
+ else:
+ end = self.limit
+ while 1:
+ if size is not None:
+ size -= last_pos - self._pos
+ if self._pos >= end:
+ break
+ result.append(self.readline(size))
+ if size is not None:
+ last_pos = self._pos
+ return result
+
+ def tell(self):
+ """Returns the position of the stream.
+
+ .. versionadded:: 0.9
+ """
+ return self._pos
+
+ def __next__(self):
+ line = self.readline()
+ if not line:
+ raise StopIteration()
+ return line
+
+ def readable(self):
+ return True
+
+
+# DEPRECATED
+from .middleware.dispatcher import DispatcherMiddleware as _DispatcherMiddleware
+from .middleware.http_proxy import ProxyMiddleware as _ProxyMiddleware
+from .middleware.shared_data import SharedDataMiddleware as _SharedDataMiddleware
+
+
+class ProxyMiddleware(_ProxyMiddleware):
+ """
+ .. deprecated:: 0.15
+ ``werkzeug.wsgi.ProxyMiddleware`` has moved to
+ :mod:`werkzeug.middleware.http_proxy`. This import will be
+ removed in 1.0.
+ """
+
+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "'werkzeug.wsgi.ProxyMiddleware' has moved to 'werkzeug"
+ ".middleware.http_proxy.ProxyMiddleware'. This import is"
+ " deprecated as of version 0.15 and will be removed in"
+ " version 1.0.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ super(ProxyMiddleware, self).__init__(*args, **kwargs)
+
+
+class SharedDataMiddleware(_SharedDataMiddleware):
+ """
+ .. deprecated:: 0.15
+ ``werkzeug.wsgi.SharedDataMiddleware`` has moved to
+ :mod:`werkzeug.middleware.shared_data`. This import will be
+ removed in 1.0.
+ """
+
+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "'werkzeug.wsgi.SharedDataMiddleware' has moved to"
+ " 'werkzeug.middleware.shared_data.SharedDataMiddleware'."
+ " This import is deprecated as of version 0.15 and will be"
+ " removed in version 1.0.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ super(SharedDataMiddleware, self).__init__(*args, **kwargs)
+
+
+class DispatcherMiddleware(_DispatcherMiddleware):
+ """
+ .. deprecated:: 0.15
+ ``werkzeug.wsgi.DispatcherMiddleware`` has moved to
+ :mod:`werkzeug.middleware.dispatcher`. This import will be
+ removed in 1.0.
+ """
+
+ def __init__(self, *args, **kwargs):
+ warnings.warn(
+ "'werkzeug.wsgi.DispatcherMiddleware' has moved to"
+ " 'werkzeug.middleware.dispatcher.DispatcherMiddleware'."
+ " This import is deprecated as of version 0.15 and will be"
+ " removed in version 1.0.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ super(DispatcherMiddleware, self).__init__(*args, **kwargs)