diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-08-09 22:01:04 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-08-09 22:01:04 -0700 |
commit | 2e75c6d9603f8a5edf6495f8d4fb3115a67d823c (patch) | |
tree | 8fb2d1bec2cf0e50c5fce6bc718f755485419db0 /python/werkzeug/middleware | |
parent | cc9283ad5332f59a69a91d9d0fab299779de513c (diff) | |
parent | adc40bc760345a23678a01f27d7697dfd3811914 (diff) | |
download | yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.lz yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.xz yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.zip |
Merge flask framework and other stuff from master
Diffstat (limited to 'python/werkzeug/middleware')
-rw-r--r-- | python/werkzeug/middleware/__init__.py | 25 | ||||
-rw-r--r-- | python/werkzeug/middleware/dispatcher.py | 66 | ||||
-rw-r--r-- | python/werkzeug/middleware/http_proxy.py | 219 | ||||
-rw-r--r-- | python/werkzeug/middleware/lint.py | 408 | ||||
-rw-r--r-- | python/werkzeug/middleware/profiler.py | 132 | ||||
-rw-r--r-- | python/werkzeug/middleware/proxy_fix.py | 228 | ||||
-rw-r--r-- | python/werkzeug/middleware/shared_data.py | 260 |
7 files changed, 1338 insertions, 0 deletions
diff --git a/python/werkzeug/middleware/__init__.py b/python/werkzeug/middleware/__init__.py new file mode 100644 index 0000000..5e049f5 --- /dev/null +++ b/python/werkzeug/middleware/__init__.py @@ -0,0 +1,25 @@ +""" +Middleware +========== + +A WSGI middleware is a WSGI application that wraps another application +in order to observe or change its behavior. Werkzeug provides some +middleware for common use cases. + +.. toctree:: + :maxdepth: 1 + + proxy_fix + shared_data + dispatcher + http_proxy + lint + profiler + +The :doc:`interactive debugger </debug>` is also a middleware that can +be applied manually, although it is typically used automatically with +the :doc:`development server </serving>`. + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" diff --git a/python/werkzeug/middleware/dispatcher.py b/python/werkzeug/middleware/dispatcher.py new file mode 100644 index 0000000..2eb173e --- /dev/null +++ b/python/werkzeug/middleware/dispatcher.py @@ -0,0 +1,66 @@ +""" +Application Dispatcher +====================== + +This middleware creates a single WSGI application that dispatches to +multiple other WSGI applications mounted at different URL paths. + +A common example is writing a Single Page Application, where you have a +backend API and a frontend written in JavaScript that does the routing +in the browser rather than requesting different pages from the server. +The frontend is a single HTML and JS file that should be served for any +path besides "/api". + +This example dispatches to an API app under "/api", an admin app +under "/admin", and an app that serves frontend files for all other +requests:: + + app = DispatcherMiddleware(serve_frontend, { + '/api': api_app, + '/admin': admin_app, + }) + +In production, you might instead handle this at the HTTP server level, +serving files or proxying to application servers based on location. The +API and admin apps would each be deployed with a separate WSGI server, +and the static files would be served directly by the HTTP server. + +.. autoclass:: DispatcherMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" + + +class DispatcherMiddleware(object): + """Combine multiple applications as a single WSGI application. + Requests are dispatched to an application based on the path it is + mounted under. + + :param app: The WSGI application to dispatch to if the request + doesn't match a mounted path. + :param mounts: Maps path prefixes to applications for dispatching. + """ + + def __init__(self, app, mounts=None): + self.app = app + self.mounts = mounts or {} + + def __call__(self, environ, start_response): + script = environ.get("PATH_INFO", "") + path_info = "" + + while "/" in script: + if script in self.mounts: + app = self.mounts[script] + break + + script, last_item = script.rsplit("/", 1) + path_info = "/%s%s" % (last_item, path_info) + else: + app = self.mounts.get(script, self.app) + + original_script_name = environ.get("SCRIPT_NAME", "") + environ["SCRIPT_NAME"] = original_script_name + script + environ["PATH_INFO"] = path_info + return app(environ, start_response) diff --git a/python/werkzeug/middleware/http_proxy.py b/python/werkzeug/middleware/http_proxy.py new file mode 100644 index 0000000..bfdc071 --- /dev/null +++ b/python/werkzeug/middleware/http_proxy.py @@ -0,0 +1,219 @@ +""" +Basic HTTP Proxy +================ + +.. autoclass:: ProxyMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import socket + +from ..datastructures import EnvironHeaders +from ..http import is_hop_by_hop_header +from ..urls import url_parse +from ..urls import url_quote +from ..wsgi import get_input_stream + +try: + from http import client +except ImportError: + import httplib as client + + +class ProxyMiddleware(object): + """Proxy requests under a path to an external server, routing other + requests to the app. + + This middleware can only proxy HTTP requests, as that is the only + protocol handled by the WSGI server. Other protocols, such as + websocket requests, cannot be proxied at this layer. This should + only be used for development, in production a real proxying server + should be used. + + The middleware takes a dict that maps a path prefix to a dict + describing the host to be proxied to:: + + app = ProxyMiddleware(app, { + "/static/": { + "target": "http://127.0.0.1:5001/", + } + }) + + Each host has the following options: + + ``target``: + The target URL to dispatch to. This is required. + ``remove_prefix``: + Whether to remove the prefix from the URL before dispatching it + to the target. The default is ``False``. + ``host``: + ``"<auto>"`` (default): + The host header is automatically rewritten to the URL of the + target. + ``None``: + The host header is unmodified from the client request. + Any other value: + The host header is overwritten with the value. + ``headers``: + A dictionary of headers to be sent with the request to the + target. The default is ``{}``. + ``ssl_context``: + A :class:`ssl.SSLContext` defining how to verify requests if the + target is HTTPS. The default is ``None``. + + In the example above, everything under ``"/static/"`` is proxied to + the server on port 5001. The host header is rewritten to the target, + and the ``"/static/"`` prefix is removed from the URLs. + + :param app: The WSGI application to wrap. + :param targets: Proxy target configurations. See description above. + :param chunk_size: Size of chunks to read from input stream and + write to target. + :param timeout: Seconds before an operation to a target fails. + + .. versionadded:: 0.14 + """ + + def __init__(self, app, targets, chunk_size=2 << 13, timeout=10): + def _set_defaults(opts): + opts.setdefault("remove_prefix", False) + opts.setdefault("host", "<auto>") + opts.setdefault("headers", {}) + opts.setdefault("ssl_context", None) + return opts + + self.app = app + self.targets = dict( + ("/%s/" % k.strip("/"), _set_defaults(v)) for k, v in targets.items() + ) + self.chunk_size = chunk_size + self.timeout = timeout + + def proxy_to(self, opts, path, prefix): + target = url_parse(opts["target"]) + + def application(environ, start_response): + headers = list(EnvironHeaders(environ).items()) + headers[:] = [ + (k, v) + for k, v in headers + if not is_hop_by_hop_header(k) + and k.lower() not in ("content-length", "host") + ] + headers.append(("Connection", "close")) + + if opts["host"] == "<auto>": + headers.append(("Host", target.ascii_host)) + elif opts["host"] is None: + headers.append(("Host", environ["HTTP_HOST"])) + else: + headers.append(("Host", opts["host"])) + + headers.extend(opts["headers"].items()) + remote_path = path + + if opts["remove_prefix"]: + remote_path = "%s/%s" % ( + target.path.rstrip("/"), + remote_path[len(prefix) :].lstrip("/"), + ) + + content_length = environ.get("CONTENT_LENGTH") + chunked = False + + if content_length not in ("", None): + headers.append(("Content-Length", content_length)) + elif content_length is not None: + headers.append(("Transfer-Encoding", "chunked")) + chunked = True + + try: + if target.scheme == "http": + con = client.HTTPConnection( + target.ascii_host, target.port or 80, timeout=self.timeout + ) + elif target.scheme == "https": + con = client.HTTPSConnection( + target.ascii_host, + target.port or 443, + timeout=self.timeout, + context=opts["ssl_context"], + ) + else: + raise RuntimeError( + "Target scheme must be 'http' or 'https', got '{}'.".format( + target.scheme + ) + ) + + con.connect() + remote_url = url_quote(remote_path) + querystring = environ["QUERY_STRING"] + + if querystring: + remote_url = remote_url + "?" + querystring + + con.putrequest(environ["REQUEST_METHOD"], remote_url, skip_host=True) + + for k, v in headers: + if k.lower() == "connection": + v = "close" + + con.putheader(k, v) + + con.endheaders() + stream = get_input_stream(environ) + + while 1: + data = stream.read(self.chunk_size) + + if not data: + break + + if chunked: + con.send(b"%x\r\n%s\r\n" % (len(data), data)) + else: + con.send(data) + + resp = con.getresponse() + except socket.error: + from ..exceptions import BadGateway + + return BadGateway()(environ, start_response) + + start_response( + "%d %s" % (resp.status, resp.reason), + [ + (k.title(), v) + for k, v in resp.getheaders() + if not is_hop_by_hop_header(k) + ], + ) + + def read(): + while 1: + try: + data = resp.read(self.chunk_size) + except socket.error: + break + + if not data: + break + + yield data + + return read() + + return application + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] + app = self.app + + for prefix, opts in self.targets.items(): + if path.startswith(prefix): + app = self.proxy_to(opts, path, prefix) + break + + return app(environ, start_response) diff --git a/python/werkzeug/middleware/lint.py b/python/werkzeug/middleware/lint.py new file mode 100644 index 0000000..98f9581 --- /dev/null +++ b/python/werkzeug/middleware/lint.py @@ -0,0 +1,408 @@ +""" +WSGI Protocol Linter +==================== + +This module provides a middleware that performs sanity checks on the +behavior of the WSGI server and application. It checks that the +:pep:`3333` WSGI spec is properly implemented. It also warns on some +common HTTP errors such as non-empty responses for 304 status codes. + +.. autoclass:: LintMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from warnings import warn + +from .._compat import implements_iterator +from .._compat import PY2 +from .._compat import string_types +from ..datastructures import Headers +from ..http import is_entity_header +from ..wsgi import FileWrapper + +try: + from urllib.parse import urlparse +except ImportError: + from urlparse import urlparse + + +class WSGIWarning(Warning): + """Warning class for WSGI warnings.""" + + +class HTTPWarning(Warning): + """Warning class for HTTP warnings.""" + + +def check_string(context, obj, stacklevel=3): + if type(obj) is not str: + warn( + "'%s' requires strings, got '%s'" % (context, type(obj).__name__), + WSGIWarning, + ) + + +class InputStream(object): + def __init__(self, stream): + self._stream = stream + + def read(self, *args): + if len(args) == 0: + warn( + "WSGI does not guarantee an EOF marker on the input stream, thus making" + " calls to 'wsgi.input.read()' unsafe. Conforming servers may never" + " return from this call.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) != 1: + warn( + "Too many parameters passed to 'wsgi.input.read()'.", + WSGIWarning, + stacklevel=2, + ) + return self._stream.read(*args) + + def readline(self, *args): + if len(args) == 0: + warn( + "Calls to 'wsgi.input.readline()' without arguments are unsafe. Use" + " 'wsgi.input.read()' instead.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) == 1: + warn( + "'wsgi.input.readline()' was called with a size hint. WSGI does not" + " support this, although it's available on all major servers.", + WSGIWarning, + stacklevel=2, + ) + else: + raise TypeError("Too many arguments passed to 'wsgi.input.readline()'.") + return self._stream.readline(*args) + + def __iter__(self): + try: + return iter(self._stream) + except TypeError: + warn("'wsgi.input' is not iterable.", WSGIWarning, stacklevel=2) + return iter(()) + + def close(self): + warn("The application closed the input stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class ErrorStream(object): + def __init__(self, stream): + self._stream = stream + + def write(self, s): + check_string("wsgi.error.write()", s) + self._stream.write(s) + + def flush(self): + self._stream.flush() + + def writelines(self, seq): + for line in seq: + self.write(line) + + def close(self): + warn("The application closed the error stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class GuardedWrite(object): + def __init__(self, write, chunks): + self._write = write + self._chunks = chunks + + def __call__(self, s): + check_string("write()", s) + self._write.write(s) + self._chunks.append(len(s)) + + +@implements_iterator +class GuardedIterator(object): + def __init__(self, iterator, headers_set, chunks): + self._iterator = iterator + if PY2: + self._next = iter(iterator).next + else: + self._next = iter(iterator).__next__ + self.closed = False + self.headers_set = headers_set + self.chunks = chunks + + def __iter__(self): + return self + + def __next__(self): + if self.closed: + warn("Iterated over closed 'app_iter'.", WSGIWarning, stacklevel=2) + + rv = self._next() + + if not self.headers_set: + warn( + "The application returned before it started the response.", + WSGIWarning, + stacklevel=2, + ) + + check_string("application iterator items", rv) + self.chunks.append(len(rv)) + return rv + + def close(self): + self.closed = True + + if hasattr(self._iterator, "close"): + self._iterator.close() + + if self.headers_set: + status_code, headers = self.headers_set + bytes_sent = sum(self.chunks) + content_length = headers.get("content-length", type=int) + + if status_code == 304: + for key, _value in headers: + key = key.lower() + if key not in ("expires", "content-location") and is_entity_header( + key + ): + warn( + "Entity header %r found in 304 response." % key, HTTPWarning + ) + if bytes_sent: + warn("304 responses must not have a body.", HTTPWarning) + elif 100 <= status_code < 200 or status_code == 204: + if content_length != 0: + warn( + "%r responses must have an empty content length." % status_code, + HTTPWarning, + ) + if bytes_sent: + warn( + "%r responses must not have a body." % status_code, HTTPWarning + ) + elif content_length is not None and content_length != bytes_sent: + warn( + "Content-Length and the number of bytes sent to the client do not" + " match.", + WSGIWarning, + ) + + def __del__(self): + if not self.closed: + try: + warn( + "Iterator was garbage collected before it was closed.", WSGIWarning + ) + except Exception: + pass + + +class LintMiddleware(object): + """Warns about common errors in the WSGI and HTTP behavior of the + server and wrapped application. Some of the issues it check are: + + - invalid status codes + - non-bytestrings sent to the WSGI server + - strings returned from the WSGI application + - non-empty conditional responses + - unquoted etags + - relative URLs in the Location header + - unsafe calls to wsgi.input + - unclosed iterators + + Error information is emitted using the :mod:`warnings` module. + + :param app: The WSGI application to wrap. + + .. code-block:: python + + from werkzeug.middleware.lint import LintMiddleware + app = LintMiddleware(app) + """ + + def __init__(self, app): + self.app = app + + def check_environ(self, environ): + if type(environ) is not dict: + warn( + "WSGI environment is not a standard Python dict.", + WSGIWarning, + stacklevel=4, + ) + for key in ( + "REQUEST_METHOD", + "SERVER_NAME", + "SERVER_PORT", + "wsgi.version", + "wsgi.input", + "wsgi.errors", + "wsgi.multithread", + "wsgi.multiprocess", + "wsgi.run_once", + ): + if key not in environ: + warn( + "Required environment key %r not found" % key, + WSGIWarning, + stacklevel=3, + ) + if environ["wsgi.version"] != (1, 0): + warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3) + + script_name = environ.get("SCRIPT_NAME", "") + path_info = environ.get("PATH_INFO", "") + + if script_name and script_name[0] != "/": + warn( + "'SCRIPT_NAME' does not start with a slash: %r" % script_name, + WSGIWarning, + stacklevel=3, + ) + + if path_info and path_info[0] != "/": + warn( + "'PATH_INFO' does not start with a slash: %r" % path_info, + WSGIWarning, + stacklevel=3, + ) + + def check_start_response(self, status, headers, exc_info): + check_string("status", status) + status_code = status.split(None, 1)[0] + + if len(status_code) != 3 or not status_code.isdigit(): + warn(WSGIWarning("Status code must be three digits"), stacklevel=3) + + if len(status) < 4 or status[3] != " ": + warn( + WSGIWarning( + "Invalid value for status %r. Valid " + "status strings are three digits, a space " + "and a status explanation" + ), + stacklevel=3, + ) + + status_code = int(status_code) + + if status_code < 100: + warn(WSGIWarning("status code < 100 detected"), stacklevel=3) + + if type(headers) is not list: + warn(WSGIWarning("header list is not a list"), stacklevel=3) + + for item in headers: + if type(item) is not tuple or len(item) != 2: + warn(WSGIWarning("Headers must tuple 2-item tuples"), stacklevel=3) + name, value = item + if type(name) is not str or type(value) is not str: + warn(WSGIWarning("header items must be strings"), stacklevel=3) + if name.lower() == "status": + warn( + WSGIWarning( + "The status header is not supported due to " + "conflicts with the CGI spec." + ), + stacklevel=3, + ) + + if exc_info is not None and not isinstance(exc_info, tuple): + warn(WSGIWarning("invalid value for exc_info"), stacklevel=3) + + headers = Headers(headers) + self.check_headers(headers) + + return status_code, headers + + def check_headers(self, headers): + etag = headers.get("etag") + + if etag is not None: + if etag.startswith(("W/", "w/")): + if etag.startswith("w/"): + warn( + HTTPWarning("weak etag indicator should be upcase."), + stacklevel=4, + ) + + etag = etag[2:] + + if not (etag[:1] == etag[-1:] == '"'): + warn(HTTPWarning("unquoted etag emitted."), stacklevel=4) + + location = headers.get("location") + + if location is not None: + if not urlparse(location).netloc: + warn( + HTTPWarning("absolute URLs required for location header"), + stacklevel=4, + ) + + def check_iterator(self, app_iter): + if isinstance(app_iter, string_types): + warn( + "The application returned astring. The response will send one character" + " at a time to the client, which will kill performance. Return a list" + " or iterable instead.", + WSGIWarning, + stacklevel=3, + ) + + def __call__(self, *args, **kwargs): + if len(args) != 2: + warn("A WSGI app takes two arguments.", WSGIWarning, stacklevel=2) + + if kwargs: + warn( + "A WSGI app does not take keyword arguments.", WSGIWarning, stacklevel=2 + ) + + environ, start_response = args + + self.check_environ(environ) + environ["wsgi.input"] = InputStream(environ["wsgi.input"]) + environ["wsgi.errors"] = ErrorStream(environ["wsgi.errors"]) + + # Hook our own file wrapper in so that applications will always + # iterate to the end and we can check the content length. + environ["wsgi.file_wrapper"] = FileWrapper + + headers_set = [] + chunks = [] + + def checking_start_response(*args, **kwargs): + if len(args) not in (2, 3): + warn( + "Invalid number of arguments: %s, expected 2 or 3." % len(args), + WSGIWarning, + stacklevel=2, + ) + + if kwargs: + warn("'start_response' does not take keyword arguments.", WSGIWarning) + + status, headers = args[:2] + + if len(args) == 3: + exc_info = args[2] + else: + exc_info = None + + headers_set[:] = self.check_start_response(status, headers, exc_info) + return GuardedWrite(start_response(status, headers, exc_info), chunks) + + app_iter = self.app(environ, checking_start_response) + self.check_iterator(app_iter) + return GuardedIterator(app_iter, headers_set, chunks) diff --git a/python/werkzeug/middleware/profiler.py b/python/werkzeug/middleware/profiler.py new file mode 100644 index 0000000..32a14d9 --- /dev/null +++ b/python/werkzeug/middleware/profiler.py @@ -0,0 +1,132 @@ +""" +Application Profiler +==================== + +This module provides a middleware that profiles each request with the +:mod:`cProfile` module. This can help identify bottlenecks in your code +that may be slowing down your application. + +.. autoclass:: ProfilerMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from __future__ import print_function + +import os.path +import sys +import time +from pstats import Stats + +try: + from cProfile import Profile +except ImportError: + from profile import Profile + + +class ProfilerMiddleware(object): + """Wrap a WSGI application and profile the execution of each + request. Responses are buffered so that timings are more exact. + + If ``stream`` is given, :class:`pstats.Stats` are written to it + after each request. If ``profile_dir`` is given, :mod:`cProfile` + data files are saved to that directory, one file per request. + + The filename can be customized by passing ``filename_format``. If + it is a string, it will be formatted using :meth:`str.format` with + the following fields available: + + - ``{method}`` - The request method; GET, POST, etc. + - ``{path}`` - The request path or 'root' should one not exist. + - ``{elapsed}`` - The elapsed time of the request. + - ``{time}`` - The time of the request. + + If it is a callable, it will be called with the WSGI ``environ`` + dict and should return a filename. + + :param app: The WSGI application to wrap. + :param stream: Write stats to this stream. Disable with ``None``. + :param sort_by: A tuple of columns to sort stats by. See + :meth:`pstats.Stats.sort_stats`. + :param restrictions: A tuple of restrictions to filter stats by. See + :meth:`pstats.Stats.print_stats`. + :param profile_dir: Save profile data files to this directory. + :param filename_format: Format string for profile data file names, + or a callable returning a name. See explanation above. + + .. code-block:: python + + from werkzeug.middleware.profiler import ProfilerMiddleware + app = ProfilerMiddleware(app) + + .. versionchanged:: 0.15 + Stats are written even if ``profile_dir`` is given, and can be + disable by passing ``stream=None``. + + .. versionadded:: 0.15 + Added ``filename_format``. + + .. versionadded:: 0.9 + Added ``restrictions`` and ``profile_dir``. + """ + + def __init__( + self, + app, + stream=sys.stdout, + sort_by=("time", "calls"), + restrictions=(), + profile_dir=None, + filename_format="{method}.{path}.{elapsed:.0f}ms.{time:.0f}.prof", + ): + self._app = app + self._stream = stream + self._sort_by = sort_by + self._restrictions = restrictions + self._profile_dir = profile_dir + self._filename_format = filename_format + + def __call__(self, environ, start_response): + response_body = [] + + def catching_start_response(status, headers, exc_info=None): + start_response(status, headers, exc_info) + return response_body.append + + def runapp(): + app_iter = self._app(environ, catching_start_response) + response_body.extend(app_iter) + + if hasattr(app_iter, "close"): + app_iter.close() + + profile = Profile() + start = time.time() + profile.runcall(runapp) + body = b"".join(response_body) + elapsed = time.time() - start + + if self._profile_dir is not None: + if callable(self._filename_format): + filename = self._filename_format(environ) + else: + filename = self._filename_format.format( + method=environ["REQUEST_METHOD"], + path=( + environ.get("PATH_INFO").strip("/").replace("/", ".") or "root" + ), + elapsed=elapsed * 1000.0, + time=time.time(), + ) + filename = os.path.join(self._profile_dir, filename) + profile.dump_stats(filename) + + if self._stream is not None: + stats = Stats(profile, stream=self._stream) + stats.sort_stats(*self._sort_by) + print("-" * 80, file=self._stream) + print("PATH: {!r}".format(environ.get("PATH_INFO", "")), file=self._stream) + stats.print_stats(*self._restrictions) + print("-" * 80 + "\n", file=self._stream) + + return [body] diff --git a/python/werkzeug/middleware/proxy_fix.py b/python/werkzeug/middleware/proxy_fix.py new file mode 100644 index 0000000..dc1dacc --- /dev/null +++ b/python/werkzeug/middleware/proxy_fix.py @@ -0,0 +1,228 @@ +""" +X-Forwarded-For Proxy Fix +========================= + +This module provides a middleware that adjusts the WSGI environ based on +``X-Forwarded-`` headers that proxies in front of an application may +set. + +When an application is running behind a proxy server, WSGI may see the +request as coming from that server rather than the real client. Proxies +set various headers to track where the request actually came from. + +This middleware should only be applied if the application is actually +behind such a proxy, and should be configured with the number of proxies +that are chained in front of it. Not all proxies set all the headers. +Since incoming headers can be faked, you must set how many proxies are +setting each header so the middleware knows what to trust. + +.. autoclass:: ProxyFix + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import warnings + + +class ProxyFix(object): + """Adjust the WSGI environ based on ``X-Forwarded-`` that proxies in + front of the application may set. + + - ``X-Forwarded-For`` sets ``REMOTE_ADDR``. + - ``X-Forwarded-Proto`` sets ``wsgi.url_scheme``. + - ``X-Forwarded-Host`` sets ``HTTP_HOST``, ``SERVER_NAME``, and + ``SERVER_PORT``. + - ``X-Forwarded-Port`` sets ``HTTP_HOST`` and ``SERVER_PORT``. + - ``X-Forwarded-Prefix`` sets ``SCRIPT_NAME``. + + You must tell the middleware how many proxies set each header so it + knows what values to trust. It is a security issue to trust values + that came from the client rather than a proxy. + + The original values of the headers are stored in the WSGI + environ as ``werkzeug.proxy_fix.orig``, a dict. + + :param app: The WSGI application to wrap. + :param x_for: Number of values to trust for ``X-Forwarded-For``. + :param x_proto: Number of values to trust for ``X-Forwarded-Proto``. + :param x_host: Number of values to trust for ``X-Forwarded-Host``. + :param x_port: Number of values to trust for ``X-Forwarded-Port``. + :param x_prefix: Number of values to trust for + ``X-Forwarded-Prefix``. + :param num_proxies: Deprecated, use ``x_for`` instead. + + .. code-block:: python + + from werkzeug.middleware.proxy_fix import ProxyFix + # App is behind one proxy that sets the -For and -Host headers. + app = ProxyFix(app, x_for=1, x_host=1) + + .. versionchanged:: 0.15 + All headers support multiple values. The ``num_proxies`` + argument is deprecated. Each header is configured with a + separate number of trusted proxies. + + .. versionchanged:: 0.15 + Original WSGI environ values are stored in the + ``werkzeug.proxy_fix.orig`` dict. ``orig_remote_addr``, + ``orig_wsgi_url_scheme``, and ``orig_http_host`` are deprecated + and will be removed in 1.0. + + .. versionchanged:: 0.15 + Support ``X-Forwarded-Port`` and ``X-Forwarded-Prefix``. + + .. versionchanged:: 0.15 + ``X-Fowarded-Host`` and ``X-Forwarded-Port`` modify + ``SERVER_NAME`` and ``SERVER_PORT``. + """ + + def __init__( + self, app, num_proxies=None, x_for=1, x_proto=0, x_host=0, x_port=0, x_prefix=0 + ): + self.app = app + self.x_for = x_for + self.x_proto = x_proto + self.x_host = x_host + self.x_port = x_port + self.x_prefix = x_prefix + self.num_proxies = num_proxies + + @property + def num_proxies(self): + """The number of proxies setting ``X-Forwarded-For`` in front + of the application. + + .. deprecated:: 0.15 + A separate number of trusted proxies is configured for each + header. ``num_proxies`` maps to ``x_for``. This method will + be removed in 1.0. + + :internal: + """ + warnings.warn( + "'num_proxies' is deprecated as of version 0.15 and will be" + " removed in version 1.0. Use 'x_for' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.x_for + + @num_proxies.setter + def num_proxies(self, value): + if value is not None: + warnings.warn( + "'num_proxies' is deprecated as of version 0.15 and" + " will be removed in version 1.0. Use 'x_for' instead.", + DeprecationWarning, + stacklevel=2, + ) + self.x_for = value + + def get_remote_addr(self, forwarded_for): + """Get the real ``remote_addr`` by looking backwards ``x_for`` + number of values in the ``X-Forwarded-For`` header. + + :param forwarded_for: List of values parsed from the + ``X-Forwarded-For`` header. + :return: The real ``remote_addr``, or ``None`` if there were not + at least ``x_for`` values. + + .. deprecated:: 0.15 + This is handled internally for each header. This method will + be removed in 1.0. + + .. versionchanged:: 0.9 + Use ``num_proxies`` instead of always picking the first + value. + + .. versionadded:: 0.8 + """ + warnings.warn( + "'get_remote_addr' is deprecated as of version 0.15 and" + " will be removed in version 1.0. It is now handled" + " internally for each header.", + DeprecationWarning, + ) + return self._get_trusted_comma(self.x_for, ",".join(forwarded_for)) + + def _get_trusted_comma(self, trusted, value): + """Get the real value from a comma-separated header based on the + configured number of trusted proxies. + + :param trusted: Number of values to trust in the header. + :param value: Header value to parse. + :return: The real value, or ``None`` if there are fewer values + than the number of trusted proxies. + + .. versionadded:: 0.15 + """ + if not (trusted and value): + return + values = [x.strip() for x in value.split(",")] + if len(values) >= trusted: + return values[-trusted] + + def __call__(self, environ, start_response): + """Modify the WSGI environ based on the various ``Forwarded`` + headers before calling the wrapped application. Store the + original environ values in ``werkzeug.proxy_fix.orig_{key}``. + """ + environ_get = environ.get + orig_remote_addr = environ_get("REMOTE_ADDR") + orig_wsgi_url_scheme = environ_get("wsgi.url_scheme") + orig_http_host = environ_get("HTTP_HOST") + environ.update( + { + "werkzeug.proxy_fix.orig": { + "REMOTE_ADDR": orig_remote_addr, + "wsgi.url_scheme": orig_wsgi_url_scheme, + "HTTP_HOST": orig_http_host, + "SERVER_NAME": environ_get("SERVER_NAME"), + "SERVER_PORT": environ_get("SERVER_PORT"), + "SCRIPT_NAME": environ_get("SCRIPT_NAME"), + }, + # todo: remove deprecated keys + "werkzeug.proxy_fix.orig_remote_addr": orig_remote_addr, + "werkzeug.proxy_fix.orig_wsgi_url_scheme": orig_wsgi_url_scheme, + "werkzeug.proxy_fix.orig_http_host": orig_http_host, + } + ) + + x_for = self._get_trusted_comma(self.x_for, environ_get("HTTP_X_FORWARDED_FOR")) + if x_for: + environ["REMOTE_ADDR"] = x_for + + x_proto = self._get_trusted_comma( + self.x_proto, environ_get("HTTP_X_FORWARDED_PROTO") + ) + if x_proto: + environ["wsgi.url_scheme"] = x_proto + + x_host = self._get_trusted_comma( + self.x_host, environ_get("HTTP_X_FORWARDED_HOST") + ) + if x_host: + environ["HTTP_HOST"] = x_host + parts = x_host.split(":", 1) + environ["SERVER_NAME"] = parts[0] + if len(parts) == 2: + environ["SERVER_PORT"] = parts[1] + + x_port = self._get_trusted_comma( + self.x_port, environ_get("HTTP_X_FORWARDED_PORT") + ) + if x_port: + host = environ.get("HTTP_HOST") + if host: + parts = host.split(":", 1) + host = parts[0] if len(parts) == 2 else host + environ["HTTP_HOST"] = "%s:%s" % (host, x_port) + environ["SERVER_PORT"] = x_port + + x_prefix = self._get_trusted_comma( + self.x_prefix, environ_get("HTTP_X_FORWARDED_PREFIX") + ) + if x_prefix: + environ["SCRIPT_NAME"] = x_prefix + + return self.app(environ, start_response) diff --git a/python/werkzeug/middleware/shared_data.py b/python/werkzeug/middleware/shared_data.py new file mode 100644 index 0000000..a902281 --- /dev/null +++ b/python/werkzeug/middleware/shared_data.py @@ -0,0 +1,260 @@ +""" +Serve Shared Static Files +========================= + +.. autoclass:: SharedDataMiddleware + :members: is_allowed + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import mimetypes +import os +import posixpath +from datetime import datetime +from io import BytesIO +from time import mktime +from time import time +from zlib import adler32 + +from .._compat import PY2 +from .._compat import string_types +from ..filesystem import get_filesystem_encoding +from ..http import http_date +from ..http import is_resource_modified +from ..wsgi import get_path_info +from ..wsgi import wrap_file + + +class SharedDataMiddleware(object): + + """A WSGI middleware that provides static content for development + environments or simple server setups. Usage is quite simple:: + + import os + from werkzeug.wsgi import SharedDataMiddleware + + app = SharedDataMiddleware(app, { + '/static': os.path.join(os.path.dirname(__file__), 'static') + }) + + The contents of the folder ``./shared`` will now be available on + ``http://example.com/shared/``. This is pretty useful during development + because a standalone media server is not required. One can also mount + files on the root folder and still continue to use the application because + the shared data middleware forwards all unhandled requests to the + application, even if the requests are below one of the shared folders. + + If `pkg_resources` is available you can also tell the middleware to serve + files from package data:: + + app = SharedDataMiddleware(app, { + '/static': ('myapplication', 'static') + }) + + This will then serve the ``static`` folder in the `myapplication` + Python package. + + The optional `disallow` parameter can be a list of :func:`~fnmatch.fnmatch` + rules for files that are not accessible from the web. If `cache` is set to + `False` no caching headers are sent. + + Currently the middleware does not support non ASCII filenames. If the + encoding on the file system happens to be the encoding of the URI it may + work but this could also be by accident. We strongly suggest using ASCII + only file names for static files. + + The middleware will guess the mimetype using the Python `mimetype` + module. If it's unable to figure out the charset it will fall back + to `fallback_mimetype`. + + .. versionchanged:: 0.5 + The cache timeout is configurable now. + + .. versionadded:: 0.6 + The `fallback_mimetype` parameter was added. + + :param app: the application to wrap. If you don't want to wrap an + application you can pass it :exc:`NotFound`. + :param exports: a list or dict of exported files and folders. + :param disallow: a list of :func:`~fnmatch.fnmatch` rules. + :param fallback_mimetype: the fallback mimetype for unknown files. + :param cache: enable or disable caching headers. + :param cache_timeout: the cache timeout in seconds for the headers. + """ + + def __init__( + self, + app, + exports, + disallow=None, + cache=True, + cache_timeout=60 * 60 * 12, + fallback_mimetype="text/plain", + ): + self.app = app + self.exports = [] + self.cache = cache + self.cache_timeout = cache_timeout + + if hasattr(exports, "items"): + exports = exports.items() + + for key, value in exports: + if isinstance(value, tuple): + loader = self.get_package_loader(*value) + elif isinstance(value, string_types): + if os.path.isfile(value): + loader = self.get_file_loader(value) + else: + loader = self.get_directory_loader(value) + else: + raise TypeError("unknown def %r" % value) + + self.exports.append((key, loader)) + + if disallow is not None: + from fnmatch import fnmatch + + self.is_allowed = lambda x: not fnmatch(x, disallow) + + self.fallback_mimetype = fallback_mimetype + + def is_allowed(self, filename): + """Subclasses can override this method to disallow the access to + certain files. However by providing `disallow` in the constructor + this method is overwritten. + """ + return True + + def _opener(self, filename): + return lambda: ( + open(filename, "rb"), + datetime.utcfromtimestamp(os.path.getmtime(filename)), + int(os.path.getsize(filename)), + ) + + def get_file_loader(self, filename): + return lambda x: (os.path.basename(filename), self._opener(filename)) + + def get_package_loader(self, package, package_path): + from pkg_resources import DefaultProvider, ResourceManager, get_provider + + loadtime = datetime.utcnow() + provider = get_provider(package) + manager = ResourceManager() + filesystem_bound = isinstance(provider, DefaultProvider) + + def loader(path): + if path is None: + return None, None + + path = posixpath.join(package_path, path) + + if not provider.has_resource(path): + return None, None + + basename = posixpath.basename(path) + + if filesystem_bound: + return ( + basename, + self._opener(provider.get_resource_filename(manager, path)), + ) + + s = provider.get_resource_string(manager, path) + return basename, lambda: (BytesIO(s), loadtime, len(s)) + + return loader + + def get_directory_loader(self, directory): + def loader(path): + if path is not None: + path = os.path.join(directory, path) + else: + path = directory + + if os.path.isfile(path): + return os.path.basename(path), self._opener(path) + + return None, None + + return loader + + def generate_etag(self, mtime, file_size, real_filename): + if not isinstance(real_filename, bytes): + real_filename = real_filename.encode(get_filesystem_encoding()) + + return "wzsdm-%d-%s-%s" % ( + mktime(mtime.timetuple()), + file_size, + adler32(real_filename) & 0xFFFFFFFF, + ) + + def __call__(self, environ, start_response): + cleaned_path = get_path_info(environ) + + if PY2: + cleaned_path = cleaned_path.encode(get_filesystem_encoding()) + + # sanitize the path for non unix systems + cleaned_path = cleaned_path.strip("/") + + for sep in os.sep, os.altsep: + if sep and sep != "/": + cleaned_path = cleaned_path.replace(sep, "/") + + path = "/" + "/".join(x for x in cleaned_path.split("/") if x and x != "..") + file_loader = None + + for search_path, loader in self.exports: + if search_path == path: + real_filename, file_loader = loader(None) + + if file_loader is not None: + break + + if not search_path.endswith("/"): + search_path += "/" + + if path.startswith(search_path): + real_filename, file_loader = loader(path[len(search_path) :]) + + if file_loader is not None: + break + + if file_loader is None or not self.is_allowed(real_filename): + return self.app(environ, start_response) + + guessed_type = mimetypes.guess_type(real_filename) + mime_type = guessed_type[0] or self.fallback_mimetype + f, mtime, file_size = file_loader() + + headers = [("Date", http_date())] + + if self.cache: + timeout = self.cache_timeout + etag = self.generate_etag(mtime, file_size, real_filename) + headers += [ + ("Etag", '"%s"' % etag), + ("Cache-Control", "max-age=%d, public" % timeout), + ] + + if not is_resource_modified(environ, etag, last_modified=mtime): + f.close() + start_response("304 Not Modified", headers) + return [] + + headers.append(("Expires", http_date(time() + timeout))) + else: + headers.append(("Cache-Control", "public")) + + headers.extend( + ( + ("Content-Type", mime_type), + ("Content-Length", str(file_size)), + ("Last-Modified", http_date(mtime)), + ) + ) + start_response("200 OK", headers) + return wrap_file(environ, f) |