diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-06-16 16:16:03 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-06-16 16:16:03 -0700 |
commit | 2db58930a6f8c955c4d437657bd07e2939a705f2 (patch) | |
tree | b1d388bd4adc1d3134d255cd0c4d8746d7b2468b /python/werkzeug | |
parent | 9f93b9429c77e631972186049fbc7518e2cf5d4b (diff) | |
download | yt-local-2db58930a6f8c955c4d437657bd07e2939a705f2.tar.lz yt-local-2db58930a6f8c955c4d437657bd07e2939a705f2.tar.xz yt-local-2db58930a6f8c955c4d437657bd07e2939a705f2.zip |
Convert watch page to flask framework
Diffstat (limited to 'python/werkzeug')
48 files changed, 22252 insertions, 0 deletions
diff --git a/python/werkzeug/__init__.py b/python/werkzeug/__init__.py new file mode 100644 index 0000000..e460e75 --- /dev/null +++ b/python/werkzeug/__init__.py @@ -0,0 +1,233 @@ +# -*- coding: utf-8 -*- +""" + werkzeug + ~~~~~~~~ + + Werkzeug is the Swiss Army knife of Python web development. + + It provides useful classes and functions for any WSGI application to make + the life of a python web developer much easier. All of the provided + classes are independent from each other so you can mix it with any other + library. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import sys +from types import ModuleType + +__version__ = "0.15.4" + +# This import magic raises concerns quite often which is why the implementation +# and motivation is explained here in detail now. +# +# The majority of the functions and classes provided by Werkzeug work on the +# HTTP and WSGI layer. There is no useful grouping for those which is why +# they are all importable from "werkzeug" instead of the modules where they are +# implemented. The downside of that is, that now everything would be loaded at +# once, even if unused. +# +# The implementation of a lazy-loading module in this file replaces the +# werkzeug package when imported from within. Attribute access to the werkzeug +# module will then lazily import from the modules that implement the objects. + +# import mapping to objects in other modules +all_by_module = { + "werkzeug.debug": ["DebuggedApplication"], + "werkzeug.local": [ + "Local", + "LocalManager", + "LocalProxy", + "LocalStack", + "release_local", + ], + "werkzeug.serving": ["run_simple"], + "werkzeug.test": ["Client", "EnvironBuilder", "create_environ", "run_wsgi_app"], + "werkzeug.testapp": ["test_app"], + "werkzeug.exceptions": ["abort", "Aborter"], + "werkzeug.urls": [ + "url_decode", + "url_encode", + "url_quote", + "url_quote_plus", + "url_unquote", + "url_unquote_plus", + "url_fix", + "Href", + "iri_to_uri", + "uri_to_iri", + ], + "werkzeug.formparser": ["parse_form_data"], + "werkzeug.utils": [ + "escape", + "environ_property", + "append_slash_redirect", + "redirect", + "cached_property", + "import_string", + "dump_cookie", + "parse_cookie", + "unescape", + "format_string", + "find_modules", + "header_property", + "html", + "xhtml", + "HTMLBuilder", + "validate_arguments", + "ArgumentValidationError", + "bind_arguments", + "secure_filename", + ], + "werkzeug.wsgi": [ + "get_current_url", + "get_host", + "pop_path_info", + "peek_path_info", + "ClosingIterator", + "FileWrapper", + "make_line_iter", + "LimitedStream", + "responder", + "wrap_file", + "extract_path_info", + ], + "werkzeug.datastructures": [ + "MultiDict", + "CombinedMultiDict", + "Headers", + "EnvironHeaders", + "ImmutableList", + "ImmutableDict", + "ImmutableMultiDict", + "TypeConversionDict", + "ImmutableTypeConversionDict", + "Accept", + "MIMEAccept", + "CharsetAccept", + "LanguageAccept", + "RequestCacheControl", + "ResponseCacheControl", + "ETags", + "HeaderSet", + "WWWAuthenticate", + "Authorization", + "FileMultiDict", + "CallbackDict", + "FileStorage", + "OrderedMultiDict", + "ImmutableOrderedMultiDict", + ], + "werkzeug.useragents": ["UserAgent"], + "werkzeug.http": [ + "parse_etags", + "parse_date", + "http_date", + "cookie_date", + "parse_cache_control_header", + "is_resource_modified", + "parse_accept_header", + "parse_set_header", + "quote_etag", + "unquote_etag", + "generate_etag", + "dump_header", + "parse_list_header", + "parse_dict_header", + "parse_authorization_header", + "parse_www_authenticate_header", + "remove_entity_headers", + "is_entity_header", + "remove_hop_by_hop_headers", + "parse_options_header", + "dump_options_header", + "is_hop_by_hop_header", + "unquote_header_value", + "quote_header_value", + "HTTP_STATUS_CODES", + ], + "werkzeug.wrappers": [ + "BaseResponse", + "BaseRequest", + "Request", + "Response", + "AcceptMixin", + "ETagRequestMixin", + "ETagResponseMixin", + "ResponseStreamMixin", + "CommonResponseDescriptorsMixin", + "UserAgentMixin", + "AuthorizationMixin", + "WWWAuthenticateMixin", + "CommonRequestDescriptorsMixin", + ], + "werkzeug.middleware.dispatcher": ["DispatcherMiddleware"], + "werkzeug.middleware.shared_data": ["SharedDataMiddleware"], + "werkzeug.security": ["generate_password_hash", "check_password_hash"], + # the undocumented easteregg ;-) + "werkzeug._internal": ["_easteregg"], +} + +# modules that should be imported when accessed as attributes of werkzeug +attribute_modules = frozenset(["exceptions", "routing"]) + +object_origins = {} +for module, items in all_by_module.items(): + for item in items: + object_origins[item] = module + + +class module(ModuleType): + """Automatically import objects from the modules.""" + + def __getattr__(self, name): + if name in object_origins: + module = __import__(object_origins[name], None, None, [name]) + for extra_name in all_by_module[module.__name__]: + setattr(self, extra_name, getattr(module, extra_name)) + return getattr(module, name) + elif name in attribute_modules: + __import__("werkzeug." + name) + return ModuleType.__getattribute__(self, name) + + def __dir__(self): + """Just show what we want to show.""" + result = list(new_module.__all__) + result.extend( + ( + "__file__", + "__doc__", + "__all__", + "__docformat__", + "__name__", + "__path__", + "__package__", + "__version__", + ) + ) + return result + + +# keep a reference to this module so that it's not garbage collected +old_module = sys.modules["werkzeug"] + + +# setup the new module and patch it into the dict of loaded modules +new_module = sys.modules["werkzeug"] = module("werkzeug") +new_module.__dict__.update( + { + "__file__": __file__, + "__package__": "werkzeug", + "__path__": __path__, + "__doc__": __doc__, + "__version__": __version__, + "__all__": tuple(object_origins) + tuple(attribute_modules), + "__docformat__": "restructuredtext en", + } +) + + +# Due to bootstrapping issues we need to import exceptions here. +# Don't ask :-( +__import__("werkzeug.exceptions") diff --git a/python/werkzeug/_compat.py b/python/werkzeug/_compat.py new file mode 100644 index 0000000..1097983 --- /dev/null +++ b/python/werkzeug/_compat.py @@ -0,0 +1,219 @@ +# flake8: noqa +# This whole file is full of lint errors +import functools +import operator +import sys + +try: + import builtins +except ImportError: + import __builtin__ as builtins + + +PY2 = sys.version_info[0] == 2 +WIN = sys.platform.startswith("win") + +_identity = lambda x: x + +if PY2: + unichr = unichr + text_type = unicode + string_types = (str, unicode) + integer_types = (int, long) + + iterkeys = lambda d, *args, **kwargs: d.iterkeys(*args, **kwargs) + itervalues = lambda d, *args, **kwargs: d.itervalues(*args, **kwargs) + iteritems = lambda d, *args, **kwargs: d.iteritems(*args, **kwargs) + + iterlists = lambda d, *args, **kwargs: d.iterlists(*args, **kwargs) + iterlistvalues = lambda d, *args, **kwargs: d.iterlistvalues(*args, **kwargs) + + int_to_byte = chr + iter_bytes = iter + + import collections as collections_abc + + exec("def reraise(tp, value, tb=None):\n raise tp, value, tb") + + def fix_tuple_repr(obj): + def __repr__(self): + cls = self.__class__ + return "%s(%s)" % ( + cls.__name__, + ", ".join( + "%s=%r" % (field, self[index]) + for index, field in enumerate(cls._fields) + ), + ) + + obj.__repr__ = __repr__ + return obj + + def implements_iterator(cls): + cls.next = cls.__next__ + del cls.__next__ + return cls + + def implements_to_string(cls): + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda x: x.__unicode__().encode("utf-8") + return cls + + def native_string_result(func): + def wrapper(*args, **kwargs): + return func(*args, **kwargs).encode("utf-8") + + return functools.update_wrapper(wrapper, func) + + def implements_bool(cls): + cls.__nonzero__ = cls.__bool__ + del cls.__bool__ + return cls + + from itertools import imap, izip, ifilter + + range_type = xrange + + from StringIO import StringIO + from cStringIO import StringIO as BytesIO + + NativeStringIO = BytesIO + + def make_literal_wrapper(reference): + return _identity + + def normalize_string_tuple(tup): + """Normalizes a string tuple to a common type. Following Python 2 + rules, upgrades to unicode are implicit. + """ + if any(isinstance(x, text_type) for x in tup): + return tuple(to_unicode(x) for x in tup) + return tup + + def try_coerce_native(s): + """Try to coerce a unicode string to native if possible. Otherwise, + leave it as unicode. + """ + try: + return to_native(s) + except UnicodeError: + return s + + wsgi_get_bytes = _identity + + def wsgi_decoding_dance(s, charset="utf-8", errors="replace"): + return s.decode(charset, errors) + + def wsgi_encoding_dance(s, charset="utf-8", errors="replace"): + if isinstance(s, bytes): + return s + return s.encode(charset, errors) + + def to_bytes(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None: + return None + if isinstance(x, (bytes, bytearray, buffer)): + return bytes(x) + if isinstance(x, unicode): + return x.encode(charset, errors) + raise TypeError("Expected bytes") + + def to_native(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None or isinstance(x, str): + return x + return x.encode(charset, errors) + + +else: + unichr = chr + text_type = str + string_types = (str,) + integer_types = (int,) + + iterkeys = lambda d, *args, **kwargs: iter(d.keys(*args, **kwargs)) + itervalues = lambda d, *args, **kwargs: iter(d.values(*args, **kwargs)) + iteritems = lambda d, *args, **kwargs: iter(d.items(*args, **kwargs)) + + iterlists = lambda d, *args, **kwargs: iter(d.lists(*args, **kwargs)) + iterlistvalues = lambda d, *args, **kwargs: iter(d.listvalues(*args, **kwargs)) + + int_to_byte = operator.methodcaller("to_bytes", 1, "big") + iter_bytes = functools.partial(map, int_to_byte) + + import collections.abc as collections_abc + + def reraise(tp, value, tb=None): + if value.__traceback__ is not tb: + raise value.with_traceback(tb) + raise value + + fix_tuple_repr = _identity + implements_iterator = _identity + implements_to_string = _identity + implements_bool = _identity + native_string_result = _identity + imap = map + izip = zip + ifilter = filter + range_type = range + + from io import StringIO, BytesIO + + NativeStringIO = StringIO + + _latin1_encode = operator.methodcaller("encode", "latin1") + + def make_literal_wrapper(reference): + if isinstance(reference, text_type): + return _identity + return _latin1_encode + + def normalize_string_tuple(tup): + """Ensures that all types in the tuple are either strings + or bytes. + """ + tupiter = iter(tup) + is_text = isinstance(next(tupiter, None), text_type) + for arg in tupiter: + if isinstance(arg, text_type) != is_text: + raise TypeError( + "Cannot mix str and bytes arguments (got %s)" % repr(tup) + ) + return tup + + try_coerce_native = _identity + wsgi_get_bytes = _latin1_encode + + def wsgi_decoding_dance(s, charset="utf-8", errors="replace"): + return s.encode("latin1").decode(charset, errors) + + def wsgi_encoding_dance(s, charset="utf-8", errors="replace"): + if isinstance(s, text_type): + s = s.encode(charset) + return s.decode("latin1", errors) + + def to_bytes(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None: + return None + if isinstance(x, (bytes, bytearray, memoryview)): # noqa + return bytes(x) + if isinstance(x, str): + return x.encode(charset, errors) + raise TypeError("Expected bytes") + + def to_native(x, charset=sys.getdefaultencoding(), errors="strict"): + if x is None or isinstance(x, str): + return x + return x.decode(charset, errors) + + +def to_unicode( + x, charset=sys.getdefaultencoding(), errors="strict", allow_none_charset=False +): + if x is None: + return None + if not isinstance(x, bytes): + return text_type(x) + if charset is None and allow_none_charset: + return x + return x.decode(charset, errors) diff --git a/python/werkzeug/_internal.py b/python/werkzeug/_internal.py new file mode 100644 index 0000000..90e3dd9 --- /dev/null +++ b/python/werkzeug/_internal.py @@ -0,0 +1,484 @@ +# -*- coding: utf-8 -*- +""" + werkzeug._internal + ~~~~~~~~~~~~~~~~~~ + + This module provides internally used helpers and constants. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import inspect +import logging +import re +import string +from datetime import date +from datetime import datetime +from itertools import chain +from weakref import WeakKeyDictionary + +from ._compat import int_to_byte +from ._compat import integer_types +from ._compat import iter_bytes +from ._compat import range_type +from ._compat import text_type + + +_logger = None +_signature_cache = WeakKeyDictionary() +_epoch_ord = date(1970, 1, 1).toordinal() +_cookie_params = { + b"expires", + b"path", + b"comment", + b"max-age", + b"secure", + b"httponly", + b"version", +} +_legal_cookie_chars = ( + string.ascii_letters + string.digits + u"/=!#$%&'*+-.^_`|~:" +).encode("ascii") + +_cookie_quoting_map = {b",": b"\\054", b";": b"\\073", b'"': b'\\"', b"\\": b"\\\\"} +for _i in chain(range_type(32), range_type(127, 256)): + _cookie_quoting_map[int_to_byte(_i)] = ("\\%03o" % _i).encode("latin1") + +_octal_re = re.compile(br"\\[0-3][0-7][0-7]") +_quote_re = re.compile(br"[\\].") +_legal_cookie_chars_re = br"[\w\d!#%&\'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" +_cookie_re = re.compile( + br""" + (?P<key>[^=;]+) + (?:\s*=\s* + (?P<val> + "(?:[^\\"]|\\.)*" | + (?:.*?) + ) + )? + \s*; +""", + flags=re.VERBOSE, +) + + +class _Missing(object): + def __repr__(self): + return "no value" + + def __reduce__(self): + return "_missing" + + +_missing = _Missing() + + +def _get_environ(obj): + env = getattr(obj, "environ", obj) + assert isinstance(env, dict), ( + "%r is not a WSGI environment (has to be a dict)" % type(obj).__name__ + ) + return env + + +def _has_level_handler(logger): + """Check if there is a handler in the logging chain that will handle + the given logger's effective level. + """ + level = logger.getEffectiveLevel() + current = logger + + while current: + if any(handler.level <= level for handler in current.handlers): + return True + + if not current.propagate: + break + + current = current.parent + + return False + + +def _log(type, message, *args, **kwargs): + """Log a message to the 'werkzeug' logger. + + The logger is created the first time it is needed. If there is no + level set, it is set to :data:`logging.INFO`. If there is no handler + for the logger's effective level, a :class:`logging.StreamHandler` + is added. + """ + global _logger + + if _logger is None: + _logger = logging.getLogger("werkzeug") + + if _logger.level == logging.NOTSET: + _logger.setLevel(logging.INFO) + + if not _has_level_handler(_logger): + _logger.addHandler(logging.StreamHandler()) + + getattr(_logger, type)(message.rstrip(), *args, **kwargs) + + +def _parse_signature(func): + """Return a signature object for the function.""" + if hasattr(func, "im_func"): + func = func.im_func + + # if we have a cached validator for this function, return it + parse = _signature_cache.get(func) + if parse is not None: + return parse + + # inspect the function signature and collect all the information + if hasattr(inspect, "getfullargspec"): + tup = inspect.getfullargspec(func) + else: + tup = inspect.getargspec(func) + positional, vararg_var, kwarg_var, defaults = tup[:4] + defaults = defaults or () + arg_count = len(positional) + arguments = [] + for idx, name in enumerate(positional): + if isinstance(name, list): + raise TypeError( + "cannot parse functions that unpack tuples in the function signature" + ) + try: + default = defaults[idx - arg_count] + except IndexError: + param = (name, False, None) + else: + param = (name, True, default) + arguments.append(param) + arguments = tuple(arguments) + + def parse(args, kwargs): + new_args = [] + missing = [] + extra = {} + + # consume as many arguments as positional as possible + for idx, (name, has_default, default) in enumerate(arguments): + try: + new_args.append(args[idx]) + except IndexError: + try: + new_args.append(kwargs.pop(name)) + except KeyError: + if has_default: + new_args.append(default) + else: + missing.append(name) + else: + if name in kwargs: + extra[name] = kwargs.pop(name) + + # handle extra arguments + extra_positional = args[arg_count:] + if vararg_var is not None: + new_args.extend(extra_positional) + extra_positional = () + if kwargs and kwarg_var is None: + extra.update(kwargs) + kwargs = {} + + return ( + new_args, + kwargs, + missing, + extra, + extra_positional, + arguments, + vararg_var, + kwarg_var, + ) + + _signature_cache[func] = parse + return parse + + +def _date_to_unix(arg): + """Converts a timetuple, integer or datetime object into the seconds from + epoch in utc. + """ + if isinstance(arg, datetime): + arg = arg.utctimetuple() + elif isinstance(arg, integer_types + (float,)): + return int(arg) + year, month, day, hour, minute, second = arg[:6] + days = date(year, month, 1).toordinal() - _epoch_ord + day - 1 + hours = days * 24 + hour + minutes = hours * 60 + minute + seconds = minutes * 60 + second + return seconds + + +class _DictAccessorProperty(object): + """Baseclass for `environ_property` and `header_property`.""" + + read_only = False + + def __init__( + self, + name, + default=None, + load_func=None, + dump_func=None, + read_only=None, + doc=None, + ): + self.name = name + self.default = default + self.load_func = load_func + self.dump_func = dump_func + if read_only is not None: + self.read_only = read_only + self.__doc__ = doc + + def __get__(self, obj, type=None): + if obj is None: + return self + storage = self.lookup(obj) + if self.name not in storage: + return self.default + rv = storage[self.name] + if self.load_func is not None: + try: + rv = self.load_func(rv) + except (ValueError, TypeError): + rv = self.default + return rv + + def __set__(self, obj, value): + if self.read_only: + raise AttributeError("read only property") + if self.dump_func is not None: + value = self.dump_func(value) + self.lookup(obj)[self.name] = value + + def __delete__(self, obj): + if self.read_only: + raise AttributeError("read only property") + self.lookup(obj).pop(self.name, None) + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, self.name) + + +def _cookie_quote(b): + buf = bytearray() + all_legal = True + _lookup = _cookie_quoting_map.get + _push = buf.extend + + for char in iter_bytes(b): + if char not in _legal_cookie_chars: + all_legal = False + char = _lookup(char, char) + _push(char) + + if all_legal: + return bytes(buf) + return bytes(b'"' + buf + b'"') + + +def _cookie_unquote(b): + if len(b) < 2: + return b + if b[:1] != b'"' or b[-1:] != b'"': + return b + + b = b[1:-1] + + i = 0 + n = len(b) + rv = bytearray() + _push = rv.extend + + while 0 <= i < n: + o_match = _octal_re.search(b, i) + q_match = _quote_re.search(b, i) + if not o_match and not q_match: + rv.extend(b[i:]) + break + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): + _push(b[i:k]) + _push(b[k + 1 : k + 2]) + i = k + 2 + else: + _push(b[i:j]) + rv.append(int(b[j + 1 : j + 4], 8)) + i = j + 4 + + return bytes(rv) + + +def _cookie_parse_impl(b): + """Lowlevel cookie parsing facility that operates on bytes.""" + i = 0 + n = len(b) + + while i < n: + match = _cookie_re.search(b + b";", i) + if not match: + break + + key = match.group("key").strip() + value = match.group("val") or b"" + i = match.end(0) + + # Ignore parameters. We have no interest in them. + if key.lower() not in _cookie_params: + yield _cookie_unquote(key), _cookie_unquote(value) + + +def _encode_idna(domain): + # If we're given bytes, make sure they fit into ASCII + if not isinstance(domain, text_type): + domain.decode("ascii") + return domain + + # Otherwise check if it's already ascii, then return + try: + return domain.encode("ascii") + except UnicodeError: + pass + + # Otherwise encode each part separately + parts = domain.split(".") + for idx, part in enumerate(parts): + parts[idx] = part.encode("idna") + return b".".join(parts) + + +def _decode_idna(domain): + # If the input is a string try to encode it to ascii to + # do the idna decoding. if that fails because of an + # unicode error, then we already have a decoded idna domain + if isinstance(domain, text_type): + try: + domain = domain.encode("ascii") + except UnicodeError: + return domain + + # Decode each part separately. If a part fails, try to + # decode it with ascii and silently ignore errors. This makes + # most sense because the idna codec does not have error handling + parts = domain.split(b".") + for idx, part in enumerate(parts): + try: + parts[idx] = part.decode("idna") + except UnicodeError: + parts[idx] = part.decode("ascii", "ignore") + + return ".".join(parts) + + +def _make_cookie_domain(domain): + if domain is None: + return None + domain = _encode_idna(domain) + if b":" in domain: + domain = domain.split(b":", 1)[0] + if b"." in domain: + return domain + raise ValueError( + "Setting 'domain' for a cookie on a server running locally (ex: " + "localhost) is not supported by complying browsers. You should " + "have something like: '127.0.0.1 localhost dev.localhost' on " + "your hosts file and then point your server to run on " + "'dev.localhost' and also set 'domain' for 'dev.localhost'" + ) + + +def _easteregg(app=None): + """Like the name says. But who knows how it works?""" + + def bzzzzzzz(gyver): + import base64 + import zlib + + return zlib.decompress(base64.b64decode(gyver)).decode("ascii") + + gyver = u"\n".join( + [ + x + (77 - len(x)) * u" " + for x in bzzzzzzz( + b""" +eJyFlzuOJDkMRP06xRjymKgDJCDQStBYT8BCgK4gTwfQ2fcFs2a2FzvZk+hvlcRvRJD148efHt9m +9Xz94dRY5hGt1nrYcXx7us9qlcP9HHNh28rz8dZj+q4rynVFFPdlY4zH873NKCexrDM6zxxRymzz +4QIxzK4bth1PV7+uHn6WXZ5C4ka/+prFzx3zWLMHAVZb8RRUxtFXI5DTQ2n3Hi2sNI+HK43AOWSY +jmEzE4naFp58PdzhPMdslLVWHTGUVpSxImw+pS/D+JhzLfdS1j7PzUMxij+mc2U0I9zcbZ/HcZxc +q1QjvvcThMYFnp93agEx392ZdLJWXbi/Ca4Oivl4h/Y1ErEqP+lrg7Xa4qnUKu5UE9UUA4xeqLJ5 +jWlPKJvR2yhRI7xFPdzPuc6adXu6ovwXwRPXXnZHxlPtkSkqWHilsOrGrvcVWXgGP3daXomCj317 +8P2UOw/NnA0OOikZyFf3zZ76eN9QXNwYdD8f8/LdBRFg0BO3bB+Pe/+G8er8tDJv83XTkj7WeMBJ +v/rnAfdO51d6sFglfi8U7zbnr0u9tyJHhFZNXYfH8Iafv2Oa+DT6l8u9UYlajV/hcEgk1x8E8L/r +XJXl2SK+GJCxtnyhVKv6GFCEB1OO3f9YWAIEbwcRWv/6RPpsEzOkXURMN37J0PoCSYeBnJQd9Giu +LxYQJNlYPSo/iTQwgaihbART7Fcyem2tTSCcwNCs85MOOpJtXhXDe0E7zgZJkcxWTar/zEjdIVCk +iXy87FW6j5aGZhttDBoAZ3vnmlkx4q4mMmCdLtnHkBXFMCReqthSGkQ+MDXLLCpXwBs0t+sIhsDI +tjBB8MwqYQpLygZ56rRHHpw+OAVyGgaGRHWy2QfXez+ZQQTTBkmRXdV/A9LwH6XGZpEAZU8rs4pE +1R4FQ3Uwt8RKEtRc0/CrANUoes3EzM6WYcFyskGZ6UTHJWenBDS7h163Eo2bpzqxNE9aVgEM2CqI +GAJe9Yra4P5qKmta27VjzYdR04Vc7KHeY4vs61C0nbywFmcSXYjzBHdiEjraS7PGG2jHHTpJUMxN +Jlxr3pUuFvlBWLJGE3GcA1/1xxLcHmlO+LAXbhrXah1tD6Ze+uqFGdZa5FM+3eHcKNaEarutAQ0A +QMAZHV+ve6LxAwWnXbbSXEG2DmCX5ijeLCKj5lhVFBrMm+ryOttCAeFpUdZyQLAQkA06RLs56rzG +8MID55vqr/g64Qr/wqwlE0TVxgoiZhHrbY2h1iuuyUVg1nlkpDrQ7Vm1xIkI5XRKLedN9EjzVchu +jQhXcVkjVdgP2O99QShpdvXWoSwkp5uMwyjt3jiWCqWGSiaaPAzohjPanXVLbM3x0dNskJsaCEyz +DTKIs+7WKJD4ZcJGfMhLFBf6hlbnNkLEePF8Cx2o2kwmYF4+MzAxa6i+6xIQkswOqGO+3x9NaZX8 +MrZRaFZpLeVTYI9F/djY6DDVVs340nZGmwrDqTCiiqD5luj3OzwpmQCiQhdRYowUYEA3i1WWGwL4 +GCtSoO4XbIPFeKGU13XPkDf5IdimLpAvi2kVDVQbzOOa4KAXMFlpi/hV8F6IDe0Y2reg3PuNKT3i +RYhZqtkQZqSB2Qm0SGtjAw7RDwaM1roESC8HWiPxkoOy0lLTRFG39kvbLZbU9gFKFRvixDZBJmpi +Xyq3RE5lW00EJjaqwp/v3EByMSpVZYsEIJ4APaHmVtpGSieV5CALOtNUAzTBiw81GLgC0quyzf6c +NlWknzJeCsJ5fup2R4d8CYGN77mu5vnO1UqbfElZ9E6cR6zbHjgsr9ly18fXjZoPeDjPuzlWbFwS +pdvPkhntFvkc13qb9094LL5NrA3NIq3r9eNnop9DizWOqCEbyRBFJTHn6Tt3CG1o8a4HevYh0XiJ +sR0AVVHuGuMOIfbuQ/OKBkGRC6NJ4u7sbPX8bG/n5sNIOQ6/Y/BX3IwRlTSabtZpYLB85lYtkkgm +p1qXK3Du2mnr5INXmT/78KI12n11EFBkJHHp0wJyLe9MvPNUGYsf+170maayRoy2lURGHAIapSpQ +krEDuNoJCHNlZYhKpvw4mspVWxqo415n8cD62N9+EfHrAvqQnINStetek7RY2Urv8nxsnGaZfRr/ +nhXbJ6m/yl1LzYqscDZA9QHLNbdaSTTr+kFg3bC0iYbX/eQy0Bv3h4B50/SGYzKAXkCeOLI3bcAt +mj2Z/FM1vQWgDynsRwNvrWnJHlespkrp8+vO1jNaibm+PhqXPPv30YwDZ6jApe3wUjFQobghvW9p +7f2zLkGNv8b191cD/3vs9Q833z8t""" + ).splitlines() + ] + ) + + def easteregged(environ, start_response): + def injecting_start_response(status, headers, exc_info=None): + headers.append(("X-Powered-By", "Werkzeug")) + return start_response(status, headers, exc_info) + + if app is not None and environ.get("QUERY_STRING") != "macgybarchakku": + return app(environ, injecting_start_response) + injecting_start_response("200 OK", [("Content-Type", "text/html")]) + return [ + ( + u""" +<!DOCTYPE html> +<html> +<head> +<title>About Werkzeug</title> +<style type="text/css"> + body { font: 15px Georgia, serif; text-align: center; } + a { color: #333; text-decoration: none; } + h1 { font-size: 30px; margin: 20px 0 10px 0; } + p { margin: 0 0 30px 0; } + pre { font: 11px 'Consolas', 'Monaco', monospace; line-height: 0.95; } +</style> +</head> +<body> +<h1><a href="http://werkzeug.pocoo.org/">Werkzeug</a></h1> +<p>the Swiss Army knife of Python web development.</p> +<pre>%s\n\n\n</pre> +</body> +</html>""" + % gyver + ).encode("latin1") + ] + + return easteregged diff --git a/python/werkzeug/_reloader.py b/python/werkzeug/_reloader.py new file mode 100644 index 0000000..f06a63d --- /dev/null +++ b/python/werkzeug/_reloader.py @@ -0,0 +1,334 @@ +import os +import subprocess +import sys +import threading +import time +from itertools import chain + +from ._compat import iteritems +from ._compat import PY2 +from ._compat import text_type +from ._internal import _log + + +def _iter_module_files(): + """This iterates over all relevant Python files. It goes through all + loaded files from modules, all files in folders of already loaded modules + as well as all files reachable through a package. + """ + # The list call is necessary on Python 3 in case the module + # dictionary modifies during iteration. + for module in list(sys.modules.values()): + if module is None: + continue + filename = getattr(module, "__file__", None) + if filename: + if os.path.isdir(filename) and os.path.exists( + os.path.join(filename, "__init__.py") + ): + filename = os.path.join(filename, "__init__.py") + + old = None + while not os.path.isfile(filename): + old = filename + filename = os.path.dirname(filename) + if filename == old: + break + else: + if filename[-4:] in (".pyc", ".pyo"): + filename = filename[:-1] + yield filename + + +def _find_observable_paths(extra_files=None): + """Finds all paths that should be observed.""" + rv = set( + os.path.dirname(os.path.abspath(x)) if os.path.isfile(x) else os.path.abspath(x) + for x in sys.path + ) + + for filename in extra_files or (): + rv.add(os.path.dirname(os.path.abspath(filename))) + + for module in list(sys.modules.values()): + fn = getattr(module, "__file__", None) + if fn is None: + continue + fn = os.path.abspath(fn) + rv.add(os.path.dirname(fn)) + + return _find_common_roots(rv) + + +def _get_args_for_reloading(): + """Returns the executable. This contains a workaround for windows + if the executable is incorrectly reported to not have the .exe + extension which can cause bugs on reloading. This also contains + a workaround for linux where the file is executable (possibly with + a program other than python) + """ + rv = [sys.executable] + py_script = os.path.abspath(sys.argv[0]) + args = sys.argv[1:] + # Need to look at main module to determine how it was executed. + __main__ = sys.modules["__main__"] + + if __main__.__package__ is None: + # Executed a file, like "python app.py". + if os.name == "nt": + # Windows entry points have ".exe" extension and should be + # called directly. + if not os.path.exists(py_script) and os.path.exists(py_script + ".exe"): + py_script += ".exe" + + if ( + os.path.splitext(rv[0])[1] == ".exe" + and os.path.splitext(py_script)[1] == ".exe" + ): + rv.pop(0) + + elif os.path.isfile(py_script) and os.access(py_script, os.X_OK): + # The file is marked as executable. Nix adds a wrapper that + # shouldn't be called with the Python executable. + rv.pop(0) + + rv.append(py_script) + else: + # Executed a module, like "python -m werkzeug.serving". + if sys.argv[0] == "-m": + # Flask works around previous behavior by putting + # "-m flask" in sys.argv. + # TODO remove this once Flask no longer misbehaves + args = sys.argv + else: + py_module = __main__.__package__ + name = os.path.splitext(os.path.basename(py_script))[0] + + if name != "__main__": + py_module += "." + name + + rv.extend(("-m", py_module.lstrip("."))) + + rv.extend(args) + return rv + + +def _find_common_roots(paths): + """Out of some paths it finds the common roots that need monitoring.""" + paths = [x.split(os.path.sep) for x in paths] + root = {} + for chunks in sorted(paths, key=len, reverse=True): + node = root + for chunk in chunks: + node = node.setdefault(chunk, {}) + node.clear() + + rv = set() + + def _walk(node, path): + for prefix, child in iteritems(node): + _walk(child, path + (prefix,)) + if not node: + rv.add("/".join(path)) + + _walk(root, ()) + return rv + + +class ReloaderLoop(object): + name = None + + # monkeypatched by testsuite. wrapping with `staticmethod` is required in + # case time.sleep has been replaced by a non-c function (e.g. by + # `eventlet.monkey_patch`) before we get here + _sleep = staticmethod(time.sleep) + + def __init__(self, extra_files=None, interval=1): + self.extra_files = set(os.path.abspath(x) for x in extra_files or ()) + self.interval = interval + + def run(self): + pass + + def restart_with_reloader(self): + """Spawn a new Python interpreter with the same arguments as this one, + but running the reloader thread. + """ + while 1: + _log("info", " * Restarting with %s" % self.name) + args = _get_args_for_reloading() + + # a weird bug on windows. sometimes unicode strings end up in the + # environment and subprocess.call does not like this, encode them + # to latin1 and continue. + if os.name == "nt" and PY2: + new_environ = {} + for key, value in iteritems(os.environ): + if isinstance(key, text_type): + key = key.encode("iso-8859-1") + if isinstance(value, text_type): + value = value.encode("iso-8859-1") + new_environ[key] = value + else: + new_environ = os.environ.copy() + + new_environ["WERKZEUG_RUN_MAIN"] = "true" + exit_code = subprocess.call(args, env=new_environ, close_fds=False) + if exit_code != 3: + return exit_code + + def trigger_reload(self, filename): + self.log_reload(filename) + sys.exit(3) + + def log_reload(self, filename): + filename = os.path.abspath(filename) + _log("info", " * Detected change in %r, reloading" % filename) + + +class StatReloaderLoop(ReloaderLoop): + name = "stat" + + def run(self): + mtimes = {} + while 1: + for filename in chain(_iter_module_files(), self.extra_files): + try: + mtime = os.stat(filename).st_mtime + except OSError: + continue + + old_time = mtimes.get(filename) + if old_time is None: + mtimes[filename] = mtime + continue + elif mtime > old_time: + self.trigger_reload(filename) + self._sleep(self.interval) + + +class WatchdogReloaderLoop(ReloaderLoop): + def __init__(self, *args, **kwargs): + ReloaderLoop.__init__(self, *args, **kwargs) + from watchdog.observers import Observer + from watchdog.events import FileSystemEventHandler + + self.observable_paths = set() + + def _check_modification(filename): + if filename in self.extra_files: + self.trigger_reload(filename) + dirname = os.path.dirname(filename) + if dirname.startswith(tuple(self.observable_paths)): + if filename.endswith((".pyc", ".pyo", ".py")): + self.trigger_reload(filename) + + class _CustomHandler(FileSystemEventHandler): + def on_created(self, event): + _check_modification(event.src_path) + + def on_modified(self, event): + _check_modification(event.src_path) + + def on_moved(self, event): + _check_modification(event.src_path) + _check_modification(event.dest_path) + + def on_deleted(self, event): + _check_modification(event.src_path) + + reloader_name = Observer.__name__.lower() + if reloader_name.endswith("observer"): + reloader_name = reloader_name[:-8] + reloader_name += " reloader" + + self.name = reloader_name + + self.observer_class = Observer + self.event_handler = _CustomHandler() + self.should_reload = False + + def trigger_reload(self, filename): + # This is called inside an event handler, which means throwing + # SystemExit has no effect. + # https://github.com/gorakhargosh/watchdog/issues/294 + self.should_reload = True + self.log_reload(filename) + + def run(self): + watches = {} + observer = self.observer_class() + observer.start() + + try: + while not self.should_reload: + to_delete = set(watches) + paths = _find_observable_paths(self.extra_files) + for path in paths: + if path not in watches: + try: + watches[path] = observer.schedule( + self.event_handler, path, recursive=True + ) + except OSError: + # Clear this path from list of watches We don't want + # the same error message showing again in the next + # iteration. + watches[path] = None + to_delete.discard(path) + for path in to_delete: + watch = watches.pop(path, None) + if watch is not None: + observer.unschedule(watch) + self.observable_paths = paths + self._sleep(self.interval) + finally: + observer.stop() + observer.join() + + sys.exit(3) + + +reloader_loops = {"stat": StatReloaderLoop, "watchdog": WatchdogReloaderLoop} + +try: + __import__("watchdog.observers") +except ImportError: + reloader_loops["auto"] = reloader_loops["stat"] +else: + reloader_loops["auto"] = reloader_loops["watchdog"] + + +def ensure_echo_on(): + """Ensure that echo mode is enabled. Some tools such as PDB disable + it which causes usability issues after reload.""" + # tcgetattr will fail if stdin isn't a tty + if not sys.stdin.isatty(): + return + try: + import termios + except ImportError: + return + attributes = termios.tcgetattr(sys.stdin) + if not attributes[3] & termios.ECHO: + attributes[3] |= termios.ECHO + termios.tcsetattr(sys.stdin, termios.TCSANOW, attributes) + + +def run_with_reloader(main_func, extra_files=None, interval=1, reloader_type="auto"): + """Run the given function in an independent python interpreter.""" + import signal + + reloader = reloader_loops[reloader_type](extra_files, interval) + signal.signal(signal.SIGTERM, lambda *args: sys.exit(0)) + try: + if os.environ.get("WERKZEUG_RUN_MAIN") == "true": + ensure_echo_on() + t = threading.Thread(target=main_func, args=()) + t.setDaemon(True) + t.start() + reloader.run() + else: + sys.exit(reloader.restart_with_reloader()) + except KeyboardInterrupt: + pass diff --git a/python/werkzeug/contrib/__init__.py b/python/werkzeug/contrib/__init__.py new file mode 100644 index 0000000..0e741f0 --- /dev/null +++ b/python/werkzeug/contrib/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib + ~~~~~~~~~~~~~~~~ + + Contains user-submitted code that other users may find useful, but which + is not part of the Werkzeug core. Anyone can write code for inclusion in + the `contrib` package. All modules in this package are distributed as an + add-on library and thus are not part of Werkzeug itself. + + This file itself is mostly for informational purposes and to tell the + Python interpreter that `contrib` is a package. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" diff --git a/python/werkzeug/contrib/atom.py b/python/werkzeug/contrib/atom.py new file mode 100644 index 0000000..d079d2b --- /dev/null +++ b/python/werkzeug/contrib/atom.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib.atom + ~~~~~~~~~~~~~~~~~~~~~ + + This module provides a class called :class:`AtomFeed` which can be + used to generate feeds in the Atom syndication format (see :rfc:`4287`). + + Example:: + + def atom_feed(request): + feed = AtomFeed("My Blog", feed_url=request.url, + url=request.host_url, + subtitle="My example blog for a feed test.") + for post in Post.query.limit(10).all(): + feed.add(post.title, post.body, content_type='html', + author=post.author, url=post.url, id=post.uid, + updated=post.last_update, published=post.pub_date) + return feed.get_response() + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import warnings +from datetime import datetime + +from .._compat import implements_to_string +from .._compat import string_types +from ..utils import escape +from ..wrappers import BaseResponse + +warnings.warn( + "'werkzeug.contrib.atom' is deprecated as of version 0.15 and will" + " be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, +) + +XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" + + +def _make_text_block(name, content, content_type=None): + """Helper function for the builder that creates an XML text block.""" + if content_type == "xhtml": + return u'<%s type="xhtml"><div xmlns="%s">%s</div></%s>\n' % ( + name, + XHTML_NAMESPACE, + content, + name, + ) + if not content_type: + return u"<%s>%s</%s>\n" % (name, escape(content), name) + return u'<%s type="%s">%s</%s>\n' % (name, content_type, escape(content), name) + + +def format_iso8601(obj): + """Format a datetime object for iso8601""" + iso8601 = obj.isoformat() + if obj.tzinfo: + return iso8601 + return iso8601 + "Z" + + +@implements_to_string +class AtomFeed(object): + + """A helper class that creates Atom feeds. + + :param title: the title of the feed. Required. + :param title_type: the type attribute for the title element. One of + ``'html'``, ``'text'`` or ``'xhtml'``. + :param url: the url for the feed (not the url *of* the feed) + :param id: a globally unique id for the feed. Must be an URI. If + not present the `feed_url` is used, but one of both is + required. + :param updated: the time the feed was modified the last time. Must + be a :class:`datetime.datetime` object. If not + present the latest entry's `updated` is used. + Treated as UTC if naive datetime. + :param feed_url: the URL to the feed. Should be the URL that was + requested. + :param author: the author of the feed. Must be either a string (the + name) or a dict with name (required) and uri or + email (both optional). Can be a list of (may be + mixed, too) strings and dicts, too, if there are + multiple authors. Required if not every entry has an + author element. + :param icon: an icon for the feed. + :param logo: a logo for the feed. + :param rights: copyright information for the feed. + :param rights_type: the type attribute for the rights element. One of + ``'html'``, ``'text'`` or ``'xhtml'``. Default is + ``'text'``. + :param subtitle: a short description of the feed. + :param subtitle_type: the type attribute for the subtitle element. + One of ``'text'``, ``'html'``, ``'text'`` + or ``'xhtml'``. Default is ``'text'``. + :param links: additional links. Must be a list of dictionaries with + href (required) and rel, type, hreflang, title, length + (all optional) + :param generator: the software that generated this feed. This must be + a tuple in the form ``(name, url, version)``. If + you don't want to specify one of them, set the item + to `None`. + :param entries: a list with the entries for the feed. Entries can also + be added later with :meth:`add`. + + For more information on the elements see + http://www.atomenabled.org/developers/syndication/ + + Everywhere where a list is demanded, any iterable can be used. + """ + + default_generator = ("Werkzeug", None, None) + + def __init__(self, title=None, entries=None, **kwargs): + self.title = title + self.title_type = kwargs.get("title_type", "text") + self.url = kwargs.get("url") + self.feed_url = kwargs.get("feed_url", self.url) + self.id = kwargs.get("id", self.feed_url) + self.updated = kwargs.get("updated") + self.author = kwargs.get("author", ()) + self.icon = kwargs.get("icon") + self.logo = kwargs.get("logo") + self.rights = kwargs.get("rights") + self.rights_type = kwargs.get("rights_type") + self.subtitle = kwargs.get("subtitle") + self.subtitle_type = kwargs.get("subtitle_type", "text") + self.generator = kwargs.get("generator") + if self.generator is None: + self.generator = self.default_generator + self.links = kwargs.get("links", []) + self.entries = list(entries) if entries else [] + + if not hasattr(self.author, "__iter__") or isinstance( + self.author, string_types + (dict,) + ): + self.author = [self.author] + for i, author in enumerate(self.author): + if not isinstance(author, dict): + self.author[i] = {"name": author} + + if not self.title: + raise ValueError("title is required") + if not self.id: + raise ValueError("id is required") + for author in self.author: + if "name" not in author: + raise TypeError("author must contain at least a name") + + def add(self, *args, **kwargs): + """Add a new entry to the feed. This function can either be called + with a :class:`FeedEntry` or some keyword and positional arguments + that are forwarded to the :class:`FeedEntry` constructor. + """ + if len(args) == 1 and not kwargs and isinstance(args[0], FeedEntry): + self.entries.append(args[0]) + else: + kwargs["feed_url"] = self.feed_url + self.entries.append(FeedEntry(*args, **kwargs)) + + def __repr__(self): + return "<%s %r (%d entries)>" % ( + self.__class__.__name__, + self.title, + len(self.entries), + ) + + def generate(self): + """Return a generator that yields pieces of XML.""" + # atom demands either an author element in every entry or a global one + if not self.author: + if any(not e.author for e in self.entries): + self.author = ({"name": "Unknown author"},) + + if not self.updated: + dates = sorted([entry.updated for entry in self.entries]) + self.updated = dates[-1] if dates else datetime.utcnow() + + yield u'<?xml version="1.0" encoding="utf-8"?>\n' + yield u'<feed xmlns="http://www.w3.org/2005/Atom">\n' + yield " " + _make_text_block("title", self.title, self.title_type) + yield u" <id>%s</id>\n" % escape(self.id) + yield u" <updated>%s</updated>\n" % format_iso8601(self.updated) + if self.url: + yield u' <link href="%s" />\n' % escape(self.url) + if self.feed_url: + yield u' <link href="%s" rel="self" />\n' % escape(self.feed_url) + for link in self.links: + yield u" <link %s/>\n" % "".join( + '%s="%s" ' % (k, escape(link[k])) for k in link + ) + for author in self.author: + yield u" <author>\n" + yield u" <name>%s</name>\n" % escape(author["name"]) + if "uri" in author: + yield u" <uri>%s</uri>\n" % escape(author["uri"]) + if "email" in author: + yield " <email>%s</email>\n" % escape(author["email"]) + yield " </author>\n" + if self.subtitle: + yield " " + _make_text_block("subtitle", self.subtitle, self.subtitle_type) + if self.icon: + yield u" <icon>%s</icon>\n" % escape(self.icon) + if self.logo: + yield u" <logo>%s</logo>\n" % escape(self.logo) + if self.rights: + yield " " + _make_text_block("rights", self.rights, self.rights_type) + generator_name, generator_url, generator_version = self.generator + if generator_name or generator_url or generator_version: + tmp = [u" <generator"] + if generator_url: + tmp.append(u' uri="%s"' % escape(generator_url)) + if generator_version: + tmp.append(u' version="%s"' % escape(generator_version)) + tmp.append(u">%s</generator>\n" % escape(generator_name)) + yield u"".join(tmp) + for entry in self.entries: + for line in entry.generate(): + yield u" " + line + yield u"</feed>\n" + + def to_string(self): + """Convert the feed into a string.""" + return u"".join(self.generate()) + + def get_response(self): + """Return a response object for the feed.""" + return BaseResponse(self.to_string(), mimetype="application/atom+xml") + + def __call__(self, environ, start_response): + """Use the class as WSGI response object.""" + return self.get_response()(environ, start_response) + + def __str__(self): + return self.to_string() + + +@implements_to_string +class FeedEntry(object): + + """Represents a single entry in a feed. + + :param title: the title of the entry. Required. + :param title_type: the type attribute for the title element. One of + ``'html'``, ``'text'`` or ``'xhtml'``. + :param content: the content of the entry. + :param content_type: the type attribute for the content element. One + of ``'html'``, ``'text'`` or ``'xhtml'``. + :param summary: a summary of the entry's content. + :param summary_type: the type attribute for the summary element. One + of ``'html'``, ``'text'`` or ``'xhtml'``. + :param url: the url for the entry. + :param id: a globally unique id for the entry. Must be an URI. If + not present the URL is used, but one of both is required. + :param updated: the time the entry was modified the last time. Must + be a :class:`datetime.datetime` object. Treated as + UTC if naive datetime. Required. + :param author: the author of the entry. Must be either a string (the + name) or a dict with name (required) and uri or + email (both optional). Can be a list of (may be + mixed, too) strings and dicts, too, if there are + multiple authors. Required if the feed does not have an + author element. + :param published: the time the entry was initially published. Must + be a :class:`datetime.datetime` object. Treated as + UTC if naive datetime. + :param rights: copyright information for the entry. + :param rights_type: the type attribute for the rights element. One of + ``'html'``, ``'text'`` or ``'xhtml'``. Default is + ``'text'``. + :param links: additional links. Must be a list of dictionaries with + href (required) and rel, type, hreflang, title, length + (all optional) + :param categories: categories for the entry. Must be a list of dictionaries + with term (required), scheme and label (all optional) + :param xml_base: The xml base (url) for this feed item. If not provided + it will default to the item url. + + For more information on the elements see + http://www.atomenabled.org/developers/syndication/ + + Everywhere where a list is demanded, any iterable can be used. + """ + + def __init__(self, title=None, content=None, feed_url=None, **kwargs): + self.title = title + self.title_type = kwargs.get("title_type", "text") + self.content = content + self.content_type = kwargs.get("content_type", "html") + self.url = kwargs.get("url") + self.id = kwargs.get("id", self.url) + self.updated = kwargs.get("updated") + self.summary = kwargs.get("summary") + self.summary_type = kwargs.get("summary_type", "html") + self.author = kwargs.get("author", ()) + self.published = kwargs.get("published") + self.rights = kwargs.get("rights") + self.links = kwargs.get("links", []) + self.categories = kwargs.get("categories", []) + self.xml_base = kwargs.get("xml_base", feed_url) + + if not hasattr(self.author, "__iter__") or isinstance( + self.author, string_types + (dict,) + ): + self.author = [self.author] + for i, author in enumerate(self.author): + if not isinstance(author, dict): + self.author[i] = {"name": author} + + if not self.title: + raise ValueError("title is required") + if not self.id: + raise ValueError("id is required") + if not self.updated: + raise ValueError("updated is required") + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.title) + + def generate(self): + """Yields pieces of ATOM XML.""" + base = "" + if self.xml_base: + base = ' xml:base="%s"' % escape(self.xml_base) + yield u"<entry%s>\n" % base + yield u" " + _make_text_block("title", self.title, self.title_type) + yield u" <id>%s</id>\n" % escape(self.id) + yield u" <updated>%s</updated>\n" % format_iso8601(self.updated) + if self.published: + yield u" <published>%s</published>\n" % format_iso8601(self.published) + if self.url: + yield u' <link href="%s" />\n' % escape(self.url) + for author in self.author: + yield u" <author>\n" + yield u" <name>%s</name>\n" % escape(author["name"]) + if "uri" in author: + yield u" <uri>%s</uri>\n" % escape(author["uri"]) + if "email" in author: + yield u" <email>%s</email>\n" % escape(author["email"]) + yield u" </author>\n" + for link in self.links: + yield u" <link %s/>\n" % "".join( + '%s="%s" ' % (k, escape(link[k])) for k in link + ) + for category in self.categories: + yield u" <category %s/>\n" % "".join( + '%s="%s" ' % (k, escape(category[k])) for k in category + ) + if self.summary: + yield u" " + _make_text_block("summary", self.summary, self.summary_type) + if self.content: + yield u" " + _make_text_block("content", self.content, self.content_type) + yield u"</entry>\n" + + def to_string(self): + """Convert the feed item into a unicode object.""" + return u"".join(self.generate()) + + def __str__(self): + return self.to_string() diff --git a/python/werkzeug/contrib/cache.py b/python/werkzeug/contrib/cache.py new file mode 100644 index 0000000..79c749b --- /dev/null +++ b/python/werkzeug/contrib/cache.py @@ -0,0 +1,933 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib.cache + ~~~~~~~~~~~~~~~~~~~~~~ + + The main problem with dynamic Web sites is, well, they're dynamic. Each + time a user requests a page, the webserver executes a lot of code, queries + the database, renders templates until the visitor gets the page he sees. + + This is a lot more expensive than just loading a file from the file system + and sending it to the visitor. + + For most Web applications, this overhead isn't a big deal but once it + becomes, you will be glad to have a cache system in place. + + How Caching Works + ================= + + Caching is pretty simple. Basically you have a cache object lurking around + somewhere that is connected to a remote cache or the file system or + something else. When the request comes in you check if the current page + is already in the cache and if so, you're returning it from the cache. + Otherwise you generate the page and put it into the cache. (Or a fragment + of the page, you don't have to cache the full thing) + + Here is a simple example of how to cache a sidebar for 5 minutes:: + + def get_sidebar(user): + identifier = 'sidebar_for/user%d' % user.id + value = cache.get(identifier) + if value is not None: + return value + value = generate_sidebar_for(user=user) + cache.set(identifier, value, timeout=60 * 5) + return value + + Creating a Cache Object + ======================= + + To create a cache object you just import the cache system of your choice + from the cache module and instantiate it. Then you can start working + with that object: + + >>> from werkzeug.contrib.cache import SimpleCache + >>> c = SimpleCache() + >>> c.set("foo", "value") + >>> c.get("foo") + 'value' + >>> c.get("missing") is None + True + + Please keep in mind that you have to create the cache and put it somewhere + you have access to it (either as a module global you can import or you just + put it into your WSGI application). + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import errno +import os +import platform +import re +import tempfile +import warnings +from hashlib import md5 +from time import time + +from .._compat import integer_types +from .._compat import iteritems +from .._compat import string_types +from .._compat import text_type +from .._compat import to_native +from ..posixemulation import rename + +try: + import cPickle as pickle +except ImportError: # pragma: no cover + import pickle + +warnings.warn( + "'werkzeug.contrib.cache' is deprecated as of version 0.15 and will" + " be removed in version 1.0. It has moved to https://github.com" + "/pallets/cachelib.", + DeprecationWarning, + stacklevel=2, +) + + +def _items(mappingorseq): + """Wrapper for efficient iteration over mappings represented by dicts + or sequences:: + + >>> for k, v in _items((i, i*i) for i in xrange(5)): + ... assert k*k == v + + >>> for k, v in _items(dict((i, i*i) for i in xrange(5))): + ... assert k*k == v + + """ + if hasattr(mappingorseq, "items"): + return iteritems(mappingorseq) + return mappingorseq + + +class BaseCache(object): + """Baseclass for the cache systems. All the cache systems implement this + API or a superset of it. + + :param default_timeout: the default timeout (in seconds) that is used if + no timeout is specified on :meth:`set`. A timeout + of 0 indicates that the cache never expires. + """ + + def __init__(self, default_timeout=300): + self.default_timeout = default_timeout + + def _normalize_timeout(self, timeout): + if timeout is None: + timeout = self.default_timeout + return timeout + + def get(self, key): + """Look up key in the cache and return the value for it. + + :param key: the key to be looked up. + :returns: The value if it exists and is readable, else ``None``. + """ + return None + + def delete(self, key): + """Delete `key` from the cache. + + :param key: the key to delete. + :returns: Whether the key existed and has been deleted. + :rtype: boolean + """ + return True + + def get_many(self, *keys): + """Returns a list of values for the given keys. + For each key an item in the list is created:: + + foo, bar = cache.get_many("foo", "bar") + + Has the same error handling as :meth:`get`. + + :param keys: The function accepts multiple keys as positional + arguments. + """ + return [self.get(k) for k in keys] + + def get_dict(self, *keys): + """Like :meth:`get_many` but return a dict:: + + d = cache.get_dict("foo", "bar") + foo = d["foo"] + bar = d["bar"] + + :param keys: The function accepts multiple keys as positional + arguments. + """ + return dict(zip(keys, self.get_many(*keys))) + + def set(self, key, value, timeout=None): + """Add a new key/value to the cache (overwrites value, if key already + exists in the cache). + + :param key: the key to set + :param value: the value for the key + :param timeout: the cache timeout for the key in seconds (if not + specified, it uses the default timeout). A timeout of + 0 idicates that the cache never expires. + :returns: ``True`` if key has been updated, ``False`` for backend + errors. Pickling errors, however, will raise a subclass of + ``pickle.PickleError``. + :rtype: boolean + """ + return True + + def add(self, key, value, timeout=None): + """Works like :meth:`set` but does not overwrite the values of already + existing keys. + + :param key: the key to set + :param value: the value for the key + :param timeout: the cache timeout for the key in seconds (if not + specified, it uses the default timeout). A timeout of + 0 idicates that the cache never expires. + :returns: Same as :meth:`set`, but also ``False`` for already + existing keys. + :rtype: boolean + """ + return True + + def set_many(self, mapping, timeout=None): + """Sets multiple keys and values from a mapping. + + :param mapping: a mapping with the keys/values to set. + :param timeout: the cache timeout for the key in seconds (if not + specified, it uses the default timeout). A timeout of + 0 idicates that the cache never expires. + :returns: Whether all given keys have been set. + :rtype: boolean + """ + rv = True + for key, value in _items(mapping): + if not self.set(key, value, timeout): + rv = False + return rv + + def delete_many(self, *keys): + """Deletes multiple keys at once. + + :param keys: The function accepts multiple keys as positional + arguments. + :returns: Whether all given keys have been deleted. + :rtype: boolean + """ + return all(self.delete(key) for key in keys) + + def has(self, key): + """Checks if a key exists in the cache without returning it. This is a + cheap operation that bypasses loading the actual data on the backend. + + This method is optional and may not be implemented on all caches. + + :param key: the key to check + """ + raise NotImplementedError( + "%s doesn't have an efficient implementation of `has`. That " + "means it is impossible to check whether a key exists without " + "fully loading the key's data. Consider using `self.get` " + "explicitly if you don't care about performance." + ) + + def clear(self): + """Clears the cache. Keep in mind that not all caches support + completely clearing the cache. + + :returns: Whether the cache has been cleared. + :rtype: boolean + """ + return True + + def inc(self, key, delta=1): + """Increments the value of a key by `delta`. If the key does + not yet exist it is initialized with `delta`. + + For supporting caches this is an atomic operation. + + :param key: the key to increment. + :param delta: the delta to add. + :returns: The new value or ``None`` for backend errors. + """ + value = (self.get(key) or 0) + delta + return value if self.set(key, value) else None + + def dec(self, key, delta=1): + """Decrements the value of a key by `delta`. If the key does + not yet exist it is initialized with `-delta`. + + For supporting caches this is an atomic operation. + + :param key: the key to increment. + :param delta: the delta to subtract. + :returns: The new value or `None` for backend errors. + """ + value = (self.get(key) or 0) - delta + return value if self.set(key, value) else None + + +class NullCache(BaseCache): + """A cache that doesn't cache. This can be useful for unit testing. + + :param default_timeout: a dummy parameter that is ignored but exists + for API compatibility with other caches. + """ + + def has(self, key): + return False + + +class SimpleCache(BaseCache): + """Simple memory cache for single process environments. This class exists + mainly for the development server and is not 100% thread safe. It tries + to use as many atomic operations as possible and no locks for simplicity + but it could happen under heavy load that keys are added multiple times. + + :param threshold: the maximum number of items the cache stores before + it starts deleting some. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. + """ + + def __init__(self, threshold=500, default_timeout=300): + BaseCache.__init__(self, default_timeout) + self._cache = {} + self.clear = self._cache.clear + self._threshold = threshold + + def _prune(self): + if len(self._cache) > self._threshold: + now = time() + toremove = [] + for idx, (key, (expires, _)) in enumerate(self._cache.items()): + if (expires != 0 and expires <= now) or idx % 3 == 0: + toremove.append(key) + for key in toremove: + self._cache.pop(key, None) + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout > 0: + timeout = time() + timeout + return timeout + + def get(self, key): + try: + expires, value = self._cache[key] + if expires == 0 or expires > time(): + return pickle.loads(value) + except (KeyError, pickle.PickleError): + return None + + def set(self, key, value, timeout=None): + expires = self._normalize_timeout(timeout) + self._prune() + self._cache[key] = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) + return True + + def add(self, key, value, timeout=None): + expires = self._normalize_timeout(timeout) + self._prune() + item = (expires, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) + if key in self._cache: + return False + self._cache.setdefault(key, item) + return True + + def delete(self, key): + return self._cache.pop(key, None) is not None + + def has(self, key): + try: + expires, value = self._cache[key] + return expires == 0 or expires > time() + except KeyError: + return False + + +_test_memcached_key = re.compile(r"[^\x00-\x21\xff]{1,250}$").match + + +class MemcachedCache(BaseCache): + """A cache that uses memcached as backend. + + The first argument can either be an object that resembles the API of a + :class:`memcache.Client` or a tuple/list of server addresses. In the + event that a tuple/list is passed, Werkzeug tries to import the best + available memcache library. + + This cache looks into the following packages/modules to find bindings for + memcached: + + - ``pylibmc`` + - ``google.appengine.api.memcached`` + - ``memcached`` + - ``libmc`` + + Implementation notes: This cache backend works around some limitations in + memcached to simplify the interface. For example unicode keys are encoded + to utf-8 on the fly. Methods such as :meth:`~BaseCache.get_dict` return + the keys in the same format as passed. Furthermore all get methods + silently ignore key errors to not cause problems when untrusted user data + is passed to the get methods which is often the case in web applications. + + :param servers: a list or tuple of server addresses or alternatively + a :class:`memcache.Client` or a compatible client. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. + :param key_prefix: a prefix that is added before all keys. This makes it + possible to use the same memcached server for different + applications. Keep in mind that + :meth:`~BaseCache.clear` will also clear keys with a + different prefix. + """ + + def __init__(self, servers=None, default_timeout=300, key_prefix=None): + BaseCache.__init__(self, default_timeout) + if servers is None or isinstance(servers, (list, tuple)): + if servers is None: + servers = ["127.0.0.1:11211"] + self._client = self.import_preferred_memcache_lib(servers) + if self._client is None: + raise RuntimeError("no memcache module found") + else: + # NOTE: servers is actually an already initialized memcache + # client. + self._client = servers + + self.key_prefix = to_native(key_prefix) + + def _normalize_key(self, key): + key = to_native(key, "utf-8") + if self.key_prefix: + key = self.key_prefix + key + return key + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout > 0: + timeout = int(time()) + timeout + return timeout + + def get(self, key): + key = self._normalize_key(key) + # memcached doesn't support keys longer than that. Because often + # checks for so long keys can occur because it's tested from user + # submitted data etc we fail silently for getting. + if _test_memcached_key(key): + return self._client.get(key) + + def get_dict(self, *keys): + key_mapping = {} + have_encoded_keys = False + for key in keys: + encoded_key = self._normalize_key(key) + if not isinstance(key, str): + have_encoded_keys = True + if _test_memcached_key(key): + key_mapping[encoded_key] = key + _keys = list(key_mapping) + d = rv = self._client.get_multi(_keys) + if have_encoded_keys or self.key_prefix: + rv = {} + for key, value in iteritems(d): + rv[key_mapping[key]] = value + if len(rv) < len(keys): + for key in keys: + if key not in rv: + rv[key] = None + return rv + + def add(self, key, value, timeout=None): + key = self._normalize_key(key) + timeout = self._normalize_timeout(timeout) + return self._client.add(key, value, timeout) + + def set(self, key, value, timeout=None): + key = self._normalize_key(key) + timeout = self._normalize_timeout(timeout) + return self._client.set(key, value, timeout) + + def get_many(self, *keys): + d = self.get_dict(*keys) + return [d[key] for key in keys] + + def set_many(self, mapping, timeout=None): + new_mapping = {} + for key, value in _items(mapping): + key = self._normalize_key(key) + new_mapping[key] = value + + timeout = self._normalize_timeout(timeout) + failed_keys = self._client.set_multi(new_mapping, timeout) + return not failed_keys + + def delete(self, key): + key = self._normalize_key(key) + if _test_memcached_key(key): + return self._client.delete(key) + + def delete_many(self, *keys): + new_keys = [] + for key in keys: + key = self._normalize_key(key) + if _test_memcached_key(key): + new_keys.append(key) + return self._client.delete_multi(new_keys) + + def has(self, key): + key = self._normalize_key(key) + if _test_memcached_key(key): + return self._client.append(key, "") + return False + + def clear(self): + return self._client.flush_all() + + def inc(self, key, delta=1): + key = self._normalize_key(key) + return self._client.incr(key, delta) + + def dec(self, key, delta=1): + key = self._normalize_key(key) + return self._client.decr(key, delta) + + def import_preferred_memcache_lib(self, servers): + """Returns an initialized memcache client. Used by the constructor.""" + try: + import pylibmc + except ImportError: + pass + else: + return pylibmc.Client(servers) + + try: + from google.appengine.api import memcache + except ImportError: + pass + else: + return memcache.Client() + + try: + import memcache + except ImportError: + pass + else: + return memcache.Client(servers) + + try: + import libmc + except ImportError: + pass + else: + return libmc.Client(servers) + + +# backwards compatibility +GAEMemcachedCache = MemcachedCache + + +class RedisCache(BaseCache): + """Uses the Redis key-value store as a cache backend. + + The first argument can be either a string denoting address of the Redis + server or an object resembling an instance of a redis.Redis class. + + Note: Python Redis API already takes care of encoding unicode strings on + the fly. + + .. versionadded:: 0.7 + + .. versionadded:: 0.8 + `key_prefix` was added. + + .. versionchanged:: 0.8 + This cache backend now properly serializes objects. + + .. versionchanged:: 0.8.3 + This cache backend now supports password authentication. + + .. versionchanged:: 0.10 + ``**kwargs`` is now passed to the redis object. + + :param host: address of the Redis server or an object which API is + compatible with the official Python Redis client (redis-py). + :param port: port number on which Redis server listens for connections. + :param password: password authentication for the Redis server. + :param db: db (zero-based numeric index) on Redis Server to connect. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. + :param key_prefix: A prefix that should be added to all keys. + + Any additional keyword arguments will be passed to ``redis.Redis``. + """ + + def __init__( + self, + host="localhost", + port=6379, + password=None, + db=0, + default_timeout=300, + key_prefix=None, + **kwargs + ): + BaseCache.__init__(self, default_timeout) + if host is None: + raise ValueError("RedisCache host parameter may not be None") + if isinstance(host, string_types): + try: + import redis + except ImportError: + raise RuntimeError("no redis module found") + if kwargs.get("decode_responses", None): + raise ValueError("decode_responses is not supported by RedisCache.") + self._client = redis.Redis( + host=host, port=port, password=password, db=db, **kwargs + ) + else: + self._client = host + self.key_prefix = key_prefix or "" + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout == 0: + timeout = -1 + return timeout + + def dump_object(self, value): + """Dumps an object into a string for redis. By default it serializes + integers as regular string and pickle dumps everything else. + """ + t = type(value) + if t in integer_types: + return str(value).encode("ascii") + return b"!" + pickle.dumps(value) + + def load_object(self, value): + """The reversal of :meth:`dump_object`. This might be called with + None. + """ + if value is None: + return None + if value.startswith(b"!"): + try: + return pickle.loads(value[1:]) + except pickle.PickleError: + return None + try: + return int(value) + except ValueError: + # before 0.8 we did not have serialization. Still support that. + return value + + def get(self, key): + return self.load_object(self._client.get(self.key_prefix + key)) + + def get_many(self, *keys): + if self.key_prefix: + keys = [self.key_prefix + key for key in keys] + return [self.load_object(x) for x in self._client.mget(keys)] + + def set(self, key, value, timeout=None): + timeout = self._normalize_timeout(timeout) + dump = self.dump_object(value) + if timeout == -1: + result = self._client.set(name=self.key_prefix + key, value=dump) + else: + result = self._client.setex( + name=self.key_prefix + key, value=dump, time=timeout + ) + return result + + def add(self, key, value, timeout=None): + timeout = self._normalize_timeout(timeout) + dump = self.dump_object(value) + return self._client.setnx( + name=self.key_prefix + key, value=dump + ) and self._client.expire(name=self.key_prefix + key, time=timeout) + + def set_many(self, mapping, timeout=None): + timeout = self._normalize_timeout(timeout) + # Use transaction=False to batch without calling redis MULTI + # which is not supported by twemproxy + pipe = self._client.pipeline(transaction=False) + + for key, value in _items(mapping): + dump = self.dump_object(value) + if timeout == -1: + pipe.set(name=self.key_prefix + key, value=dump) + else: + pipe.setex(name=self.key_prefix + key, value=dump, time=timeout) + return pipe.execute() + + def delete(self, key): + return self._client.delete(self.key_prefix + key) + + def delete_many(self, *keys): + if not keys: + return + if self.key_prefix: + keys = [self.key_prefix + key for key in keys] + return self._client.delete(*keys) + + def has(self, key): + return self._client.exists(self.key_prefix + key) + + def clear(self): + status = False + if self.key_prefix: + keys = self._client.keys(self.key_prefix + "*") + if keys: + status = self._client.delete(*keys) + else: + status = self._client.flushdb() + return status + + def inc(self, key, delta=1): + return self._client.incr(name=self.key_prefix + key, amount=delta) + + def dec(self, key, delta=1): + return self._client.decr(name=self.key_prefix + key, amount=delta) + + +class FileSystemCache(BaseCache): + """A cache that stores the items on the file system. This cache depends + on being the only user of the `cache_dir`. Make absolutely sure that + nobody but this cache stores files there or otherwise the cache will + randomly delete files therein. + + :param cache_dir: the directory where cache files are stored. + :param threshold: the maximum number of items the cache stores before + it starts deleting some. A threshold value of 0 + indicates no threshold. + :param default_timeout: the default timeout that is used if no timeout is + specified on :meth:`~BaseCache.set`. A timeout of + 0 indicates that the cache never expires. + :param mode: the file mode wanted for the cache files, default 0600 + """ + + #: used for temporary files by the FileSystemCache + _fs_transaction_suffix = ".__wz_cache" + #: keep amount of files in a cache element + _fs_count_file = "__wz_cache_count" + + def __init__(self, cache_dir, threshold=500, default_timeout=300, mode=0o600): + BaseCache.__init__(self, default_timeout) + self._path = cache_dir + self._threshold = threshold + self._mode = mode + + try: + os.makedirs(self._path) + except OSError as ex: + if ex.errno != errno.EEXIST: + raise + + self._update_count(value=len(self._list_dir())) + + @property + def _file_count(self): + return self.get(self._fs_count_file) or 0 + + def _update_count(self, delta=None, value=None): + # If we have no threshold, don't count files + if self._threshold == 0: + return + + if delta: + new_count = self._file_count + delta + else: + new_count = value or 0 + self.set(self._fs_count_file, new_count, mgmt_element=True) + + def _normalize_timeout(self, timeout): + timeout = BaseCache._normalize_timeout(self, timeout) + if timeout != 0: + timeout = time() + timeout + return int(timeout) + + def _list_dir(self): + """return a list of (fully qualified) cache filenames + """ + mgmt_files = [ + self._get_filename(name).split("/")[-1] for name in (self._fs_count_file,) + ] + return [ + os.path.join(self._path, fn) + for fn in os.listdir(self._path) + if not fn.endswith(self._fs_transaction_suffix) and fn not in mgmt_files + ] + + def _prune(self): + if self._threshold == 0 or not self._file_count > self._threshold: + return + + entries = self._list_dir() + now = time() + for idx, fname in enumerate(entries): + try: + remove = False + with open(fname, "rb") as f: + expires = pickle.load(f) + remove = (expires != 0 and expires <= now) or idx % 3 == 0 + + if remove: + os.remove(fname) + except (IOError, OSError): + pass + self._update_count(value=len(self._list_dir())) + + def clear(self): + for fname in self._list_dir(): + try: + os.remove(fname) + except (IOError, OSError): + self._update_count(value=len(self._list_dir())) + return False + self._update_count(value=0) + return True + + def _get_filename(self, key): + if isinstance(key, text_type): + key = key.encode("utf-8") # XXX unicode review + hash = md5(key).hexdigest() + return os.path.join(self._path, hash) + + def get(self, key): + filename = self._get_filename(key) + try: + with open(filename, "rb") as f: + pickle_time = pickle.load(f) + if pickle_time == 0 or pickle_time >= time(): + return pickle.load(f) + else: + os.remove(filename) + return None + except (IOError, OSError, pickle.PickleError): + return None + + def add(self, key, value, timeout=None): + filename = self._get_filename(key) + if not os.path.exists(filename): + return self.set(key, value, timeout) + return False + + def set(self, key, value, timeout=None, mgmt_element=False): + # Management elements have no timeout + if mgmt_element: + timeout = 0 + + # Don't prune on management element update, to avoid loop + else: + self._prune() + + timeout = self._normalize_timeout(timeout) + filename = self._get_filename(key) + try: + fd, tmp = tempfile.mkstemp( + suffix=self._fs_transaction_suffix, dir=self._path + ) + with os.fdopen(fd, "wb") as f: + pickle.dump(timeout, f, 1) + pickle.dump(value, f, pickle.HIGHEST_PROTOCOL) + rename(tmp, filename) + os.chmod(filename, self._mode) + except (IOError, OSError): + return False + else: + # Management elements should not count towards threshold + if not mgmt_element: + self._update_count(delta=1) + return True + + def delete(self, key, mgmt_element=False): + try: + os.remove(self._get_filename(key)) + except (IOError, OSError): + return False + else: + # Management elements should not count towards threshold + if not mgmt_element: + self._update_count(delta=-1) + return True + + def has(self, key): + filename = self._get_filename(key) + try: + with open(filename, "rb") as f: + pickle_time = pickle.load(f) + if pickle_time == 0 or pickle_time >= time(): + return True + else: + os.remove(filename) + return False + except (IOError, OSError, pickle.PickleError): + return False + + +class UWSGICache(BaseCache): + """Implements the cache using uWSGI's caching framework. + + .. note:: + This class cannot be used when running under PyPy, because the uWSGI + API implementation for PyPy is lacking the needed functionality. + + :param default_timeout: The default timeout in seconds. + :param cache: The name of the caching instance to connect to, for + example: mycache@localhost:3031, defaults to an empty string, which + means uWSGI will cache in the local instance. If the cache is in the + same instance as the werkzeug app, you only have to provide the name of + the cache. + """ + + def __init__(self, default_timeout=300, cache=""): + BaseCache.__init__(self, default_timeout) + + if platform.python_implementation() == "PyPy": + raise RuntimeError( + "uWSGI caching does not work under PyPy, see " + "the docs for more details." + ) + + try: + import uwsgi + + self._uwsgi = uwsgi + except ImportError: + raise RuntimeError( + "uWSGI could not be imported, are you running under uWSGI?" + ) + + self.cache = cache + + def get(self, key): + rv = self._uwsgi.cache_get(key, self.cache) + if rv is None: + return + return pickle.loads(rv) + + def delete(self, key): + return self._uwsgi.cache_del(key, self.cache) + + def set(self, key, value, timeout=None): + return self._uwsgi.cache_update( + key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache + ) + + def add(self, key, value, timeout=None): + return self._uwsgi.cache_set( + key, pickle.dumps(value), self._normalize_timeout(timeout), self.cache + ) + + def clear(self): + return self._uwsgi.cache_clear(self.cache) + + def has(self, key): + return self._uwsgi.cache_exists(key, self.cache) is not None diff --git a/python/werkzeug/contrib/fixers.py b/python/werkzeug/contrib/fixers.py new file mode 100644 index 0000000..8df0afd --- /dev/null +++ b/python/werkzeug/contrib/fixers.py @@ -0,0 +1,262 @@ +""" +Fixers +====== + +.. warning:: + .. deprecated:: 0.15 + ``ProxyFix`` has moved to :mod:`werkzeug.middleware.proxy_fix`. + All other code in this module is deprecated and will be removed + in version 1.0. + +.. versionadded:: 0.5 + +This module includes various helpers that fix web server behavior. + +.. autoclass:: ProxyFix + :members: + +.. autoclass:: CGIRootFix + +.. autoclass:: PathInfoFromRequestUriFix + +.. autoclass:: HeaderRewriterFix + +.. autoclass:: InternetExplorerFix + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import warnings + +from ..datastructures import Headers +from ..datastructures import ResponseCacheControl +from ..http import parse_cache_control_header +from ..http import parse_options_header +from ..http import parse_set_header +from ..middleware.proxy_fix import ProxyFix as _ProxyFix +from ..useragents import UserAgent + +try: + from urllib.parse import unquote +except ImportError: + from urllib import unquote + + +class CGIRootFix(object): + """Wrap the application in this middleware if you are using FastCGI + or CGI and you have problems with your app root being set to the CGI + script's path instead of the path users are going to visit. + + :param app: the WSGI application + :param app_root: Defaulting to ``'/'``, you can set this to + something else if your app is mounted somewhere else. + + .. deprecated:: 0.15 + This middleware will be removed in version 1.0. + + .. versionchanged:: 0.9 + Added `app_root` parameter and renamed from + ``LighttpdCGIRootFix``. + """ + + def __init__(self, app, app_root="/"): + warnings.warn( + "'CGIRootFix' is deprecated as of version 0.15 and will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + self.app_root = app_root.strip("/") + + def __call__(self, environ, start_response): + environ["SCRIPT_NAME"] = self.app_root + return self.app(environ, start_response) + + +class LighttpdCGIRootFix(CGIRootFix): + def __init__(self, *args, **kwargs): + warnings.warn( + "'LighttpdCGIRootFix' is renamed 'CGIRootFix'. Both will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(LighttpdCGIRootFix, self).__init__(*args, **kwargs) + + +class PathInfoFromRequestUriFix(object): + """On windows environment variables are limited to the system charset + which makes it impossible to store the `PATH_INFO` variable in the + environment without loss of information on some systems. + + This is for example a problem for CGI scripts on a Windows Apache. + + This fixer works by recreating the `PATH_INFO` from `REQUEST_URI`, + `REQUEST_URL`, or `UNENCODED_URL` (whatever is available). Thus the + fix can only be applied if the webserver supports either of these + variables. + + :param app: the WSGI application + + .. deprecated:: 0.15 + This middleware will be removed in version 1.0. + """ + + def __init__(self, app): + warnings.warn( + "'PathInfoFromRequestUriFix' is deprecated as of version" + " 0.15 and will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + + def __call__(self, environ, start_response): + for key in "REQUEST_URL", "REQUEST_URI", "UNENCODED_URL": + if key not in environ: + continue + request_uri = unquote(environ[key]) + script_name = unquote(environ.get("SCRIPT_NAME", "")) + if request_uri.startswith(script_name): + environ["PATH_INFO"] = request_uri[len(script_name) :].split("?", 1)[0] + break + return self.app(environ, start_response) + + +class ProxyFix(_ProxyFix): + """ + .. deprecated:: 0.15 + ``werkzeug.contrib.fixers.ProxyFix`` has moved to + :mod:`werkzeug.middleware.proxy_fix`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.contrib.fixers.ProxyFix' has moved to 'werkzeug" + ".middleware.proxy_fix.ProxyFix'. This import is deprecated" + " as of version 0.15 and will be removed in 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(ProxyFix, self).__init__(*args, **kwargs) + + +class HeaderRewriterFix(object): + """This middleware can remove response headers and add others. This + is for example useful to remove the `Date` header from responses if you + are using a server that adds that header, no matter if it's present or + not or to add `X-Powered-By` headers:: + + app = HeaderRewriterFix(app, remove_headers=['Date'], + add_headers=[('X-Powered-By', 'WSGI')]) + + :param app: the WSGI application + :param remove_headers: a sequence of header keys that should be + removed. + :param add_headers: a sequence of ``(key, value)`` tuples that should + be added. + + .. deprecated:: 0.15 + This middleware will be removed in 1.0. + """ + + def __init__(self, app, remove_headers=None, add_headers=None): + warnings.warn( + "'HeaderRewriterFix' is deprecated as of version 0.15 and" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + self.remove_headers = set(x.lower() for x in (remove_headers or ())) + self.add_headers = list(add_headers or ()) + + def __call__(self, environ, start_response): + def rewriting_start_response(status, headers, exc_info=None): + new_headers = [] + for key, value in headers: + if key.lower() not in self.remove_headers: + new_headers.append((key, value)) + new_headers += self.add_headers + return start_response(status, new_headers, exc_info) + + return self.app(environ, rewriting_start_response) + + +class InternetExplorerFix(object): + """This middleware fixes a couple of bugs with Microsoft Internet + Explorer. Currently the following fixes are applied: + + - removing of `Vary` headers for unsupported mimetypes which + causes troubles with caching. Can be disabled by passing + ``fix_vary=False`` to the constructor. + see: https://support.microsoft.com/en-us/help/824847 + + - removes offending headers to work around caching bugs in + Internet Explorer if `Content-Disposition` is set. Can be + disabled by passing ``fix_attach=False`` to the constructor. + + If it does not detect affected Internet Explorer versions it won't touch + the request / response. + + .. deprecated:: 0.15 + This middleware will be removed in 1.0. + """ + + # This code was inspired by Django fixers for the same bugs. The + # fix_vary and fix_attach fixers were originally implemented in Django + # by Michael Axiak and is available as part of the Django project: + # https://code.djangoproject.com/ticket/4148 + + def __init__(self, app, fix_vary=True, fix_attach=True): + warnings.warn( + "'InternetExplorerFix' is deprecated as of version 0.15 and" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + self.app = app + self.fix_vary = fix_vary + self.fix_attach = fix_attach + + def fix_headers(self, environ, headers, status=None): + if self.fix_vary: + header = headers.get("content-type", "") + mimetype, options = parse_options_header(header) + if mimetype not in ("text/html", "text/plain", "text/sgml"): + headers.pop("vary", None) + + if self.fix_attach and "content-disposition" in headers: + pragma = parse_set_header(headers.get("pragma", "")) + pragma.discard("no-cache") + header = pragma.to_header() + if not header: + headers.pop("pragma", "") + else: + headers["Pragma"] = header + header = headers.get("cache-control", "") + if header: + cc = parse_cache_control_header(header, cls=ResponseCacheControl) + cc.no_cache = None + cc.no_store = False + header = cc.to_header() + if not header: + headers.pop("cache-control", "") + else: + headers["Cache-Control"] = header + + def run_fixed(self, environ, start_response): + def fixing_start_response(status, headers, exc_info=None): + headers = Headers(headers) + self.fix_headers(environ, headers, status) + return start_response(status, headers.to_wsgi_list(), exc_info) + + return self.app(environ, fixing_start_response) + + def __call__(self, environ, start_response): + ua = UserAgent(environ) + if ua.browser != "msie": + return self.app(environ, start_response) + return self.run_fixed(environ, start_response) diff --git a/python/werkzeug/contrib/iterio.py b/python/werkzeug/contrib/iterio.py new file mode 100644 index 0000000..b672454 --- /dev/null +++ b/python/werkzeug/contrib/iterio.py @@ -0,0 +1,358 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.contrib.iterio + ~~~~~~~~~~~~~~~~~~~~~~~ + + This module implements a :class:`IterIO` that converts an iterator into + a stream object and the other way round. Converting streams into + iterators requires the `greenlet`_ module. + + To convert an iterator into a stream all you have to do is to pass it + directly to the :class:`IterIO` constructor. In this example we pass it + a newly created generator:: + + def foo(): + yield "something\n" + yield "otherthings" + stream = IterIO(foo()) + print stream.read() # read the whole iterator + + The other way round works a bit different because we have to ensure that + the code execution doesn't take place yet. An :class:`IterIO` call with a + callable as first argument does two things. The function itself is passed + an :class:`IterIO` stream it can feed. The object returned by the + :class:`IterIO` constructor on the other hand is not an stream object but + an iterator:: + + def foo(stream): + stream.write("some") + stream.write("thing") + stream.flush() + stream.write("otherthing") + iterator = IterIO(foo) + print iterator.next() # prints something + print iterator.next() # prints otherthing + iterator.next() # raises StopIteration + + .. _greenlet: https://github.com/python-greenlet/greenlet + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import warnings + +from .._compat import implements_iterator + +try: + import greenlet +except ImportError: + greenlet = None + +warnings.warn( + "'werkzeug.contrib.iterio' is deprecated as of version 0.15 and" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, +) + + +def _mixed_join(iterable, sentinel): + """concatenate any string type in an intelligent way.""" + iterator = iter(iterable) + first_item = next(iterator, sentinel) + if isinstance(first_item, bytes): + return first_item + b"".join(iterator) + return first_item + u"".join(iterator) + + +def _newline(reference_string): + if isinstance(reference_string, bytes): + return b"\n" + return u"\n" + + +@implements_iterator +class IterIO(object): + """Instances of this object implement an interface compatible with the + standard Python :class:`file` object. Streams are either read-only or + write-only depending on how the object is created. + + If the first argument is an iterable a file like object is returned that + returns the contents of the iterable. In case the iterable is empty + read operations will return the sentinel value. + + If the first argument is a callable then the stream object will be + created and passed to that function. The caller itself however will + not receive a stream but an iterable. The function will be executed + step by step as something iterates over the returned iterable. Each + call to :meth:`flush` will create an item for the iterable. If + :meth:`flush` is called without any writes in-between the sentinel + value will be yielded. + + Note for Python 3: due to the incompatible interface of bytes and + streams you should set the sentinel value explicitly to an empty + bytestring (``b''``) if you are expecting to deal with bytes as + otherwise the end of the stream is marked with the wrong sentinel + value. + + .. versionadded:: 0.9 + `sentinel` parameter was added. + """ + + def __new__(cls, obj, sentinel=""): + try: + iterator = iter(obj) + except TypeError: + return IterI(obj, sentinel) + return IterO(iterator, sentinel) + + def __iter__(self): + return self + + def tell(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return self.pos + + def isatty(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return False + + def seek(self, pos, mode=0): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def truncate(self, size=None): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def write(self, s): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def writelines(self, list): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def read(self, n=-1): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def readlines(self, sizehint=0): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def readline(self, length=None): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def flush(self): + if self.closed: + raise ValueError("I/O operation on closed file") + raise IOError(9, "Bad file descriptor") + + def __next__(self): + if self.closed: + raise StopIteration() + line = self.readline() + if not line: + raise StopIteration() + return line + + +class IterI(IterIO): + """Convert an stream into an iterator.""" + + def __new__(cls, func, sentinel=""): + if greenlet is None: + raise RuntimeError("IterI requires greenlet support") + stream = object.__new__(cls) + stream._parent = greenlet.getcurrent() + stream._buffer = [] + stream.closed = False + stream.sentinel = sentinel + stream.pos = 0 + + def run(): + func(stream) + stream.close() + + g = greenlet.greenlet(run, stream._parent) + while 1: + rv = g.switch() + if not rv: + return + yield rv[0] + + def close(self): + if not self.closed: + self.closed = True + self._flush_impl() + + def write(self, s): + if self.closed: + raise ValueError("I/O operation on closed file") + if s: + self.pos += len(s) + self._buffer.append(s) + + def writelines(self, list): + for item in list: + self.write(item) + + def flush(self): + if self.closed: + raise ValueError("I/O operation on closed file") + self._flush_impl() + + def _flush_impl(self): + data = _mixed_join(self._buffer, self.sentinel) + self._buffer = [] + if not data and self.closed: + self._parent.switch() + else: + self._parent.switch((data,)) + + +class IterO(IterIO): + """Iter output. Wrap an iterator and give it a stream like interface.""" + + def __new__(cls, gen, sentinel=""): + self = object.__new__(cls) + self._gen = gen + self._buf = None + self.sentinel = sentinel + self.closed = False + self.pos = 0 + return self + + def __iter__(self): + return self + + def _buf_append(self, string): + """Replace string directly without appending to an empty string, + avoiding type issues.""" + if not self._buf: + self._buf = string + else: + self._buf += string + + def close(self): + if not self.closed: + self.closed = True + if hasattr(self._gen, "close"): + self._gen.close() + + def seek(self, pos, mode=0): + if self.closed: + raise ValueError("I/O operation on closed file") + if mode == 1: + pos += self.pos + elif mode == 2: + self.read() + self.pos = min(self.pos, self.pos + pos) + return + elif mode != 0: + raise IOError("Invalid argument") + buf = [] + try: + tmp_end_pos = len(self._buf or "") + while pos > tmp_end_pos: + item = next(self._gen) + tmp_end_pos += len(item) + buf.append(item) + except StopIteration: + pass + if buf: + self._buf_append(_mixed_join(buf, self.sentinel)) + self.pos = max(0, pos) + + def read(self, n=-1): + if self.closed: + raise ValueError("I/O operation on closed file") + if n < 0: + self._buf_append(_mixed_join(self._gen, self.sentinel)) + result = self._buf[self.pos :] + self.pos += len(result) + return result + new_pos = self.pos + n + buf = [] + try: + tmp_end_pos = 0 if self._buf is None else len(self._buf) + while new_pos > tmp_end_pos or (self._buf is None and not buf): + item = next(self._gen) + tmp_end_pos += len(item) + buf.append(item) + except StopIteration: + pass + if buf: + self._buf_append(_mixed_join(buf, self.sentinel)) + + if self._buf is None: + return self.sentinel + + new_pos = max(0, new_pos) + try: + return self._buf[self.pos : new_pos] + finally: + self.pos = min(new_pos, len(self._buf)) + + def readline(self, length=None): + if self.closed: + raise ValueError("I/O operation on closed file") + + nl_pos = -1 + if self._buf: + nl_pos = self._buf.find(_newline(self._buf), self.pos) + buf = [] + try: + if self._buf is None: + pos = self.pos + else: + pos = len(self._buf) + while nl_pos < 0: + item = next(self._gen) + local_pos = item.find(_newline(item)) + buf.append(item) + if local_pos >= 0: + nl_pos = pos + local_pos + break + pos += len(item) + except StopIteration: + pass + if buf: + self._buf_append(_mixed_join(buf, self.sentinel)) + + if self._buf is None: + return self.sentinel + + if nl_pos < 0: + new_pos = len(self._buf) + else: + new_pos = nl_pos + 1 + if length is not None and self.pos + length < new_pos: + new_pos = self.pos + length + try: + return self._buf[self.pos : new_pos] + finally: + self.pos = min(new_pos, len(self._buf)) + + def readlines(self, sizehint=0): + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines diff --git a/python/werkzeug/contrib/lint.py b/python/werkzeug/contrib/lint.py new file mode 100644 index 0000000..8bd8b8a --- /dev/null +++ b/python/werkzeug/contrib/lint.py @@ -0,0 +1,11 @@ +import warnings + +from ..middleware.lint import * # noqa: F401, F403 + +warnings.warn( + "'werkzeug.contrib.lint' has moved to 'werkzeug.middleware.lint'." + " This import is deprecated as of version 0.15 and will be removed" + " in version 1.0.", + DeprecationWarning, + stacklevel=2, +) diff --git a/python/werkzeug/contrib/profiler.py b/python/werkzeug/contrib/profiler.py new file mode 100644 index 0000000..b79fe56 --- /dev/null +++ b/python/werkzeug/contrib/profiler.py @@ -0,0 +1,42 @@ +import warnings + +from ..middleware.profiler import * # noqa: F401, F403 + +warnings.warn( + "'werkzeug.contrib.profiler' has moved to" + "'werkzeug.middleware.profiler'. This import is deprecated as of" + "version 0.15 and will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, +) + + +class MergeStream(object): + """An object that redirects ``write`` calls to multiple streams. + Use this to log to both ``sys.stdout`` and a file:: + + f = open('profiler.log', 'w') + stream = MergeStream(sys.stdout, f) + profiler = ProfilerMiddleware(app, stream) + + .. deprecated:: 0.15 + Use the ``tee`` command in your terminal instead. This class + will be removed in 1.0. + """ + + def __init__(self, *streams): + warnings.warn( + "'MergeStream' is deprecated as of version 0.15 and will be removed in" + " version 1.0. Use your terminal's 'tee' command instead.", + DeprecationWarning, + stacklevel=2, + ) + + if not streams: + raise TypeError("At least one stream must be given.") + + self.streams = streams + + def write(self, data): + for stream in self.streams: + stream.write(data) diff --git a/python/werkzeug/contrib/securecookie.py b/python/werkzeug/contrib/securecookie.py new file mode 100644 index 0000000..c4c9eee --- /dev/null +++ b/python/werkzeug/contrib/securecookie.py @@ -0,0 +1,362 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.contrib.securecookie + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + This module implements a cookie that is not alterable from the client + because it adds a checksum the server checks for. You can use it as + session replacement if all you have is a user id or something to mark + a logged in user. + + Keep in mind that the data is still readable from the client as a + normal cookie is. However you don't have to store and flush the + sessions you have at the server. + + Example usage: + + >>> from werkzeug.contrib.securecookie import SecureCookie + >>> x = SecureCookie({"foo": 42, "baz": (1, 2, 3)}, "deadbeef") + + Dumping into a string so that one can store it in a cookie: + + >>> value = x.serialize() + + Loading from that string again: + + >>> x = SecureCookie.unserialize(value, "deadbeef") + >>> x["baz"] + (1, 2, 3) + + If someone modifies the cookie and the checksum is wrong the unserialize + method will fail silently and return a new empty `SecureCookie` object. + + Keep in mind that the values will be visible in the cookie so do not + store data in a cookie you don't want the user to see. + + Application Integration + ======================= + + If you are using the werkzeug request objects you could integrate the + secure cookie into your application like this:: + + from werkzeug.utils import cached_property + from werkzeug.wrappers import BaseRequest + from werkzeug.contrib.securecookie import SecureCookie + + # don't use this key but a different one; you could just use + # os.urandom(20) to get something random + SECRET_KEY = '\xfa\xdd\xb8z\xae\xe0}4\x8b\xea' + + class Request(BaseRequest): + + @cached_property + def client_session(self): + data = self.cookies.get('session_data') + if not data: + return SecureCookie(secret_key=SECRET_KEY) + return SecureCookie.unserialize(data, SECRET_KEY) + + def application(environ, start_response): + request = Request(environ) + + # get a response object here + response = ... + + if request.client_session.should_save: + session_data = request.client_session.serialize() + response.set_cookie('session_data', session_data, + httponly=True) + return response(environ, start_response) + + A less verbose integration can be achieved by using shorthand methods:: + + class Request(BaseRequest): + + @cached_property + def client_session(self): + return SecureCookie.load_cookie(self, secret_key=COOKIE_SECRET) + + def application(environ, start_response): + request = Request(environ) + + # get a response object here + response = ... + + request.client_session.save_cookie(response) + return response(environ, start_response) + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import base64 +import pickle +import warnings +from hashlib import sha1 as _default_hash +from hmac import new as hmac +from time import time + +from .._compat import iteritems +from .._compat import text_type +from .._compat import to_bytes +from .._compat import to_native +from .._internal import _date_to_unix +from ..contrib.sessions import ModificationTrackingDict +from ..security import safe_str_cmp +from ..urls import url_quote_plus +from ..urls import url_unquote_plus + +warnings.warn( + "'werkzeug.contrib.securecookie' is deprecated as of version 0.15" + " and will be removed in version 1.0. It has moved to" + " https://github.com/pallets/secure-cookie.", + DeprecationWarning, + stacklevel=2, +) + + +class UnquoteError(Exception): + """Internal exception used to signal failures on quoting.""" + + +class SecureCookie(ModificationTrackingDict): + """Represents a secure cookie. You can subclass this class and provide + an alternative mac method. The import thing is that the mac method + is a function with a similar interface to the hashlib. Required + methods are update() and digest(). + + Example usage: + + >>> x = SecureCookie({"foo": 42, "baz": (1, 2, 3)}, "deadbeef") + >>> x["foo"] + 42 + >>> x["baz"] + (1, 2, 3) + >>> x["blafasel"] = 23 + >>> x.should_save + True + + :param data: the initial data. Either a dict, list of tuples or `None`. + :param secret_key: the secret key. If not set `None` or not specified + it has to be set before :meth:`serialize` is called. + :param new: The initial value of the `new` flag. + """ + + #: The hash method to use. This has to be a module with a new function + #: or a function that creates a hashlib object. Such as `hashlib.md5` + #: Subclasses can override this attribute. The default hash is sha1. + #: Make sure to wrap this in staticmethod() if you store an arbitrary + #: function there such as hashlib.sha1 which might be implemented + #: as a function. + hash_method = staticmethod(_default_hash) + + #: The module used for serialization. Should have a ``dumps`` and a + #: ``loads`` method that takes bytes. The default is :mod:`pickle`. + #: + #: .. versionchanged:: 0.15 + #: The default of ``pickle`` will change to :mod:`json` in 1.0. + serialization_method = pickle + + #: if the contents should be base64 quoted. This can be disabled if the + #: serialization process returns cookie safe strings only. + quote_base64 = True + + def __init__(self, data=None, secret_key=None, new=True): + ModificationTrackingDict.__init__(self, data or ()) + # explicitly convert it into a bytestring because python 2.6 + # no longer performs an implicit string conversion on hmac + if secret_key is not None: + secret_key = to_bytes(secret_key, "utf-8") + self.secret_key = secret_key + self.new = new + + if self.serialization_method is pickle: + warnings.warn( + "The default 'SecureCookie.serialization_method' will" + " change from pickle to json in version 1.0. To upgrade" + " existing tokens, override 'unquote' to try pickle if" + " json fails.", + stacklevel=2, + ) + + def __repr__(self): + return "<%s %s%s>" % ( + self.__class__.__name__, + dict.__repr__(self), + "*" if self.should_save else "", + ) + + @property + def should_save(self): + """True if the session should be saved. By default this is only true + for :attr:`modified` cookies, not :attr:`new`. + """ + return self.modified + + @classmethod + def quote(cls, value): + """Quote the value for the cookie. This can be any object supported + by :attr:`serialization_method`. + + :param value: the value to quote. + """ + if cls.serialization_method is not None: + value = cls.serialization_method.dumps(value) + if cls.quote_base64: + value = b"".join( + base64.b64encode(to_bytes(value, "utf8")).splitlines() + ).strip() + return value + + @classmethod + def unquote(cls, value): + """Unquote the value for the cookie. If unquoting does not work a + :exc:`UnquoteError` is raised. + + :param value: the value to unquote. + """ + try: + if cls.quote_base64: + value = base64.b64decode(value) + if cls.serialization_method is not None: + value = cls.serialization_method.loads(value) + return value + except Exception: + # unfortunately pickle and other serialization modules can + # cause pretty every error here. if we get one we catch it + # and convert it into an UnquoteError + raise UnquoteError() + + def serialize(self, expires=None): + """Serialize the secure cookie into a string. + + If expires is provided, the session will be automatically invalidated + after expiration when you unseralize it. This provides better + protection against session cookie theft. + + :param expires: an optional expiration date for the cookie (a + :class:`datetime.datetime` object) + """ + if self.secret_key is None: + raise RuntimeError("no secret key defined") + if expires: + self["_expires"] = _date_to_unix(expires) + result = [] + mac = hmac(self.secret_key, None, self.hash_method) + for key, value in sorted(self.items()): + result.append( + ( + "%s=%s" % (url_quote_plus(key), self.quote(value).decode("ascii")) + ).encode("ascii") + ) + mac.update(b"|" + result[-1]) + return b"?".join([base64.b64encode(mac.digest()).strip(), b"&".join(result)]) + + @classmethod + def unserialize(cls, string, secret_key): + """Load the secure cookie from a serialized string. + + :param string: the cookie value to unserialize. + :param secret_key: the secret key used to serialize the cookie. + :return: a new :class:`SecureCookie`. + """ + if isinstance(string, text_type): + string = string.encode("utf-8", "replace") + if isinstance(secret_key, text_type): + secret_key = secret_key.encode("utf-8", "replace") + try: + base64_hash, data = string.split(b"?", 1) + except (ValueError, IndexError): + items = () + else: + items = {} + mac = hmac(secret_key, None, cls.hash_method) + for item in data.split(b"&"): + mac.update(b"|" + item) + if b"=" not in item: + items = None + break + key, value = item.split(b"=", 1) + # try to make the key a string + key = url_unquote_plus(key.decode("ascii")) + try: + key = to_native(key) + except UnicodeError: + pass + items[key] = value + + # no parsing error and the mac looks okay, we can now + # sercurely unpickle our cookie. + try: + client_hash = base64.b64decode(base64_hash) + except TypeError: + items = client_hash = None + if items is not None and safe_str_cmp(client_hash, mac.digest()): + try: + for key, value in iteritems(items): + items[key] = cls.unquote(value) + except UnquoteError: + items = () + else: + if "_expires" in items: + if time() > items["_expires"]: + items = () + else: + del items["_expires"] + else: + items = () + return cls(items, secret_key, False) + + @classmethod + def load_cookie(cls, request, key="session", secret_key=None): + """Loads a :class:`SecureCookie` from a cookie in request. If the + cookie is not set, a new :class:`SecureCookie` instanced is + returned. + + :param request: a request object that has a `cookies` attribute + which is a dict of all cookie values. + :param key: the name of the cookie. + :param secret_key: the secret key used to unquote the cookie. + Always provide the value even though it has + no default! + """ + data = request.cookies.get(key) + if not data: + return cls(secret_key=secret_key) + return cls.unserialize(data, secret_key) + + def save_cookie( + self, + response, + key="session", + expires=None, + session_expires=None, + max_age=None, + path="/", + domain=None, + secure=None, + httponly=False, + force=False, + ): + """Saves the SecureCookie in a cookie on response object. All + parameters that are not described here are forwarded directly + to :meth:`~BaseResponse.set_cookie`. + + :param response: a response object that has a + :meth:`~BaseResponse.set_cookie` method. + :param key: the name of the cookie. + :param session_expires: the expiration date of the secure cookie + stored information. If this is not provided + the cookie `expires` date is used instead. + """ + if force or self.should_save: + data = self.serialize(session_expires or expires) + response.set_cookie( + key, + data, + expires=expires, + max_age=max_age, + path=path, + domain=domain, + secure=secure, + httponly=httponly, + ) diff --git a/python/werkzeug/contrib/sessions.py b/python/werkzeug/contrib/sessions.py new file mode 100644 index 0000000..866e827 --- /dev/null +++ b/python/werkzeug/contrib/sessions.py @@ -0,0 +1,389 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.contrib.sessions + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + This module contains some helper classes that help one to add session + support to a python WSGI application. For full client-side session + storage see :mod:`~werkzeug.contrib.securecookie` which implements a + secure, client-side session storage. + + + Application Integration + ======================= + + :: + + from werkzeug.contrib.sessions import SessionMiddleware, \ + FilesystemSessionStore + + app = SessionMiddleware(app, FilesystemSessionStore()) + + The current session will then appear in the WSGI environment as + `werkzeug.session`. However it's recommended to not use the middleware + but the stores directly in the application. However for very simple + scripts a middleware for sessions could be sufficient. + + This module does not implement methods or ways to check if a session is + expired. That should be done by a cronjob and storage specific. For + example to prune unused filesystem sessions one could check the modified + time of the files. If sessions are stored in the database the new() + method should add an expiration timestamp for the session. + + For better flexibility it's recommended to not use the middleware but the + store and session object directly in the application dispatching:: + + session_store = FilesystemSessionStore() + + def application(environ, start_response): + request = Request(environ) + sid = request.cookies.get('cookie_name') + if sid is None: + request.session = session_store.new() + else: + request.session = session_store.get(sid) + response = get_the_response_object(request) + if request.session.should_save: + session_store.save(request.session) + response.set_cookie('cookie_name', request.session.sid) + return response(environ, start_response) + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import os +import re +import tempfile +import warnings +from hashlib import sha1 +from os import path +from pickle import dump +from pickle import HIGHEST_PROTOCOL +from pickle import load +from random import random +from time import time + +from .._compat import PY2 +from .._compat import text_type +from ..datastructures import CallbackDict +from ..filesystem import get_filesystem_encoding +from ..posixemulation import rename +from ..utils import dump_cookie +from ..utils import parse_cookie +from ..wsgi import ClosingIterator + +warnings.warn( + "'werkzeug.contrib.sessions' is deprecated as of version 0.15 and" + " will be removed in version 1.0. It has moved to" + " https://github.com/pallets/secure-cookie.", + DeprecationWarning, + stacklevel=2, +) + +_sha1_re = re.compile(r"^[a-f0-9]{40}$") + + +def _urandom(): + if hasattr(os, "urandom"): + return os.urandom(30) + return text_type(random()).encode("ascii") + + +def generate_key(salt=None): + if salt is None: + salt = repr(salt).encode("ascii") + return sha1(b"".join([salt, str(time()).encode("ascii"), _urandom()])).hexdigest() + + +class ModificationTrackingDict(CallbackDict): + __slots__ = ("modified",) + + def __init__(self, *args, **kwargs): + def on_update(self): + self.modified = True + + self.modified = False + CallbackDict.__init__(self, on_update=on_update) + dict.update(self, *args, **kwargs) + + def copy(self): + """Create a flat copy of the dict.""" + missing = object() + result = object.__new__(self.__class__) + for name in self.__slots__: + val = getattr(self, name, missing) + if val is not missing: + setattr(result, name, val) + return result + + def __copy__(self): + return self.copy() + + +class Session(ModificationTrackingDict): + """Subclass of a dict that keeps track of direct object changes. Changes + in mutable structures are not tracked, for those you have to set + `modified` to `True` by hand. + """ + + __slots__ = ModificationTrackingDict.__slots__ + ("sid", "new") + + def __init__(self, data, sid, new=False): + ModificationTrackingDict.__init__(self, data) + self.sid = sid + self.new = new + + def __repr__(self): + return "<%s %s%s>" % ( + self.__class__.__name__, + dict.__repr__(self), + "*" if self.should_save else "", + ) + + @property + def should_save(self): + """True if the session should be saved. + + .. versionchanged:: 0.6 + By default the session is now only saved if the session is + modified, not if it is new like it was before. + """ + return self.modified + + +class SessionStore(object): + """Baseclass for all session stores. The Werkzeug contrib module does not + implement any useful stores besides the filesystem store, application + developers are encouraged to create their own stores. + + :param session_class: The session class to use. Defaults to + :class:`Session`. + """ + + def __init__(self, session_class=None): + if session_class is None: + session_class = Session + self.session_class = session_class + + def is_valid_key(self, key): + """Check if a key has the correct format.""" + return _sha1_re.match(key) is not None + + def generate_key(self, salt=None): + """Simple function that generates a new session key.""" + return generate_key(salt) + + def new(self): + """Generate a new session.""" + return self.session_class({}, self.generate_key(), True) + + def save(self, session): + """Save a session.""" + + def save_if_modified(self, session): + """Save if a session class wants an update.""" + if session.should_save: + self.save(session) + + def delete(self, session): + """Delete a session.""" + + def get(self, sid): + """Get a session for this sid or a new session object. This method + has to check if the session key is valid and create a new session if + that wasn't the case. + """ + return self.session_class({}, sid, True) + + +#: used for temporary files by the filesystem session store +_fs_transaction_suffix = ".__wz_sess" + + +class FilesystemSessionStore(SessionStore): + """Simple example session store that saves sessions on the filesystem. + This store works best on POSIX systems and Windows Vista / Windows + Server 2008 and newer. + + .. versionchanged:: 0.6 + `renew_missing` was added. Previously this was considered `True`, + now the default changed to `False` and it can be explicitly + deactivated. + + :param path: the path to the folder used for storing the sessions. + If not provided the default temporary directory is used. + :param filename_template: a string template used to give the session + a filename. ``%s`` is replaced with the + session id. + :param session_class: The session class to use. Defaults to + :class:`Session`. + :param renew_missing: set to `True` if you want the store to + give the user a new sid if the session was + not yet saved. + """ + + def __init__( + self, + path=None, + filename_template="werkzeug_%s.sess", + session_class=None, + renew_missing=False, + mode=0o644, + ): + SessionStore.__init__(self, session_class) + if path is None: + path = tempfile.gettempdir() + self.path = path + if isinstance(filename_template, text_type) and PY2: + filename_template = filename_template.encode(get_filesystem_encoding()) + assert not filename_template.endswith(_fs_transaction_suffix), ( + "filename templates may not end with %s" % _fs_transaction_suffix + ) + self.filename_template = filename_template + self.renew_missing = renew_missing + self.mode = mode + + def get_session_filename(self, sid): + # out of the box, this should be a strict ASCII subset but + # you might reconfigure the session object to have a more + # arbitrary string. + if isinstance(sid, text_type) and PY2: + sid = sid.encode(get_filesystem_encoding()) + return path.join(self.path, self.filename_template % sid) + + def save(self, session): + fn = self.get_session_filename(session.sid) + fd, tmp = tempfile.mkstemp(suffix=_fs_transaction_suffix, dir=self.path) + f = os.fdopen(fd, "wb") + try: + dump(dict(session), f, HIGHEST_PROTOCOL) + finally: + f.close() + try: + rename(tmp, fn) + os.chmod(fn, self.mode) + except (IOError, OSError): + pass + + def delete(self, session): + fn = self.get_session_filename(session.sid) + try: + os.unlink(fn) + except OSError: + pass + + def get(self, sid): + if not self.is_valid_key(sid): + return self.new() + try: + f = open(self.get_session_filename(sid), "rb") + except IOError: + if self.renew_missing: + return self.new() + data = {} + else: + try: + try: + data = load(f) + except Exception: + data = {} + finally: + f.close() + return self.session_class(data, sid, False) + + def list(self): + """Lists all sessions in the store. + + .. versionadded:: 0.6 + """ + before, after = self.filename_template.split("%s", 1) + filename_re = re.compile( + r"%s(.{5,})%s$" % (re.escape(before), re.escape(after)) + ) + result = [] + for filename in os.listdir(self.path): + #: this is a session that is still being saved. + if filename.endswith(_fs_transaction_suffix): + continue + match = filename_re.match(filename) + if match is not None: + result.append(match.group(1)) + return result + + +class SessionMiddleware(object): + """A simple middleware that puts the session object of a store provided + into the WSGI environ. It automatically sets cookies and restores + sessions. + + However a middleware is not the preferred solution because it won't be as + fast as sessions managed by the application itself and will put a key into + the WSGI environment only relevant for the application which is against + the concept of WSGI. + + The cookie parameters are the same as for the :func:`~dump_cookie` + function just prefixed with ``cookie_``. Additionally `max_age` is + called `cookie_age` and not `cookie_max_age` because of backwards + compatibility. + """ + + def __init__( + self, + app, + store, + cookie_name="session_id", + cookie_age=None, + cookie_expires=None, + cookie_path="/", + cookie_domain=None, + cookie_secure=None, + cookie_httponly=False, + cookie_samesite="Lax", + environ_key="werkzeug.session", + ): + self.app = app + self.store = store + self.cookie_name = cookie_name + self.cookie_age = cookie_age + self.cookie_expires = cookie_expires + self.cookie_path = cookie_path + self.cookie_domain = cookie_domain + self.cookie_secure = cookie_secure + self.cookie_httponly = cookie_httponly + self.cookie_samesite = cookie_samesite + self.environ_key = environ_key + + def __call__(self, environ, start_response): + cookie = parse_cookie(environ.get("HTTP_COOKIE", "")) + sid = cookie.get(self.cookie_name, None) + if sid is None: + session = self.store.new() + else: + session = self.store.get(sid) + environ[self.environ_key] = session + + def injecting_start_response(status, headers, exc_info=None): + if session.should_save: + self.store.save(session) + headers.append( + ( + "Set-Cookie", + dump_cookie( + self.cookie_name, + session.sid, + self.cookie_age, + self.cookie_expires, + self.cookie_path, + self.cookie_domain, + self.cookie_secure, + self.cookie_httponly, + samesite=self.cookie_samesite, + ), + ) + ) + return start_response(status, headers, exc_info) + + return ClosingIterator( + self.app(environ, injecting_start_response), + lambda: self.store.save_if_modified(session), + ) diff --git a/python/werkzeug/contrib/wrappers.py b/python/werkzeug/contrib/wrappers.py new file mode 100644 index 0000000..49b82a7 --- /dev/null +++ b/python/werkzeug/contrib/wrappers.py @@ -0,0 +1,385 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.contrib.wrappers + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + Extra wrappers or mixins contributed by the community. These wrappers can + be mixed in into request objects to add extra functionality. + + Example:: + + from werkzeug.wrappers import Request as RequestBase + from werkzeug.contrib.wrappers import JSONRequestMixin + + class Request(RequestBase, JSONRequestMixin): + pass + + Afterwards this request object provides the extra functionality of the + :class:`JSONRequestMixin`. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import warnings + +from .._compat import wsgi_decoding_dance +from ..exceptions import BadRequest +from ..http import dump_options_header +from ..http import parse_options_header +from ..utils import cached_property +from ..wrappers.json import JSONMixin as _JSONMixin + + +def is_known_charset(charset): + """Checks if the given charset is known to Python.""" + try: + codecs.lookup(charset) + except LookupError: + return False + return True + + +class JSONRequestMixin(_JSONMixin): + """ + .. deprecated:: 0.15 + Moved to :class:`werkzeug.wrappers.json.JSONMixin`. This old + import will be removed in version 1.0. + """ + + @property + def json(self): + warnings.warn( + "'werkzeug.contrib.wrappers.JSONRequestMixin' has moved to" + " 'werkzeug.wrappers.json.JSONMixin'. This old import will" + " be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return super(JSONRequestMixin, self).json + + +class ProtobufRequestMixin(object): + + """Add protobuf parsing method to a request object. This will parse the + input data through `protobuf`_ if possible. + + :exc:`~werkzeug.exceptions.BadRequest` will be raised if the content-type + is not protobuf or if the data itself cannot be parsed property. + + .. _protobuf: https://github.com/protocolbuffers/protobuf + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + """ + + #: by default the :class:`ProtobufRequestMixin` will raise a + #: :exc:`~werkzeug.exceptions.BadRequest` if the object is not + #: initialized. You can bypass that check by setting this + #: attribute to `False`. + protobuf_check_initialization = True + + def parse_protobuf(self, proto_type): + """Parse the data into an instance of proto_type.""" + warnings.warn( + "'werkzeug.contrib.wrappers.ProtobufRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + if "protobuf" not in self.environ.get("CONTENT_TYPE", ""): + raise BadRequest("Not a Protobuf request") + + obj = proto_type() + try: + obj.ParseFromString(self.data) + except Exception: + raise BadRequest("Unable to parse Protobuf request") + + # Fail if not all required fields are set + if self.protobuf_check_initialization and not obj.IsInitialized(): + raise BadRequest("Partial Protobuf request") + + return obj + + +class RoutingArgsRequestMixin(object): + + """This request mixin adds support for the wsgiorg routing args + `specification`_. + + .. _specification: https://wsgi.readthedocs.io/en/latest/ + specifications/routing_args.html + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + """ + + def _get_routing_args(self): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return self.environ.get("wsgiorg.routing_args", (()))[0] + + def _set_routing_args(self, value): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + if self.shallow: + raise RuntimeError( + "A shallow request tried to modify the WSGI " + "environment. If you really want to do that, " + "set `shallow` to False." + ) + self.environ["wsgiorg.routing_args"] = (value, self.routing_vars) + + routing_args = property( + _get_routing_args, + _set_routing_args, + doc=""" + The positional URL arguments as `tuple`.""", + ) + del _get_routing_args, _set_routing_args + + def _get_routing_vars(self): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + rv = self.environ.get("wsgiorg.routing_args") + if rv is not None: + return rv[1] + rv = {} + if not self.shallow: + self.routing_vars = rv + return rv + + def _set_routing_vars(self, value): + warnings.warn( + "'werkzeug.contrib.wrappers.RoutingArgsRequestMixin' is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + if self.shallow: + raise RuntimeError( + "A shallow request tried to modify the WSGI " + "environment. If you really want to do that, " + "set `shallow` to False." + ) + self.environ["wsgiorg.routing_args"] = (self.routing_args, value) + + routing_vars = property( + _get_routing_vars, + _set_routing_vars, + doc=""" + The keyword URL arguments as `dict`.""", + ) + del _get_routing_vars, _set_routing_vars + + +class ReverseSlashBehaviorRequestMixin(object): + + """This mixin reverses the trailing slash behavior of :attr:`script_root` + and :attr:`path`. This makes it possible to use :func:`~urlparse.urljoin` + directly on the paths. + + Because it changes the behavior or :class:`Request` this class has to be + mixed in *before* the actual request class:: + + class MyRequest(ReverseSlashBehaviorRequestMixin, Request): + pass + + This example shows the differences (for an application mounted on + `/application` and the request going to `/application/foo/bar`): + + +---------------+-------------------+---------------------+ + | | normal behavior | reverse behavior | + +===============+===================+=====================+ + | `script_root` | ``/application`` | ``/application/`` | + +---------------+-------------------+---------------------+ + | `path` | ``/foo/bar`` | ``foo/bar`` | + +---------------+-------------------+---------------------+ + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + """ + + @cached_property + def path(self): + """Requested path as unicode. This works a bit like the regular path + info in the WSGI environment but will not include a leading slash. + """ + warnings.warn( + "'werkzeug.contrib.wrappers.ReverseSlashBehaviorRequestMixin'" + " is deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + path = wsgi_decoding_dance( + self.environ.get("PATH_INFO") or "", self.charset, self.encoding_errors + ) + return path.lstrip("/") + + @cached_property + def script_root(self): + """The root path of the script includling a trailing slash.""" + warnings.warn( + "'werkzeug.contrib.wrappers.ReverseSlashBehaviorRequestMixin'" + " is deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + path = wsgi_decoding_dance( + self.environ.get("SCRIPT_NAME") or "", self.charset, self.encoding_errors + ) + return path.rstrip("/") + "/" + + +class DynamicCharsetRequestMixin(object): + + """"If this mixin is mixed into a request class it will provide + a dynamic `charset` attribute. This means that if the charset is + transmitted in the content type headers it's used from there. + + Because it changes the behavior or :class:`Request` this class has + to be mixed in *before* the actual request class:: + + class MyRequest(DynamicCharsetRequestMixin, Request): + pass + + By default the request object assumes that the URL charset is the + same as the data charset. If the charset varies on each request + based on the transmitted data it's not a good idea to let the URLs + change based on that. Most browsers assume either utf-8 or latin1 + for the URLs if they have troubles figuring out. It's strongly + recommended to set the URL charset to utf-8:: + + class MyRequest(DynamicCharsetRequestMixin, Request): + url_charset = 'utf-8' + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + + .. versionadded:: 0.6 + """ + + #: the default charset that is assumed if the content type header + #: is missing or does not contain a charset parameter. The default + #: is latin1 which is what HTTP specifies as default charset. + #: You may however want to set this to utf-8 to better support + #: browsers that do not transmit a charset for incoming data. + default_charset = "latin1" + + def unknown_charset(self, charset): + """Called if a charset was provided but is not supported by + the Python codecs module. By default latin1 is assumed then + to not lose any information, you may override this method to + change the behavior. + + :param charset: the charset that was not found. + :return: the replacement charset. + """ + return "latin1" + + @cached_property + def charset(self): + """The charset from the content type.""" + warnings.warn( + "'werkzeug.contrib.wrappers.DynamicCharsetRequestMixin'" + " is deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + header = self.environ.get("CONTENT_TYPE") + if header: + ct, options = parse_options_header(header) + charset = options.get("charset") + if charset: + if is_known_charset(charset): + return charset + return self.unknown_charset(charset) + return self.default_charset + + +class DynamicCharsetResponseMixin(object): + + """If this mixin is mixed into a response class it will provide + a dynamic `charset` attribute. This means that if the charset is + looked up and stored in the `Content-Type` header and updates + itself automatically. This also means a small performance hit but + can be useful if you're working with different charsets on + responses. + + Because the charset attribute is no a property at class-level, the + default value is stored in `default_charset`. + + Because it changes the behavior or :class:`Response` this class has + to be mixed in *before* the actual response class:: + + class MyResponse(DynamicCharsetResponseMixin, Response): + pass + + .. deprecated:: 0.15 + This mixin will be removed in version 1.0. + + .. versionadded:: 0.6 + """ + + #: the default charset. + default_charset = "utf-8" + + def _get_charset(self): + warnings.warn( + "'werkzeug.contrib.wrappers.DynamicCharsetResponseMixin'" + " is deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + header = self.headers.get("content-type") + if header: + charset = parse_options_header(header)[1].get("charset") + if charset: + return charset + return self.default_charset + + def _set_charset(self, charset): + warnings.warn( + "'werkzeug.contrib.wrappers.DynamicCharsetResponseMixin'" + " is deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + header = self.headers.get("content-type") + ct, options = parse_options_header(header) + if not ct: + raise TypeError("Cannot set charset if Content-Type header is missing.") + options["charset"] = charset + self.headers["Content-Type"] = dump_options_header(ct, options) + + charset = property( + _get_charset, + _set_charset, + doc=""" + The charset for the response. It's stored inside the + Content-Type header as a parameter.""", + ) + del _get_charset, _set_charset diff --git a/python/werkzeug/datastructures.py b/python/werkzeug/datastructures.py new file mode 100644 index 0000000..9643db9 --- /dev/null +++ b/python/werkzeug/datastructures.py @@ -0,0 +1,2852 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.datastructures + ~~~~~~~~~~~~~~~~~~~~~~~ + + This module provides mixins and classes with an immutable interface. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import mimetypes +import re +from copy import deepcopy +from itertools import repeat + +from ._compat import BytesIO +from ._compat import collections_abc +from ._compat import integer_types +from ._compat import iteritems +from ._compat import iterkeys +from ._compat import iterlists +from ._compat import itervalues +from ._compat import make_literal_wrapper +from ._compat import PY2 +from ._compat import string_types +from ._compat import text_type +from ._compat import to_native +from ._internal import _missing +from .filesystem import get_filesystem_encoding + +_locale_delim_re = re.compile(r"[_-]") + + +def is_immutable(self): + raise TypeError("%r objects are immutable" % self.__class__.__name__) + + +def iter_multi_items(mapping): + """Iterates over the items of a mapping yielding keys and values + without dropping any from more complex structures. + """ + if isinstance(mapping, MultiDict): + for item in iteritems(mapping, multi=True): + yield item + elif isinstance(mapping, dict): + for key, value in iteritems(mapping): + if isinstance(value, (tuple, list)): + for value in value: + yield key, value + else: + yield key, value + else: + for item in mapping: + yield item + + +def native_itermethods(names): + if not PY2: + return lambda x: x + + def setviewmethod(cls, name): + viewmethod_name = "view%s" % name + repr_name = "view_%s" % name + + def viewmethod(self, *a, **kw): + return ViewItems(self, name, repr_name, *a, **kw) + + viewmethod.__name__ = viewmethod_name + viewmethod.__doc__ = "`%s()` object providing a view on %s" % ( + viewmethod_name, + name, + ) + setattr(cls, viewmethod_name, viewmethod) + + def setitermethod(cls, name): + itermethod = getattr(cls, name) + setattr(cls, "iter%s" % name, itermethod) + + def listmethod(self, *a, **kw): + return list(itermethod(self, *a, **kw)) + + listmethod.__name__ = name + listmethod.__doc__ = "Like :py:meth:`iter%s`, but returns a list." % name + setattr(cls, name, listmethod) + + def wrap(cls): + for name in names: + setitermethod(cls, name) + setviewmethod(cls, name) + return cls + + return wrap + + +class ImmutableListMixin(object): + """Makes a :class:`list` immutable. + + .. versionadded:: 0.5 + + :private: + """ + + _hash_cache = None + + def __hash__(self): + if self._hash_cache is not None: + return self._hash_cache + rv = self._hash_cache = hash(tuple(self)) + return rv + + def __reduce_ex__(self, protocol): + return type(self), (list(self),) + + def __delitem__(self, key): + is_immutable(self) + + def __iadd__(self, other): + is_immutable(self) + + __imul__ = __iadd__ + + def __setitem__(self, key, value): + is_immutable(self) + + def append(self, item): + is_immutable(self) + + remove = append + + def extend(self, iterable): + is_immutable(self) + + def insert(self, pos, value): + is_immutable(self) + + def pop(self, index=-1): + is_immutable(self) + + def reverse(self): + is_immutable(self) + + def sort(self, cmp=None, key=None, reverse=None): + is_immutable(self) + + +class ImmutableList(ImmutableListMixin, list): + """An immutable :class:`list`. + + .. versionadded:: 0.5 + + :private: + """ + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, list.__repr__(self)) + + +class ImmutableDictMixin(object): + """Makes a :class:`dict` immutable. + + .. versionadded:: 0.5 + + :private: + """ + + _hash_cache = None + + @classmethod + def fromkeys(cls, keys, value=None): + instance = super(cls, cls).__new__(cls) + instance.__init__(zip(keys, repeat(value))) + return instance + + def __reduce_ex__(self, protocol): + return type(self), (dict(self),) + + def _iter_hashitems(self): + return iteritems(self) + + def __hash__(self): + if self._hash_cache is not None: + return self._hash_cache + rv = self._hash_cache = hash(frozenset(self._iter_hashitems())) + return rv + + def setdefault(self, key, default=None): + is_immutable(self) + + def update(self, *args, **kwargs): + is_immutable(self) + + def pop(self, key, default=None): + is_immutable(self) + + def popitem(self): + is_immutable(self) + + def __setitem__(self, key, value): + is_immutable(self) + + def __delitem__(self, key): + is_immutable(self) + + def clear(self): + is_immutable(self) + + +class ImmutableMultiDictMixin(ImmutableDictMixin): + """Makes a :class:`MultiDict` immutable. + + .. versionadded:: 0.5 + + :private: + """ + + def __reduce_ex__(self, protocol): + return type(self), (list(iteritems(self, multi=True)),) + + def _iter_hashitems(self): + return iteritems(self, multi=True) + + def add(self, key, value): + is_immutable(self) + + def popitemlist(self): + is_immutable(self) + + def poplist(self, key): + is_immutable(self) + + def setlist(self, key, new_list): + is_immutable(self) + + def setlistdefault(self, key, default_list=None): + is_immutable(self) + + +class UpdateDictMixin(object): + """Makes dicts call `self.on_update` on modifications. + + .. versionadded:: 0.5 + + :private: + """ + + on_update = None + + def calls_update(name): # noqa: B902 + def oncall(self, *args, **kw): + rv = getattr(super(UpdateDictMixin, self), name)(*args, **kw) + if self.on_update is not None: + self.on_update(self) + return rv + + oncall.__name__ = name + return oncall + + def setdefault(self, key, default=None): + modified = key not in self + rv = super(UpdateDictMixin, self).setdefault(key, default) + if modified and self.on_update is not None: + self.on_update(self) + return rv + + def pop(self, key, default=_missing): + modified = key in self + if default is _missing: + rv = super(UpdateDictMixin, self).pop(key) + else: + rv = super(UpdateDictMixin, self).pop(key, default) + if modified and self.on_update is not None: + self.on_update(self) + return rv + + __setitem__ = calls_update("__setitem__") + __delitem__ = calls_update("__delitem__") + clear = calls_update("clear") + popitem = calls_update("popitem") + update = calls_update("update") + del calls_update + + +class TypeConversionDict(dict): + """Works like a regular dict but the :meth:`get` method can perform + type conversions. :class:`MultiDict` and :class:`CombinedMultiDict` + are subclasses of this class and provide the same feature. + + .. versionadded:: 0.5 + """ + + def get(self, key, default=None, type=None): + """Return the default value if the requested data doesn't exist. + If `type` is provided and is a callable it should convert the value, + return it or raise a :exc:`ValueError` if that is not possible. In + this case the function will return the default as if the value was not + found: + + >>> d = TypeConversionDict(foo='42', bar='blub') + >>> d.get('foo', type=int) + 42 + >>> d.get('bar', -1, type=int) + -1 + + :param key: The key to be looked up. + :param default: The default value to be returned if the key can't + be looked up. If not further specified `None` is + returned. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the default value is returned. + """ + try: + rv = self[key] + except KeyError: + return default + if type is not None: + try: + rv = type(rv) + except ValueError: + rv = default + return rv + + +class ImmutableTypeConversionDict(ImmutableDictMixin, TypeConversionDict): + """Works like a :class:`TypeConversionDict` but does not support + modifications. + + .. versionadded:: 0.5 + """ + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return TypeConversionDict(self) + + def __copy__(self): + return self + + +class ViewItems(object): + def __init__(self, multi_dict, method, repr_name, *a, **kw): + self.__multi_dict = multi_dict + self.__method = method + self.__repr_name = repr_name + self.__a = a + self.__kw = kw + + def __get_items(self): + return getattr(self.__multi_dict, self.__method)(*self.__a, **self.__kw) + + def __repr__(self): + return "%s(%r)" % (self.__repr_name, list(self.__get_items())) + + def __iter__(self): + return iter(self.__get_items()) + + +@native_itermethods(["keys", "values", "items", "lists", "listvalues"]) +class MultiDict(TypeConversionDict): + """A :class:`MultiDict` is a dictionary subclass customized to deal with + multiple values for the same key which is for example used by the parsing + functions in the wrappers. This is necessary because some HTML form + elements pass multiple values for the same key. + + :class:`MultiDict` implements all standard dictionary methods. + Internally, it saves all values for a key as a list, but the standard dict + access methods will only return the first value for a key. If you want to + gain access to the other values, too, you have to use the `list` methods as + explained below. + + Basic Usage: + + >>> d = MultiDict([('a', 'b'), ('a', 'c')]) + >>> d + MultiDict([('a', 'b'), ('a', 'c')]) + >>> d['a'] + 'b' + >>> d.getlist('a') + ['b', 'c'] + >>> 'a' in d + True + + It behaves like a normal dict thus all dict functions will only return the + first value when multiple values for one key are found. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP + exceptions. + + A :class:`MultiDict` can be constructed from an iterable of + ``(key, value)`` tuples, a dict, a :class:`MultiDict` or from Werkzeug 0.2 + onwards some keyword parameters. + + :param mapping: the initial value for the :class:`MultiDict`. Either a + regular dict, an iterable of ``(key, value)`` tuples + or `None`. + """ + + def __init__(self, mapping=None): + if isinstance(mapping, MultiDict): + dict.__init__(self, ((k, l[:]) for k, l in iterlists(mapping))) + elif isinstance(mapping, dict): + tmp = {} + for key, value in iteritems(mapping): + if isinstance(value, (tuple, list)): + if len(value) == 0: + continue + value = list(value) + else: + value = [value] + tmp[key] = value + dict.__init__(self, tmp) + else: + tmp = {} + for key, value in mapping or (): + tmp.setdefault(key, []).append(value) + dict.__init__(self, tmp) + + def __getstate__(self): + return dict(self.lists()) + + def __setstate__(self, value): + dict.clear(self) + dict.update(self, value) + + def __getitem__(self, key): + """Return the first data value for this key; + raises KeyError if not found. + + :param key: The key to be looked up. + :raise KeyError: if the key does not exist. + """ + + if key in self: + lst = dict.__getitem__(self, key) + if len(lst) > 0: + return lst[0] + raise exceptions.BadRequestKeyError(key) + + def __setitem__(self, key, value): + """Like :meth:`add` but removes an existing key first. + + :param key: the key for the value. + :param value: the value to set. + """ + dict.__setitem__(self, key, [value]) + + def add(self, key, value): + """Adds a new value for the key. + + .. versionadded:: 0.6 + + :param key: the key for the value. + :param value: the value to add. + """ + dict.setdefault(self, key, []).append(value) + + def getlist(self, key, type=None): + """Return the list of items for a given key. If that key is not in the + `MultiDict`, the return value will be an empty list. Just as `get` + `getlist` accepts a `type` parameter. All items will be converted + with the callable defined there. + + :param key: The key to be looked up. + :param type: A callable that is used to cast the value in the + :class:`MultiDict`. If a :exc:`ValueError` is raised + by this callable the value will be removed from the list. + :return: a :class:`list` of all the values for the key. + """ + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return list(rv) + result = [] + for item in rv: + try: + result.append(type(item)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + """Remove the old values for a key and add new ones. Note that the list + you pass the values in will be shallow-copied before it is inserted in + the dictionary. + + >>> d = MultiDict() + >>> d.setlist('foo', ['1', '2']) + >>> d['foo'] + '1' + >>> d.getlist('foo') + ['1', '2'] + + :param key: The key for which the values are set. + :param new_list: An iterable with the new values for the key. Old values + are removed first. + """ + dict.__setitem__(self, key, list(new_list)) + + def setdefault(self, key, default=None): + """Returns the value for the key if it is in the dict, otherwise it + returns `default` and sets that value for `key`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key is not + in the dict. If not further specified it's `None`. + """ + if key not in self: + self[key] = default + else: + default = self[key] + return default + + def setlistdefault(self, key, default_list=None): + """Like `setdefault` but sets multiple values. The list returned + is not a copy, but the list that is actually used internally. This + means that you can put new values into the dict by appending items + to the list: + + >>> d = MultiDict({"foo": 1}) + >>> d.setlistdefault("foo").extend([2, 3]) + >>> d.getlist("foo") + [1, 2, 3] + + :param key: The key to be looked up. + :param default_list: An iterable of default values. It is either copied + (in case it was a list) or converted into a list + before returned. + :return: a :class:`list` + """ + if key not in self: + default_list = list(default_list or ()) + dict.__setitem__(self, key, default_list) + else: + default_list = dict.__getitem__(self, key) + return default_list + + def items(self, multi=False): + """Return an iterator of ``(key, value)`` pairs. + + :param multi: If set to `True` the iterator returned will have a pair + for each value of each key. Otherwise it will only + contain pairs for the first value of each key. + """ + + for key, values in iteritems(dict, self): + if multi: + for value in values: + yield key, value + else: + yield key, values[0] + + def lists(self): + """Return a iterator of ``(key, values)`` pairs, where values is the list + of all values associated with the key.""" + + for key, values in iteritems(dict, self): + yield key, list(values) + + def keys(self): + return iterkeys(dict, self) + + __iter__ = keys + + def values(self): + """Returns an iterator of the first value on every key's value list.""" + for values in itervalues(dict, self): + yield values[0] + + def listvalues(self): + """Return an iterator of all values associated with a key. Zipping + :meth:`keys` and this is the same as calling :meth:`lists`: + + >>> d = MultiDict({"foo": [1, 2, 3]}) + >>> zip(d.keys(), d.listvalues()) == d.lists() + True + """ + + return itervalues(dict, self) + + def copy(self): + """Return a shallow copy of this object.""" + return self.__class__(self) + + def deepcopy(self, memo=None): + """Return a deep copy of this object.""" + return self.__class__(deepcopy(self.to_dict(flat=False), memo)) + + def to_dict(self, flat=True): + """Return the contents as regular dict. If `flat` is `True` the + returned dict will only have the first item present, if `flat` is + `False` all values will be returned as lists. + + :param flat: If set to `False` the dict returned will have lists + with all the values in it. Otherwise it will only + contain the first value for each key. + :return: a :class:`dict` + """ + if flat: + return dict(iteritems(self)) + return dict(self.lists()) + + def update(self, other_dict): + """update() extends rather than replaces existing key lists: + + >>> a = MultiDict({'x': 1}) + >>> b = MultiDict({'x': 2, 'y': 3}) + >>> a.update(b) + >>> a + MultiDict([('y', 3), ('x', 1), ('x', 2)]) + + If the value list for a key in ``other_dict`` is empty, no new values + will be added to the dict and the key will not be created: + + >>> x = {'empty_list': []} + >>> y = MultiDict() + >>> y.update(x) + >>> y + MultiDict([]) + """ + for key, value in iter_multi_items(other_dict): + MultiDict.add(self, key, value) + + def pop(self, key, default=_missing): + """Pop the first item for a list on the dict. Afterwards the + key is removed from the dict, so additional values are discarded: + + >>> d = MultiDict({"foo": [1, 2, 3]}) + >>> d.pop("foo") + 1 + >>> "foo" in d + False + + :param key: the key to pop. + :param default: if provided the value to return if the key was + not in the dictionary. + """ + try: + lst = dict.pop(self, key) + + if len(lst) == 0: + raise exceptions.BadRequestKeyError(key) + + return lst[0] + except KeyError: + if default is not _missing: + return default + raise exceptions.BadRequestKeyError(key) + + def popitem(self): + """Pop an item from the dict.""" + try: + item = dict.popitem(self) + + if len(item[1]) == 0: + raise exceptions.BadRequestKeyError(item) + + return (item[0], item[1][0]) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + + def poplist(self, key): + """Pop the list for a key from the dict. If the key is not in the dict + an empty list is returned. + + .. versionchanged:: 0.5 + If the key does no longer exist a list is returned instead of + raising an error. + """ + return dict.pop(self, key, []) + + def popitemlist(self): + """Pop a ``(key, list)`` tuple from the dict.""" + try: + return dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + + def __copy__(self): + return self.copy() + + def __deepcopy__(self, memo): + return self.deepcopy(memo=memo) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, list(iteritems(self, multi=True))) + + +class _omd_bucket(object): + """Wraps values in the :class:`OrderedMultiDict`. This makes it + possible to keep an order over multiple different keys. It requires + a lot of extra memory and slows down access a lot, but makes it + possible to access elements in O(1) and iterate in O(n). + """ + + __slots__ = ("prev", "key", "value", "next") + + def __init__(self, omd, key, value): + self.prev = omd._last_bucket + self.key = key + self.value = value + self.next = None + + if omd._first_bucket is None: + omd._first_bucket = self + if omd._last_bucket is not None: + omd._last_bucket.next = self + omd._last_bucket = self + + def unlink(self, omd): + if self.prev: + self.prev.next = self.next + if self.next: + self.next.prev = self.prev + if omd._first_bucket is self: + omd._first_bucket = self.next + if omd._last_bucket is self: + omd._last_bucket = self.prev + + +@native_itermethods(["keys", "values", "items", "lists", "listvalues"]) +class OrderedMultiDict(MultiDict): + """Works like a regular :class:`MultiDict` but preserves the + order of the fields. To convert the ordered multi dict into a + list you can use the :meth:`items` method and pass it ``multi=True``. + + In general an :class:`OrderedMultiDict` is an order of magnitude + slower than a :class:`MultiDict`. + + .. admonition:: note + + Due to a limitation in Python you cannot convert an ordered + multi dict into a regular dict by using ``dict(multidict)``. + Instead you have to use the :meth:`to_dict` method, otherwise + the internal bucket objects are exposed. + """ + + def __init__(self, mapping=None): + dict.__init__(self) + self._first_bucket = self._last_bucket = None + if mapping is not None: + OrderedMultiDict.update(self, mapping) + + def __eq__(self, other): + if not isinstance(other, MultiDict): + return NotImplemented + if isinstance(other, OrderedMultiDict): + iter1 = iteritems(self, multi=True) + iter2 = iteritems(other, multi=True) + try: + for k1, v1 in iter1: + k2, v2 = next(iter2) + if k1 != k2 or v1 != v2: + return False + except StopIteration: + return False + try: + next(iter2) + except StopIteration: + return True + return False + if len(self) != len(other): + return False + for key, values in iterlists(self): + if other.getlist(key) != values: + return False + return True + + __hash__ = None + + def __ne__(self, other): + return not self.__eq__(other) + + def __reduce_ex__(self, protocol): + return type(self), (list(iteritems(self, multi=True)),) + + def __getstate__(self): + return list(iteritems(self, multi=True)) + + def __setstate__(self, values): + dict.clear(self) + for key, value in values: + self.add(key, value) + + def __getitem__(self, key): + if key in self: + return dict.__getitem__(self, key)[0].value + raise exceptions.BadRequestKeyError(key) + + def __setitem__(self, key, value): + self.poplist(key) + self.add(key, value) + + def __delitem__(self, key): + self.pop(key) + + def keys(self): + return (key for key, value in iteritems(self)) + + __iter__ = keys + + def values(self): + return (value for key, value in iteritems(self)) + + def items(self, multi=False): + ptr = self._first_bucket + if multi: + while ptr is not None: + yield ptr.key, ptr.value + ptr = ptr.next + else: + returned_keys = set() + while ptr is not None: + if ptr.key not in returned_keys: + returned_keys.add(ptr.key) + yield ptr.key, ptr.value + ptr = ptr.next + + def lists(self): + returned_keys = set() + ptr = self._first_bucket + while ptr is not None: + if ptr.key not in returned_keys: + yield ptr.key, self.getlist(ptr.key) + returned_keys.add(ptr.key) + ptr = ptr.next + + def listvalues(self): + for _key, values in iterlists(self): + yield values + + def add(self, key, value): + dict.setdefault(self, key, []).append(_omd_bucket(self, key, value)) + + def getlist(self, key, type=None): + try: + rv = dict.__getitem__(self, key) + except KeyError: + return [] + if type is None: + return [x.value for x in rv] + result = [] + for item in rv: + try: + result.append(type(item.value)) + except ValueError: + pass + return result + + def setlist(self, key, new_list): + self.poplist(key) + for value in new_list: + self.add(key, value) + + def setlistdefault(self, key, default_list=None): + raise TypeError("setlistdefault is unsupported for ordered multi dicts") + + def update(self, mapping): + for key, value in iter_multi_items(mapping): + OrderedMultiDict.add(self, key, value) + + def poplist(self, key): + buckets = dict.pop(self, key, ()) + for bucket in buckets: + bucket.unlink(self) + return [x.value for x in buckets] + + def pop(self, key, default=_missing): + try: + buckets = dict.pop(self, key) + except KeyError: + if default is not _missing: + return default + raise exceptions.BadRequestKeyError(key) + for bucket in buckets: + bucket.unlink(self) + return buckets[0].value + + def popitem(self): + try: + key, buckets = dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + for bucket in buckets: + bucket.unlink(self) + return key, buckets[0].value + + def popitemlist(self): + try: + key, buckets = dict.popitem(self) + except KeyError as e: + raise exceptions.BadRequestKeyError(e.args[0]) + for bucket in buckets: + bucket.unlink(self) + return key, [x.value for x in buckets] + + +def _options_header_vkw(value, kw): + return dump_options_header( + value, dict((k.replace("_", "-"), v) for k, v in kw.items()) + ) + + +def _unicodify_header_value(value): + if isinstance(value, bytes): + value = value.decode("latin-1") + if not isinstance(value, text_type): + value = text_type(value) + return value + + +@native_itermethods(["keys", "values", "items"]) +class Headers(object): + """An object that stores some headers. It has a dict-like interface + but is ordered and can store the same keys multiple times. + + This data structure is useful if you want a nicer way to handle WSGI + headers which are stored as tuples in a list. + + From Werkzeug 0.3 onwards, the :exc:`KeyError` raised by this class is + also a subclass of the :class:`~exceptions.BadRequest` HTTP exception + and will render a page for a ``400 BAD REQUEST`` if caught in a + catch-all for HTTP exceptions. + + Headers is mostly compatible with the Python :class:`wsgiref.headers.Headers` + class, with the exception of `__getitem__`. :mod:`wsgiref` will return + `None` for ``headers['missing']``, whereas :class:`Headers` will raise + a :class:`KeyError`. + + To create a new :class:`Headers` object pass it a list or dict of headers + which are used as default values. This does not reuse the list passed + to the constructor for internal usage. + + :param defaults: The list of default values for the :class:`Headers`. + + .. versionchanged:: 0.9 + This data structure now stores unicode values similar to how the + multi dicts do it. The main difference is that bytes can be set as + well which will automatically be latin1 decoded. + + .. versionchanged:: 0.9 + The :meth:`linked` function was removed without replacement as it + was an API that does not support the changes to the encoding model. + """ + + def __init__(self, defaults=None): + self._list = [] + if defaults is not None: + if isinstance(defaults, (list, Headers)): + self._list.extend(defaults) + else: + self.extend(defaults) + + def __getitem__(self, key, _get_mode=False): + if not _get_mode: + if isinstance(key, integer_types): + return self._list[key] + elif isinstance(key, slice): + return self.__class__(self._list[key]) + if not isinstance(key, string_types): + raise exceptions.BadRequestKeyError(key) + ikey = key.lower() + for k, v in self._list: + if k.lower() == ikey: + return v + # micro optimization: if we are in get mode we will catch that + # exception one stack level down so we can raise a standard + # key error instead of our special one. + if _get_mode: + raise KeyError() + raise exceptions.BadRequestKeyError(key) + + def __eq__(self, other): + return other.__class__ is self.__class__ and set(other._list) == set(self._list) + + __hash__ = None + + def __ne__(self, other): + return not self.__eq__(other) + + def get(self, key, default=None, type=None, as_bytes=False): + """Return the default value if the requested data doesn't exist. + If `type` is provided and is a callable it should convert the value, + return it or raise a :exc:`ValueError` if that is not possible. In + this case the function will return the default as if the value was not + found: + + >>> d = Headers([('Content-Length', '42')]) + >>> d.get('Content-Length', type=int) + 42 + + If a headers object is bound you must not add unicode strings + because no encoding takes place. + + .. versionadded:: 0.9 + Added support for `as_bytes`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key can't + be looked up. If not further specified `None` is + returned. + :param type: A callable that is used to cast the value in the + :class:`Headers`. If a :exc:`ValueError` is raised + by this callable the default value is returned. + :param as_bytes: return bytes instead of unicode strings. + """ + try: + rv = self.__getitem__(key, _get_mode=True) + except KeyError: + return default + if as_bytes: + rv = rv.encode("latin1") + if type is None: + return rv + try: + return type(rv) + except ValueError: + return default + + def getlist(self, key, type=None, as_bytes=False): + """Return the list of items for a given key. If that key is not in the + :class:`Headers`, the return value will be an empty list. Just as + :meth:`get` :meth:`getlist` accepts a `type` parameter. All items will + be converted with the callable defined there. + + .. versionadded:: 0.9 + Added support for `as_bytes`. + + :param key: The key to be looked up. + :param type: A callable that is used to cast the value in the + :class:`Headers`. If a :exc:`ValueError` is raised + by this callable the value will be removed from the list. + :return: a :class:`list` of all the values for the key. + :param as_bytes: return bytes instead of unicode strings. + """ + ikey = key.lower() + result = [] + for k, v in self: + if k.lower() == ikey: + if as_bytes: + v = v.encode("latin1") + if type is not None: + try: + v = type(v) + except ValueError: + continue + result.append(v) + return result + + def get_all(self, name): + """Return a list of all the values for the named field. + + This method is compatible with the :mod:`wsgiref` + :meth:`~wsgiref.headers.Headers.get_all` method. + """ + return self.getlist(name) + + def items(self, lower=False): + for key, value in self: + if lower: + key = key.lower() + yield key, value + + def keys(self, lower=False): + for key, _ in iteritems(self, lower): + yield key + + def values(self): + for _, value in iteritems(self): + yield value + + def extend(self, iterable): + """Extend the headers with a dict or an iterable yielding keys and + values. + """ + if isinstance(iterable, dict): + for key, value in iteritems(iterable): + if isinstance(value, (tuple, list)): + for v in value: + self.add(key, v) + else: + self.add(key, value) + else: + for key, value in iterable: + self.add(key, value) + + def __delitem__(self, key, _index_operation=True): + if _index_operation and isinstance(key, (integer_types, slice)): + del self._list[key] + return + key = key.lower() + new = [] + for k, v in self._list: + if k.lower() != key: + new.append((k, v)) + self._list[:] = new + + def remove(self, key): + """Remove a key. + + :param key: The key to be removed. + """ + return self.__delitem__(key, _index_operation=False) + + def pop(self, key=None, default=_missing): + """Removes and returns a key or index. + + :param key: The key to be popped. If this is an integer the item at + that position is removed, if it's a string the value for + that key is. If the key is omitted or `None` the last + item is removed. + :return: an item. + """ + if key is None: + return self._list.pop() + if isinstance(key, integer_types): + return self._list.pop(key) + try: + rv = self[key] + self.remove(key) + except KeyError: + if default is not _missing: + return default + raise + return rv + + def popitem(self): + """Removes a key or index and returns a (key, value) item.""" + return self.pop() + + def __contains__(self, key): + """Check if a key is present.""" + try: + self.__getitem__(key, _get_mode=True) + except KeyError: + return False + return True + + has_key = __contains__ + + def __iter__(self): + """Yield ``(key, value)`` tuples.""" + return iter(self._list) + + def __len__(self): + return len(self._list) + + def add(self, _key, _value, **kw): + """Add a new header tuple to the list. + + Keyword arguments can specify additional parameters for the header + value, with underscores converted to dashes:: + + >>> d = Headers() + >>> d.add('Content-Type', 'text/plain') + >>> d.add('Content-Disposition', 'attachment', filename='foo.png') + + The keyword argument dumping uses :func:`dump_options_header` + behind the scenes. + + .. versionadded:: 0.4.1 + keyword arguments were added for :mod:`wsgiref` compatibility. + """ + if kw: + _value = _options_header_vkw(_value, kw) + _key = _unicodify_header_value(_key) + _value = _unicodify_header_value(_value) + self._validate_value(_value) + self._list.append((_key, _value)) + + def _validate_value(self, value): + if not isinstance(value, text_type): + raise TypeError("Value should be unicode.") + if u"\n" in value or u"\r" in value: + raise ValueError( + "Detected newline in header value. This is " + "a potential security problem" + ) + + def add_header(self, _key, _value, **_kw): + """Add a new header tuple to the list. + + An alias for :meth:`add` for compatibility with the :mod:`wsgiref` + :meth:`~wsgiref.headers.Headers.add_header` method. + """ + self.add(_key, _value, **_kw) + + def clear(self): + """Clears all headers.""" + del self._list[:] + + def set(self, _key, _value, **kw): + """Remove all header tuples for `key` and add a new one. The newly + added key either appears at the end of the list if there was no + entry or replaces the first one. + + Keyword arguments can specify additional parameters for the header + value, with underscores converted to dashes. See :meth:`add` for + more information. + + .. versionchanged:: 0.6.1 + :meth:`set` now accepts the same arguments as :meth:`add`. + + :param key: The key to be inserted. + :param value: The value to be inserted. + """ + if kw: + _value = _options_header_vkw(_value, kw) + _key = _unicodify_header_value(_key) + _value = _unicodify_header_value(_value) + self._validate_value(_value) + if not self._list: + self._list.append((_key, _value)) + return + listiter = iter(self._list) + ikey = _key.lower() + for idx, (old_key, _old_value) in enumerate(listiter): + if old_key.lower() == ikey: + # replace first ocurrence + self._list[idx] = (_key, _value) + break + else: + self._list.append((_key, _value)) + return + self._list[idx + 1 :] = [t for t in listiter if t[0].lower() != ikey] + + def setdefault(self, key, default): + """Returns the value for the key if it is in the dict, otherwise it + returns `default` and sets that value for `key`. + + :param key: The key to be looked up. + :param default: The default value to be returned if the key is not + in the dict. If not further specified it's `None`. + """ + if key in self: + return self[key] + self.set(key, default) + return default + + def __setitem__(self, key, value): + """Like :meth:`set` but also supports index/slice based setting.""" + if isinstance(key, (slice, integer_types)): + if isinstance(key, integer_types): + value = [value] + value = [ + (_unicodify_header_value(k), _unicodify_header_value(v)) + for (k, v) in value + ] + [self._validate_value(v) for (k, v) in value] + if isinstance(key, integer_types): + self._list[key] = value[0] + else: + self._list[key] = value + else: + self.set(key, value) + + def to_list(self, charset="iso-8859-1"): + """Convert the headers into a list suitable for WSGI. + + .. deprecated:: 0.9 + """ + from warnings import warn + + warn( + "'to_list' deprecated as of version 0.9 and will be removed" + " in version 1.0. Use 'to_wsgi_list' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.to_wsgi_list() + + def to_wsgi_list(self): + """Convert the headers into a list suitable for WSGI. + + The values are byte strings in Python 2 converted to latin1 and unicode + strings in Python 3 for the WSGI server to encode. + + :return: list + """ + if PY2: + return [(to_native(k), v.encode("latin1")) for k, v in self] + return list(self) + + def copy(self): + return self.__class__(self._list) + + def __copy__(self): + return self.copy() + + def __str__(self): + """Returns formatted headers suitable for HTTP transmission.""" + strs = [] + for key, value in self.to_wsgi_list(): + strs.append("%s: %s" % (key, value)) + strs.append("\r\n") + return "\r\n".join(strs) + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, list(self)) + + +class ImmutableHeadersMixin(object): + """Makes a :class:`Headers` immutable. We do not mark them as + hashable though since the only usecase for this datastructure + in Werkzeug is a view on a mutable structure. + + .. versionadded:: 0.5 + + :private: + """ + + def __delitem__(self, key, **kwargs): + is_immutable(self) + + def __setitem__(self, key, value): + is_immutable(self) + + set = __setitem__ + + def add(self, item): + is_immutable(self) + + remove = add_header = add + + def extend(self, iterable): + is_immutable(self) + + def insert(self, pos, value): + is_immutable(self) + + def pop(self, index=-1): + is_immutable(self) + + def popitem(self): + is_immutable(self) + + def setdefault(self, key, default): + is_immutable(self) + + +class EnvironHeaders(ImmutableHeadersMixin, Headers): + """Read only version of the headers from a WSGI environment. This + provides the same interface as `Headers` and is constructed from + a WSGI environment. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for + HTTP exceptions. + """ + + def __init__(self, environ): + self.environ = environ + + def __eq__(self, other): + return self.environ is other.environ + + __hash__ = None + + def __getitem__(self, key, _get_mode=False): + # _get_mode is a no-op for this class as there is no index but + # used because get() calls it. + if not isinstance(key, string_types): + raise KeyError(key) + key = key.upper().replace("-", "_") + if key in ("CONTENT_TYPE", "CONTENT_LENGTH"): + return _unicodify_header_value(self.environ[key]) + return _unicodify_header_value(self.environ["HTTP_" + key]) + + def __len__(self): + # the iter is necessary because otherwise list calls our + # len which would call list again and so forth. + return len(list(iter(self))) + + def __iter__(self): + for key, value in iteritems(self.environ): + if key.startswith("HTTP_") and key not in ( + "HTTP_CONTENT_TYPE", + "HTTP_CONTENT_LENGTH", + ): + yield ( + key[5:].replace("_", "-").title(), + _unicodify_header_value(value), + ) + elif key in ("CONTENT_TYPE", "CONTENT_LENGTH") and value: + yield (key.replace("_", "-").title(), _unicodify_header_value(value)) + + def copy(self): + raise TypeError("cannot create %r copies" % self.__class__.__name__) + + +@native_itermethods(["keys", "values", "items", "lists", "listvalues"]) +class CombinedMultiDict(ImmutableMultiDictMixin, MultiDict): + """A read only :class:`MultiDict` that you can pass multiple :class:`MultiDict` + instances as sequence and it will combine the return values of all wrapped + dicts: + + >>> from werkzeug.datastructures import CombinedMultiDict, MultiDict + >>> post = MultiDict([('foo', 'bar')]) + >>> get = MultiDict([('blub', 'blah')]) + >>> combined = CombinedMultiDict([get, post]) + >>> combined['foo'] + 'bar' + >>> combined['blub'] + 'blah' + + This works for all read operations and will raise a `TypeError` for + methods that usually change data which isn't possible. + + From Werkzeug 0.3 onwards, the `KeyError` raised by this class is also a + subclass of the :exc:`~exceptions.BadRequest` HTTP exception and will + render a page for a ``400 BAD REQUEST`` if caught in a catch-all for HTTP + exceptions. + """ + + def __reduce_ex__(self, protocol): + return type(self), (self.dicts,) + + def __init__(self, dicts=None): + self.dicts = dicts or [] + + @classmethod + def fromkeys(cls): + raise TypeError("cannot create %r instances by fromkeys" % cls.__name__) + + def __getitem__(self, key): + for d in self.dicts: + if key in d: + return d[key] + raise exceptions.BadRequestKeyError(key) + + def get(self, key, default=None, type=None): + for d in self.dicts: + if key in d: + if type is not None: + try: + return type(d[key]) + except ValueError: + continue + return d[key] + return default + + def getlist(self, key, type=None): + rv = [] + for d in self.dicts: + rv.extend(d.getlist(key, type)) + return rv + + def _keys_impl(self): + """This function exists so __len__ can be implemented more efficiently, + saving one list creation from an iterator. + + Using this for Python 2's ``dict.keys`` behavior would be useless since + `dict.keys` in Python 2 returns a list, while we have a set here. + """ + rv = set() + for d in self.dicts: + rv.update(iterkeys(d)) + return rv + + def keys(self): + return iter(self._keys_impl()) + + __iter__ = keys + + def items(self, multi=False): + found = set() + for d in self.dicts: + for key, value in iteritems(d, multi): + if multi: + yield key, value + elif key not in found: + found.add(key) + yield key, value + + def values(self): + for _key, value in iteritems(self): + yield value + + def lists(self): + rv = {} + for d in self.dicts: + for key, values in iterlists(d): + rv.setdefault(key, []).extend(values) + return iteritems(rv) + + def listvalues(self): + return (x[1] for x in self.lists()) + + def copy(self): + """Return a shallow mutable copy of this object. + + This returns a :class:`MultiDict` representing the data at the + time of copying. The copy will no longer reflect changes to the + wrapped dicts. + + .. versionchanged:: 0.15 + Return a mutable :class:`MultiDict`. + """ + return MultiDict(self) + + def to_dict(self, flat=True): + """Return the contents as regular dict. If `flat` is `True` the + returned dict will only have the first item present, if `flat` is + `False` all values will be returned as lists. + + :param flat: If set to `False` the dict returned will have lists + with all the values in it. Otherwise it will only + contain the first item for each key. + :return: a :class:`dict` + """ + rv = {} + for d in reversed(self.dicts): + rv.update(d.to_dict(flat)) + return rv + + def __len__(self): + return len(self._keys_impl()) + + def __contains__(self, key): + for d in self.dicts: + if key in d: + return True + return False + + has_key = __contains__ + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.dicts) + + +class FileMultiDict(MultiDict): + """A special :class:`MultiDict` that has convenience methods to add + files to it. This is used for :class:`EnvironBuilder` and generally + useful for unittesting. + + .. versionadded:: 0.5 + """ + + def add_file(self, name, file, filename=None, content_type=None): + """Adds a new file to the dict. `file` can be a file name or + a :class:`file`-like or a :class:`FileStorage` object. + + :param name: the name of the field. + :param file: a filename or :class:`file`-like object + :param filename: an optional filename + :param content_type: an optional content type + """ + if isinstance(file, FileStorage): + value = file + else: + if isinstance(file, string_types): + if filename is None: + filename = file + file = open(file, "rb") + if filename and content_type is None: + content_type = ( + mimetypes.guess_type(filename)[0] or "application/octet-stream" + ) + value = FileStorage(file, filename, name, content_type) + + self.add(name, value) + + +class ImmutableDict(ImmutableDictMixin, dict): + """An immutable :class:`dict`. + + .. versionadded:: 0.5 + """ + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, dict.__repr__(self)) + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return dict(self) + + def __copy__(self): + return self + + +class ImmutableMultiDict(ImmutableMultiDictMixin, MultiDict): + """An immutable :class:`MultiDict`. + + .. versionadded:: 0.5 + """ + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return MultiDict(self) + + def __copy__(self): + return self + + +class ImmutableOrderedMultiDict(ImmutableMultiDictMixin, OrderedMultiDict): + """An immutable :class:`OrderedMultiDict`. + + .. versionadded:: 0.6 + """ + + def _iter_hashitems(self): + return enumerate(iteritems(self, multi=True)) + + def copy(self): + """Return a shallow mutable copy of this object. Keep in mind that + the standard library's :func:`copy` function is a no-op for this class + like for any other python immutable type (eg: :class:`tuple`). + """ + return OrderedMultiDict(self) + + def __copy__(self): + return self + + +@native_itermethods(["values"]) +class Accept(ImmutableList): + """An :class:`Accept` object is just a list subclass for lists of + ``(value, quality)`` tuples. It is automatically sorted by specificity + and quality. + + All :class:`Accept` objects work similar to a list but provide extra + functionality for working with the data. Containment checks are + normalized to the rules of that header: + + >>> a = CharsetAccept([('ISO-8859-1', 1), ('utf-8', 0.7)]) + >>> a.best + 'ISO-8859-1' + >>> 'iso-8859-1' in a + True + >>> 'UTF8' in a + True + >>> 'utf7' in a + False + + To get the quality for an item you can use normal item lookup: + + >>> print a['utf-8'] + 0.7 + >>> a['utf7'] + 0 + + .. versionchanged:: 0.5 + :class:`Accept` objects are forced immutable now. + """ + + def __init__(self, values=()): + if values is None: + list.__init__(self) + self.provided = False + elif isinstance(values, Accept): + self.provided = values.provided + list.__init__(self, values) + else: + self.provided = True + values = sorted( + values, + key=lambda x: (self._specificity(x[0]), x[1], x[0]), + reverse=True, + ) + list.__init__(self, values) + + def _specificity(self, value): + """Returns a tuple describing the value's specificity.""" + return (value != "*",) + + def _value_matches(self, value, item): + """Check if a value matches a given accept item.""" + return item == "*" or item.lower() == value.lower() + + def __getitem__(self, key): + """Besides index lookup (getting item n) you can also pass it a string + to get the quality for the item. If the item is not in the list, the + returned quality is ``0``. + """ + if isinstance(key, string_types): + return self.quality(key) + return list.__getitem__(self, key) + + def quality(self, key): + """Returns the quality of the key. + + .. versionadded:: 0.6 + In previous versions you had to use the item-lookup syntax + (eg: ``obj[key]`` instead of ``obj.quality(key)``) + """ + for item, quality in self: + if self._value_matches(key, item): + return quality + return 0 + + def __contains__(self, value): + for item, _quality in self: + if self._value_matches(value, item): + return True + return False + + def __repr__(self): + return "%s([%s])" % ( + self.__class__.__name__, + ", ".join("(%r, %s)" % (x, y) for x, y in self), + ) + + def index(self, key): + """Get the position of an entry or raise :exc:`ValueError`. + + :param key: The key to be looked up. + + .. versionchanged:: 0.5 + This used to raise :exc:`IndexError`, which was inconsistent + with the list API. + """ + if isinstance(key, string_types): + for idx, (item, _quality) in enumerate(self): + if self._value_matches(key, item): + return idx + raise ValueError(key) + return list.index(self, key) + + def find(self, key): + """Get the position of an entry or return -1. + + :param key: The key to be looked up. + """ + try: + return self.index(key) + except ValueError: + return -1 + + def values(self): + """Iterate over all values.""" + for item in self: + yield item[0] + + def to_header(self): + """Convert the header set into an HTTP header string.""" + result = [] + for value, quality in self: + if quality != 1: + value = "%s;q=%s" % (value, quality) + result.append(value) + return ",".join(result) + + def __str__(self): + return self.to_header() + + def _best_single_match(self, match): + for client_item, quality in self: + if self._value_matches(match, client_item): + # self is sorted by specificity descending, we can exit + return client_item, quality + + def best_match(self, matches, default=None): + """Returns the best match from a list of possible matches based + on the specificity and quality of the client. If two items have the + same quality and specificity, the one is returned that comes first. + + :param matches: a list of matches to check for + :param default: the value that is returned if none match + """ + result = default + best_quality = -1 + best_specificity = (-1,) + for server_item in matches: + match = self._best_single_match(server_item) + if not match: + continue + client_item, quality = match + specificity = self._specificity(client_item) + if quality <= 0 or quality < best_quality: + continue + # better quality or same quality but more specific => better match + if quality > best_quality or specificity > best_specificity: + result = server_item + best_quality = quality + best_specificity = specificity + return result + + @property + def best(self): + """The best match as value.""" + if self: + return self[0][0] + + +class MIMEAccept(Accept): + """Like :class:`Accept` but with special methods and behavior for + mimetypes. + """ + + def _specificity(self, value): + return tuple(x != "*" for x in value.split("/", 1)) + + def _value_matches(self, value, item): + def _normalize(x): + x = x.lower() + return ("*", "*") if x == "*" else x.split("/", 1) + + # this is from the application which is trusted. to avoid developer + # frustration we actually check these for valid values + if "/" not in value: + raise ValueError("invalid mimetype %r" % value) + value_type, value_subtype = _normalize(value) + if value_type == "*" and value_subtype != "*": + raise ValueError("invalid mimetype %r" % value) + + if "/" not in item: + return False + item_type, item_subtype = _normalize(item) + if item_type == "*" and item_subtype != "*": + return False + return ( + item_type == item_subtype == "*" or value_type == value_subtype == "*" + ) or ( + item_type == value_type + and ( + item_subtype == "*" + or value_subtype == "*" + or item_subtype == value_subtype + ) + ) + + @property + def accept_html(self): + """True if this object accepts HTML.""" + return ( + "text/html" in self or "application/xhtml+xml" in self or self.accept_xhtml + ) + + @property + def accept_xhtml(self): + """True if this object accepts XHTML.""" + return "application/xhtml+xml" in self or "application/xml" in self + + @property + def accept_json(self): + """True if this object accepts JSON.""" + return "application/json" in self + + +class LanguageAccept(Accept): + """Like :class:`Accept` but with normalization for languages.""" + + def _value_matches(self, value, item): + def _normalize(language): + return _locale_delim_re.split(language.lower()) + + return item == "*" or _normalize(value) == _normalize(item) + + +class CharsetAccept(Accept): + """Like :class:`Accept` but with normalization for charsets.""" + + def _value_matches(self, value, item): + def _normalize(name): + try: + return codecs.lookup(name).name + except LookupError: + return name.lower() + + return item == "*" or _normalize(value) == _normalize(item) + + +def cache_property(key, empty, type): + """Return a new property object for a cache header. Useful if you + want to add support for a cache extension in a subclass.""" + return property( + lambda x: x._get_cache_value(key, empty, type), + lambda x, v: x._set_cache_value(key, v, type), + lambda x: x._del_cache_value(key), + "accessor for %r" % key, + ) + + +class _CacheControl(UpdateDictMixin, dict): + """Subclass of a dict that stores values for a Cache-Control header. It + has accessors for all the cache-control directives specified in RFC 2616. + The class does not differentiate between request and response directives. + + Because the cache-control directives in the HTTP header use dashes the + python descriptors use underscores for that. + + To get a header of the :class:`CacheControl` object again you can convert + the object into a string or call the :meth:`to_header` method. If you plan + to subclass it and add your own items have a look at the sourcecode for + that class. + + .. versionchanged:: 0.4 + + Setting `no_cache` or `private` to boolean `True` will set the implicit + none-value which is ``*``: + + >>> cc = ResponseCacheControl() + >>> cc.no_cache = True + >>> cc + <ResponseCacheControl 'no-cache'> + >>> cc.no_cache + '*' + >>> cc.no_cache = None + >>> cc + <ResponseCacheControl ''> + + In versions before 0.5 the behavior documented here affected the now + no longer existing `CacheControl` class. + """ + + no_cache = cache_property("no-cache", "*", None) + no_store = cache_property("no-store", None, bool) + max_age = cache_property("max-age", -1, int) + no_transform = cache_property("no-transform", None, None) + + def __init__(self, values=(), on_update=None): + dict.__init__(self, values or ()) + self.on_update = on_update + self.provided = values is not None + + def _get_cache_value(self, key, empty, type): + """Used internally by the accessor properties.""" + if type is bool: + return key in self + if key in self: + value = self[key] + if value is None: + return empty + elif type is not None: + try: + value = type(value) + except ValueError: + pass + return value + + def _set_cache_value(self, key, value, type): + """Used internally by the accessor properties.""" + if type is bool: + if value: + self[key] = None + else: + self.pop(key, None) + else: + if value is None: + self.pop(key) + elif value is True: + self[key] = None + else: + self[key] = value + + def _del_cache_value(self, key): + """Used internally by the accessor properties.""" + if key in self: + del self[key] + + def to_header(self): + """Convert the stored values into a cache control header.""" + return dump_header(self) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %s>" % ( + self.__class__.__name__, + " ".join("%s=%r" % (k, v) for k, v in sorted(self.items())), + ) + + +class RequestCacheControl(ImmutableDictMixin, _CacheControl): + """A cache control for requests. This is immutable and gives access + to all the request-relevant cache control headers. + + To get a header of the :class:`RequestCacheControl` object again you can + convert the object into a string or call the :meth:`to_header` method. If + you plan to subclass it and add your own items have a look at the sourcecode + for that class. + + .. versionadded:: 0.5 + In previous versions a `CacheControl` class existed that was used + both for request and response. + """ + + max_stale = cache_property("max-stale", "*", int) + min_fresh = cache_property("min-fresh", "*", int) + no_transform = cache_property("no-transform", None, None) + only_if_cached = cache_property("only-if-cached", None, bool) + + +class ResponseCacheControl(_CacheControl): + """A cache control for responses. Unlike :class:`RequestCacheControl` + this is mutable and gives access to response-relevant cache control + headers. + + To get a header of the :class:`ResponseCacheControl` object again you can + convert the object into a string or call the :meth:`to_header` method. If + you plan to subclass it and add your own items have a look at the sourcecode + for that class. + + .. versionadded:: 0.5 + In previous versions a `CacheControl` class existed that was used + both for request and response. + """ + + public = cache_property("public", None, bool) + private = cache_property("private", "*", None) + must_revalidate = cache_property("must-revalidate", None, bool) + proxy_revalidate = cache_property("proxy-revalidate", None, bool) + s_maxage = cache_property("s-maxage", None, None) + + +# attach cache_property to the _CacheControl as staticmethod +# so that others can reuse it. +_CacheControl.cache_property = staticmethod(cache_property) + + +class CallbackDict(UpdateDictMixin, dict): + """A dict that calls a function passed every time something is changed. + The function is passed the dict instance. + """ + + def __init__(self, initial=None, on_update=None): + dict.__init__(self, initial or ()) + self.on_update = on_update + + def __repr__(self): + return "<%s %s>" % (self.__class__.__name__, dict.__repr__(self)) + + +class HeaderSet(collections_abc.MutableSet): + """Similar to the :class:`ETags` class this implements a set-like structure. + Unlike :class:`ETags` this is case insensitive and used for vary, allow, and + content-language headers. + + If not constructed using the :func:`parse_set_header` function the + instantiation works like this: + + >>> hs = HeaderSet(['foo', 'bar', 'baz']) + >>> hs + HeaderSet(['foo', 'bar', 'baz']) + """ + + def __init__(self, headers=None, on_update=None): + self._headers = list(headers or ()) + self._set = set([x.lower() for x in self._headers]) + self.on_update = on_update + + def add(self, header): + """Add a new header to the set.""" + self.update((header,)) + + def remove(self, header): + """Remove a header from the set. This raises an :exc:`KeyError` if the + header is not in the set. + + .. versionchanged:: 0.5 + In older versions a :exc:`IndexError` was raised instead of a + :exc:`KeyError` if the object was missing. + + :param header: the header to be removed. + """ + key = header.lower() + if key not in self._set: + raise KeyError(header) + self._set.remove(key) + for idx, key in enumerate(self._headers): + if key.lower() == header: + del self._headers[idx] + break + if self.on_update is not None: + self.on_update(self) + + def update(self, iterable): + """Add all the headers from the iterable to the set. + + :param iterable: updates the set with the items from the iterable. + """ + inserted_any = False + for header in iterable: + key = header.lower() + if key not in self._set: + self._headers.append(header) + self._set.add(key) + inserted_any = True + if inserted_any and self.on_update is not None: + self.on_update(self) + + def discard(self, header): + """Like :meth:`remove` but ignores errors. + + :param header: the header to be discarded. + """ + try: + return self.remove(header) + except KeyError: + pass + + def find(self, header): + """Return the index of the header in the set or return -1 if not found. + + :param header: the header to be looked up. + """ + header = header.lower() + for idx, item in enumerate(self._headers): + if item.lower() == header: + return idx + return -1 + + def index(self, header): + """Return the index of the header in the set or raise an + :exc:`IndexError`. + + :param header: the header to be looked up. + """ + rv = self.find(header) + if rv < 0: + raise IndexError(header) + return rv + + def clear(self): + """Clear the set.""" + self._set.clear() + del self._headers[:] + if self.on_update is not None: + self.on_update(self) + + def as_set(self, preserve_casing=False): + """Return the set as real python set type. When calling this, all + the items are converted to lowercase and the ordering is lost. + + :param preserve_casing: if set to `True` the items in the set returned + will have the original case like in the + :class:`HeaderSet`, otherwise they will + be lowercase. + """ + if preserve_casing: + return set(self._headers) + return set(self._set) + + def to_header(self): + """Convert the header set into an HTTP header string.""" + return ", ".join(map(quote_header_value, self._headers)) + + def __getitem__(self, idx): + return self._headers[idx] + + def __delitem__(self, idx): + rv = self._headers.pop(idx) + self._set.remove(rv.lower()) + if self.on_update is not None: + self.on_update(self) + + def __setitem__(self, idx, value): + old = self._headers[idx] + self._set.remove(old.lower()) + self._headers[idx] = value + self._set.add(value.lower()) + if self.on_update is not None: + self.on_update(self) + + def __contains__(self, header): + return header.lower() in self._set + + def __len__(self): + return len(self._set) + + def __iter__(self): + return iter(self._headers) + + def __nonzero__(self): + return bool(self._set) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self._headers) + + +class ETags(collections_abc.Container, collections_abc.Iterable): + """A set that can be used to check if one etag is present in a collection + of etags. + """ + + def __init__(self, strong_etags=None, weak_etags=None, star_tag=False): + self._strong = frozenset(not star_tag and strong_etags or ()) + self._weak = frozenset(weak_etags or ()) + self.star_tag = star_tag + + def as_set(self, include_weak=False): + """Convert the `ETags` object into a python set. Per default all the + weak etags are not part of this set.""" + rv = set(self._strong) + if include_weak: + rv.update(self._weak) + return rv + + def is_weak(self, etag): + """Check if an etag is weak.""" + return etag in self._weak + + def is_strong(self, etag): + """Check if an etag is strong.""" + return etag in self._strong + + def contains_weak(self, etag): + """Check if an etag is part of the set including weak and strong tags.""" + return self.is_weak(etag) or self.contains(etag) + + def contains(self, etag): + """Check if an etag is part of the set ignoring weak tags. + It is also possible to use the ``in`` operator. + """ + if self.star_tag: + return True + return self.is_strong(etag) + + def contains_raw(self, etag): + """When passed a quoted tag it will check if this tag is part of the + set. If the tag is weak it is checked against weak and strong tags, + otherwise strong only.""" + etag, weak = unquote_etag(etag) + if weak: + return self.contains_weak(etag) + return self.contains(etag) + + def to_header(self): + """Convert the etags set into a HTTP header string.""" + if self.star_tag: + return "*" + return ", ".join( + ['"%s"' % x for x in self._strong] + ['W/"%s"' % x for x in self._weak] + ) + + def __call__(self, etag=None, data=None, include_weak=False): + if [etag, data].count(None) != 1: + raise TypeError("either tag or data required, but at least one") + if etag is None: + etag = generate_etag(data) + if include_weak: + if etag in self._weak: + return True + return etag in self._strong + + def __bool__(self): + return bool(self.star_tag or self._strong or self._weak) + + __nonzero__ = __bool__ + + def __str__(self): + return self.to_header() + + def __iter__(self): + return iter(self._strong) + + def __contains__(self, etag): + return self.contains(etag) + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class IfRange(object): + """Very simple object that represents the `If-Range` header in parsed + form. It will either have neither a etag or date or one of either but + never both. + + .. versionadded:: 0.7 + """ + + def __init__(self, etag=None, date=None): + #: The etag parsed and unquoted. Ranges always operate on strong + #: etags so the weakness information is not necessary. + self.etag = etag + #: The date in parsed format or `None`. + self.date = date + + def to_header(self): + """Converts the object back into an HTTP header.""" + if self.date is not None: + return http_date(self.date) + if self.etag is not None: + return quote_etag(self.etag) + return "" + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class Range(object): + """Represents a ``Range`` header. All methods only support only + bytes as the unit. Stores a list of ranges if given, but the methods + only work if only one range is provided. + + :raise ValueError: If the ranges provided are invalid. + + .. versionchanged:: 0.15 + The ranges passed in are validated. + + .. versionadded:: 0.7 + """ + + def __init__(self, units, ranges): + #: The units of this range. Usually "bytes". + self.units = units + #: A list of ``(begin, end)`` tuples for the range header provided. + #: The ranges are non-inclusive. + self.ranges = ranges + + for start, end in ranges: + if start is None or (end is not None and (start < 0 or start >= end)): + raise ValueError("{} is not a valid range.".format((start, end))) + + def range_for_length(self, length): + """If the range is for bytes, the length is not None and there is + exactly one range and it is satisfiable it returns a ``(start, stop)`` + tuple, otherwise `None`. + """ + if self.units != "bytes" or length is None or len(self.ranges) != 1: + return None + start, end = self.ranges[0] + if end is None: + end = length + if start < 0: + start += length + if is_byte_range_valid(start, end, length): + return start, min(end, length) + + def make_content_range(self, length): + """Creates a :class:`~werkzeug.datastructures.ContentRange` object + from the current range and given content length. + """ + rng = self.range_for_length(length) + if rng is not None: + return ContentRange(self.units, rng[0], rng[1], length) + + def to_header(self): + """Converts the object back into an HTTP header.""" + ranges = [] + for begin, end in self.ranges: + if end is None: + ranges.append("%s-" % begin if begin >= 0 else str(begin)) + else: + ranges.append("%s-%s" % (begin, end - 1)) + return "%s=%s" % (self.units, ",".join(ranges)) + + def to_content_range_header(self, length): + """Converts the object into `Content-Range` HTTP header, + based on given length + """ + range_for_length = self.range_for_length(length) + if range_for_length is not None: + return "%s %d-%d/%d" % ( + self.units, + range_for_length[0], + range_for_length[1] - 1, + length, + ) + return None + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class ContentRange(object): + """Represents the content range header. + + .. versionadded:: 0.7 + """ + + def __init__(self, units, start, stop, length=None, on_update=None): + assert is_byte_range_valid(start, stop, length), "Bad range provided" + self.on_update = on_update + self.set(start, stop, length, units) + + def _callback_property(name): # noqa: B902 + def fget(self): + return getattr(self, name) + + def fset(self, value): + setattr(self, name, value) + if self.on_update is not None: + self.on_update(self) + + return property(fget, fset) + + #: The units to use, usually "bytes" + units = _callback_property("_units") + #: The start point of the range or `None`. + start = _callback_property("_start") + #: The stop point of the range (non-inclusive) or `None`. Can only be + #: `None` if also start is `None`. + stop = _callback_property("_stop") + #: The length of the range or `None`. + length = _callback_property("_length") + del _callback_property + + def set(self, start, stop, length=None, units="bytes"): + """Simple method to update the ranges.""" + assert is_byte_range_valid(start, stop, length), "Bad range provided" + self._units = units + self._start = start + self._stop = stop + self._length = length + if self.on_update is not None: + self.on_update(self) + + def unset(self): + """Sets the units to `None` which indicates that the header should + no longer be used. + """ + self.set(None, None, units=None) + + def to_header(self): + if self.units is None: + return "" + if self.length is None: + length = "*" + else: + length = self.length + if self.start is None: + return "%s */%s" % (self.units, length) + return "%s %s-%s/%s" % (self.units, self.start, self.stop - 1, length) + + def __nonzero__(self): + return self.units is not None + + __bool__ = __nonzero__ + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, str(self)) + + +class Authorization(ImmutableDictMixin, dict): + """Represents an `Authorization` header sent by the client. You should + not create this kind of object yourself but use it when it's returned by + the `parse_authorization_header` function. + + This object is a dict subclass and can be altered by setting dict items + but it should be considered immutable as it's returned by the client and + not meant for modifications. + + .. versionchanged:: 0.5 + This object became immutable. + """ + + def __init__(self, auth_type, data=None): + dict.__init__(self, data or {}) + self.type = auth_type + + username = property( + lambda self: self.get("username"), + doc=""" + The username transmitted. This is set for both basic and digest + auth all the time.""", + ) + password = property( + lambda self: self.get("password"), + doc=""" + When the authentication type is basic this is the password + transmitted by the client, else `None`.""", + ) + realm = property( + lambda self: self.get("realm"), + doc=""" + This is the server realm sent back for HTTP digest auth.""", + ) + nonce = property( + lambda self: self.get("nonce"), + doc=""" + The nonce the server sent for digest auth, sent back by the client. + A nonce should be unique for every 401 response for HTTP digest + auth.""", + ) + uri = property( + lambda self: self.get("uri"), + doc=""" + The URI from Request-URI of the Request-Line; duplicated because + proxies are allowed to change the Request-Line in transit. HTTP + digest auth only.""", + ) + nc = property( + lambda self: self.get("nc"), + doc=""" + The nonce count value transmitted by clients if a qop-header is + also transmitted. HTTP digest auth only.""", + ) + cnonce = property( + lambda self: self.get("cnonce"), + doc=""" + If the server sent a qop-header in the ``WWW-Authenticate`` + header, the client has to provide this value for HTTP digest auth. + See the RFC for more details.""", + ) + response = property( + lambda self: self.get("response"), + doc=""" + A string of 32 hex digits computed as defined in RFC 2617, which + proves that the user knows a password. Digest auth only.""", + ) + opaque = property( + lambda self: self.get("opaque"), + doc=""" + The opaque header from the server returned unchanged by the client. + It is recommended that this string be base64 or hexadecimal data. + Digest auth only.""", + ) + qop = property( + lambda self: self.get("qop"), + doc=""" + Indicates what "quality of protection" the client has applied to + the message for HTTP digest auth. Note that this is a single token, + not a quoted list of alternatives as in WWW-Authenticate.""", + ) + + +class WWWAuthenticate(UpdateDictMixin, dict): + """Provides simple access to `WWW-Authenticate` headers.""" + + #: list of keys that require quoting in the generated header + _require_quoting = frozenset(["domain", "nonce", "opaque", "realm", "qop"]) + + def __init__(self, auth_type=None, values=None, on_update=None): + dict.__init__(self, values or ()) + if auth_type: + self["__auth_type__"] = auth_type + self.on_update = on_update + + def set_basic(self, realm="authentication required"): + """Clear the auth info and enable basic auth.""" + dict.clear(self) + dict.update(self, {"__auth_type__": "basic", "realm": realm}) + if self.on_update: + self.on_update(self) + + def set_digest( + self, realm, nonce, qop=("auth",), opaque=None, algorithm=None, stale=False + ): + """Clear the auth info and enable digest auth.""" + d = { + "__auth_type__": "digest", + "realm": realm, + "nonce": nonce, + "qop": dump_header(qop), + } + if stale: + d["stale"] = "TRUE" + if opaque is not None: + d["opaque"] = opaque + if algorithm is not None: + d["algorithm"] = algorithm + dict.clear(self) + dict.update(self, d) + if self.on_update: + self.on_update(self) + + def to_header(self): + """Convert the stored values into a WWW-Authenticate header.""" + d = dict(self) + auth_type = d.pop("__auth_type__", None) or "basic" + return "%s %s" % ( + auth_type.title(), + ", ".join( + [ + "%s=%s" + % ( + key, + quote_header_value( + value, allow_token=key not in self._require_quoting + ), + ) + for key, value in iteritems(d) + ] + ), + ) + + def __str__(self): + return self.to_header() + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.to_header()) + + def auth_property(name, doc=None): # noqa: B902 + """A static helper function for subclasses to add extra authentication + system properties onto a class:: + + class FooAuthenticate(WWWAuthenticate): + special_realm = auth_property('special_realm') + + For more information have a look at the sourcecode to see how the + regular properties (:attr:`realm` etc.) are implemented. + """ + + def _set_value(self, value): + if value is None: + self.pop(name, None) + else: + self[name] = str(value) + + return property(lambda x: x.get(name), _set_value, doc=doc) + + def _set_property(name, doc=None): # noqa: B902 + def fget(self): + def on_update(header_set): + if not header_set and name in self: + del self[name] + elif header_set: + self[name] = header_set.to_header() + + return parse_set_header(self.get(name), on_update) + + return property(fget, doc=doc) + + type = auth_property( + "__auth_type__", + doc="""The type of the auth mechanism. HTTP currently specifies + ``Basic`` and ``Digest``.""", + ) + realm = auth_property( + "realm", + doc="""A string to be displayed to users so they know which + username and password to use. This string should contain at + least the name of the host performing the authentication and + might additionally indicate the collection of users who might + have access.""", + ) + domain = _set_property( + "domain", + doc="""A list of URIs that define the protection space. If a URI + is an absolute path, it is relative to the canonical root URL of + the server being accessed.""", + ) + nonce = auth_property( + "nonce", + doc=""" + A server-specified data string which should be uniquely generated + each time a 401 response is made. It is recommended that this + string be base64 or hexadecimal data.""", + ) + opaque = auth_property( + "opaque", + doc="""A string of data, specified by the server, which should + be returned by the client unchanged in the Authorization header + of subsequent requests with URIs in the same protection space. + It is recommended that this string be base64 or hexadecimal + data.""", + ) + algorithm = auth_property( + "algorithm", + doc="""A string indicating a pair of algorithms used to produce + the digest and a checksum. If this is not present it is assumed + to be "MD5". If the algorithm is not understood, the challenge + should be ignored (and a different one used, if there is more + than one).""", + ) + qop = _set_property( + "qop", + doc="""A set of quality-of-privacy directives such as auth and + auth-int.""", + ) + + @property + def stale(self): + """A flag, indicating that the previous request from the client + was rejected because the nonce value was stale. + """ + val = self.get("stale") + if val is not None: + return val.lower() == "true" + + @stale.setter + def stale(self, value): + if value is None: + self.pop("stale", None) + else: + self["stale"] = "TRUE" if value else "FALSE" + + auth_property = staticmethod(auth_property) + del _set_property + + +class FileStorage(object): + """The :class:`FileStorage` class is a thin wrapper over incoming files. + It is used by the request object to represent uploaded files. All the + attributes of the wrapper stream are proxied by the file storage so + it's possible to do ``storage.read()`` instead of the long form + ``storage.stream.read()``. + """ + + def __init__( + self, + stream=None, + filename=None, + name=None, + content_type=None, + content_length=None, + headers=None, + ): + self.name = name + self.stream = stream or BytesIO() + + # if no filename is provided we can attempt to get the filename + # from the stream object passed. There we have to be careful to + # skip things like <fdopen>, <stderr> etc. Python marks these + # special filenames with angular brackets. + if filename is None: + filename = getattr(stream, "name", None) + s = make_literal_wrapper(filename) + if filename and filename[0] == s("<") and filename[-1] == s(">"): + filename = None + + # On Python 3 we want to make sure the filename is always unicode. + # This might not be if the name attribute is bytes due to the + # file being opened from the bytes API. + if not PY2 and isinstance(filename, bytes): + filename = filename.decode(get_filesystem_encoding(), "replace") + + self.filename = filename + if headers is None: + headers = Headers() + self.headers = headers + if content_type is not None: + headers["Content-Type"] = content_type + if content_length is not None: + headers["Content-Length"] = str(content_length) + + def _parse_content_type(self): + if not hasattr(self, "_parsed_content_type"): + self._parsed_content_type = parse_options_header(self.content_type) + + @property + def content_type(self): + """The content-type sent in the header. Usually not available""" + return self.headers.get("content-type") + + @property + def content_length(self): + """The content-length sent in the header. Usually not available""" + return int(self.headers.get("content-length") or 0) + + @property + def mimetype(self): + """Like :attr:`content_type`, but without parameters (eg, without + charset, type etc.) and always lowercase. For example if the content + type is ``text/HTML; charset=utf-8`` the mimetype would be + ``'text/html'``. + + .. versionadded:: 0.7 + """ + self._parse_content_type() + return self._parsed_content_type[0].lower() + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the content + type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.7 + """ + self._parse_content_type() + return self._parsed_content_type[1] + + def save(self, dst, buffer_size=16384): + """Save the file to a destination path or file object. If the + destination is a file object you have to close it yourself after the + call. The buffer size is the number of bytes held in memory during + the copy process. It defaults to 16KB. + + For secure file saving also have a look at :func:`secure_filename`. + + :param dst: a filename or open file object the uploaded file + is saved to. + :param buffer_size: the size of the buffer. This works the same as + the `length` parameter of + :func:`shutil.copyfileobj`. + """ + from shutil import copyfileobj + + close_dst = False + if isinstance(dst, string_types): + dst = open(dst, "wb") + close_dst = True + try: + copyfileobj(self.stream, dst, buffer_size) + finally: + if close_dst: + dst.close() + + def close(self): + """Close the underlying file if possible.""" + try: + self.stream.close() + except Exception: + pass + + def __nonzero__(self): + return bool(self.filename) + + __bool__ = __nonzero__ + + def __getattr__(self, name): + try: + return getattr(self.stream, name) + except AttributeError: + # SpooledTemporaryFile doesn't implement IOBase, get the + # attribute from its backing file instead. + # https://github.com/python/cpython/pull/3249 + if hasattr(self.stream, "_file"): + return getattr(self.stream._file, name) + raise + + def __iter__(self): + return iter(self.stream) + + def __repr__(self): + return "<%s: %r (%r)>" % ( + self.__class__.__name__, + self.filename, + self.content_type, + ) + + +# circular dependencies +from . import exceptions +from .http import dump_header +from .http import dump_options_header +from .http import generate_etag +from .http import http_date +from .http import is_byte_range_valid +from .http import parse_options_header +from .http import parse_set_header +from .http import quote_etag +from .http import quote_header_value +from .http import unquote_etag diff --git a/python/werkzeug/exceptions.py b/python/werkzeug/exceptions.py new file mode 100644 index 0000000..fb6528d --- /dev/null +++ b/python/werkzeug/exceptions.py @@ -0,0 +1,774 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.exceptions + ~~~~~~~~~~~~~~~~~~~ + + This module implements a number of Python exceptions you can raise from + within your views to trigger a standard non-200 response. + + + Usage Example + ------------- + + :: + + from werkzeug.wrappers import BaseRequest + from werkzeug.wsgi import responder + from werkzeug.exceptions import HTTPException, NotFound + + def view(request): + raise NotFound() + + @responder + def application(environ, start_response): + request = BaseRequest(environ) + try: + return view(request) + except HTTPException as e: + return e + + + As you can see from this example those exceptions are callable WSGI + applications. Because of Python 2.4 compatibility those do not extend + from the response objects but only from the python exception class. + + As a matter of fact they are not Werkzeug response objects. However you + can get a response object by calling ``get_response()`` on a HTTP + exception. + + Keep in mind that you have to pass an environment to ``get_response()`` + because some errors fetch additional information from the WSGI + environment. + + If you want to hook in a different exception page to say, a 404 status + code, you can add a second except for a specific subclass of an error:: + + @responder + def application(environ, start_response): + request = BaseRequest(environ) + try: + return view(request) + except NotFound, e: + return not_found(request) + except HTTPException, e: + return e + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import sys + +import werkzeug + +# Because of bootstrapping reasons we need to manually patch ourselves +# onto our parent module. +werkzeug.exceptions = sys.modules[__name__] + +from ._compat import implements_to_string +from ._compat import integer_types +from ._compat import iteritems +from ._compat import text_type +from ._internal import _get_environ +from .wrappers import Response + + +@implements_to_string +class HTTPException(Exception): + """Baseclass for all HTTP exceptions. This exception can be called as WSGI + application to render a default error page or you can catch the subclasses + of it independently and render nicer error messages. + """ + + code = None + description = None + + def __init__(self, description=None, response=None): + super(Exception, self).__init__() + if description is not None: + self.description = description + self.response = response + + @classmethod + def wrap(cls, exception, name=None): + """Create an exception that is a subclass of the calling HTTP + exception and the ``exception`` argument. + + The first argument to the class will be passed to the + wrapped ``exception``, the rest to the HTTP exception. If + ``self.args`` is not empty, the wrapped exception message is + added to the HTTP exception description. + + .. versionchanged:: 0.15 + The description includes the wrapped exception message. + """ + + class newcls(cls, exception): + def __init__(self, arg=None, *args, **kwargs): + super(cls, self).__init__(*args, **kwargs) + + if arg is None: + exception.__init__(self) + else: + exception.__init__(self, arg) + + def get_description(self, environ=None): + out = super(cls, self).get_description(environ=environ) + + if self.args: + out += "<p><pre><code>{}: {}</code></pre></p>".format( + exception.__name__, escape(exception.__str__(self)) + ) + + return out + + newcls.__module__ = sys._getframe(1).f_globals.get("__name__") + newcls.__name__ = name or cls.__name__ + exception.__name__ + return newcls + + @property + def name(self): + """The status name.""" + return HTTP_STATUS_CODES.get(self.code, "Unknown Error") + + def get_description(self, environ=None): + """Get the description.""" + return u"<p>%s</p>" % escape(self.description) + + def get_body(self, environ=None): + """Get the HTML body.""" + return text_type( + ( + u'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n' + u"<title>%(code)s %(name)s</title>\n" + u"<h1>%(name)s</h1>\n" + u"%(description)s\n" + ) + % { + "code": self.code, + "name": escape(self.name), + "description": self.get_description(environ), + } + ) + + def get_headers(self, environ=None): + """Get a list of headers.""" + return [("Content-Type", "text/html")] + + def get_response(self, environ=None): + """Get a response object. If one was passed to the exception + it's returned directly. + + :param environ: the optional environ for the request. This + can be used to modify the response depending + on how the request looked like. + :return: a :class:`Response` object or a subclass thereof. + """ + if self.response is not None: + return self.response + if environ is not None: + environ = _get_environ(environ) + headers = self.get_headers(environ) + return Response(self.get_body(environ), self.code, headers) + + def __call__(self, environ, start_response): + """Call the exception as WSGI application. + + :param environ: the WSGI environment. + :param start_response: the response callable provided by the WSGI + server. + """ + response = self.get_response(environ) + return response(environ, start_response) + + def __str__(self): + code = self.code if self.code is not None else "???" + return "%s %s: %s" % (code, self.name, self.description) + + def __repr__(self): + code = self.code if self.code is not None else "???" + return "<%s '%s: %s'>" % (self.__class__.__name__, code, self.name) + + +class BadRequest(HTTPException): + """*400* `Bad Request` + + Raise if the browser sends something to the application the application + or server cannot handle. + """ + + code = 400 + description = ( + "The browser (or proxy) sent a request that this server could " + "not understand." + ) + + +class ClientDisconnected(BadRequest): + """Internal exception that is raised if Werkzeug detects a disconnected + client. Since the client is already gone at that point attempting to + send the error message to the client might not work and might ultimately + result in another exception in the server. Mainly this is here so that + it is silenced by default as far as Werkzeug is concerned. + + Since disconnections cannot be reliably detected and are unspecified + by WSGI to a large extent this might or might not be raised if a client + is gone. + + .. versionadded:: 0.8 + """ + + +class SecurityError(BadRequest): + """Raised if something triggers a security error. This is otherwise + exactly like a bad request error. + + .. versionadded:: 0.9 + """ + + +class BadHost(BadRequest): + """Raised if the submitted host is badly formatted. + + .. versionadded:: 0.11.2 + """ + + +class Unauthorized(HTTPException): + """*401* ``Unauthorized`` + + Raise if the user is not authorized to access a resource. + + The ``www_authenticate`` argument should be used to set the + ``WWW-Authenticate`` header. This is used for HTTP basic auth and + other schemes. Use :class:`~werkzeug.datastructures.WWWAuthenticate` + to create correctly formatted values. Strictly speaking a 401 + response is invalid if it doesn't provide at least one value for + this header, although real clients typically don't care. + + :param description: Override the default message used for the body + of the response. + :param www-authenticate: A single value, or list of values, for the + WWW-Authenticate header. + + .. versionchanged:: 0.15.3 + If the ``www_authenticate`` argument is not set, the + ``WWW-Authenticate`` header is not set. + + .. versionchanged:: 0.15.3 + The ``response`` argument was restored. + + .. versionchanged:: 0.15.1 + ``description`` was moved back as the first argument, restoring + its previous position. + + .. versionchanged:: 0.15.0 + ``www_authenticate`` was added as the first argument, ahead of + ``description``. + """ + + code = 401 + description = ( + "The server could not verify that you are authorized to access" + " the URL requested. You either supplied the wrong credentials" + " (e.g. a bad password), or your browser doesn't understand" + " how to supply the credentials required." + ) + + def __init__(self, description=None, response=None, www_authenticate=None): + HTTPException.__init__(self, description, response) + + if www_authenticate is not None: + if not isinstance(www_authenticate, (tuple, list)): + www_authenticate = (www_authenticate,) + + self.www_authenticate = www_authenticate + + def get_headers(self, environ=None): + headers = HTTPException.get_headers(self, environ) + if self.www_authenticate: + headers.append( + ("WWW-Authenticate", ", ".join([str(x) for x in self.www_authenticate])) + ) + return headers + + +class Forbidden(HTTPException): + """*403* `Forbidden` + + Raise if the user doesn't have the permission for the requested resource + but was authenticated. + """ + + code = 403 + description = ( + "You don't have the permission to access the requested" + " resource. It is either read-protected or not readable by the" + " server." + ) + + +class NotFound(HTTPException): + """*404* `Not Found` + + Raise if a resource does not exist and never existed. + """ + + code = 404 + description = ( + "The requested URL was not found on the server. If you entered" + " the URL manually please check your spelling and try again." + ) + + +class MethodNotAllowed(HTTPException): + """*405* `Method Not Allowed` + + Raise if the server used a method the resource does not handle. For + example `POST` if the resource is view only. Especially useful for REST. + + The first argument for this exception should be a list of allowed methods. + Strictly speaking the response would be invalid if you don't provide valid + methods in the header which you can do with that list. + """ + + code = 405 + description = "The method is not allowed for the requested URL." + + def __init__(self, valid_methods=None, description=None): + """Takes an optional list of valid http methods + starting with werkzeug 0.3 the list will be mandatory.""" + HTTPException.__init__(self, description) + self.valid_methods = valid_methods + + def get_headers(self, environ=None): + headers = HTTPException.get_headers(self, environ) + if self.valid_methods: + headers.append(("Allow", ", ".join(self.valid_methods))) + return headers + + +class NotAcceptable(HTTPException): + """*406* `Not Acceptable` + + Raise if the server can't return any content conforming to the + `Accept` headers of the client. + """ + + code = 406 + + description = ( + "The resource identified by the request is only capable of" + " generating response entities which have content" + " characteristics not acceptable according to the accept" + " headers sent in the request." + ) + + +class RequestTimeout(HTTPException): + """*408* `Request Timeout` + + Raise to signalize a timeout. + """ + + code = 408 + description = ( + "The server closed the network connection because the browser" + " didn't finish the request within the specified time." + ) + + +class Conflict(HTTPException): + """*409* `Conflict` + + Raise to signal that a request cannot be completed because it conflicts + with the current state on the server. + + .. versionadded:: 0.7 + """ + + code = 409 + description = ( + "A conflict happened while processing the request. The" + " resource might have been modified while the request was being" + " processed." + ) + + +class Gone(HTTPException): + """*410* `Gone` + + Raise if a resource existed previously and went away without new location. + """ + + code = 410 + description = ( + "The requested URL is no longer available on this server and" + " there is no forwarding address. If you followed a link from a" + " foreign page, please contact the author of this page." + ) + + +class LengthRequired(HTTPException): + """*411* `Length Required` + + Raise if the browser submitted data but no ``Content-Length`` header which + is required for the kind of processing the server does. + """ + + code = 411 + description = ( + "A request with this method requires a valid <code>Content-" + "Length</code> header." + ) + + +class PreconditionFailed(HTTPException): + """*412* `Precondition Failed` + + Status code used in combination with ``If-Match``, ``If-None-Match``, or + ``If-Unmodified-Since``. + """ + + code = 412 + description = ( + "The precondition on the request for the URL failed positive evaluation." + ) + + +class RequestEntityTooLarge(HTTPException): + """*413* `Request Entity Too Large` + + The status code one should return if the data submitted exceeded a given + limit. + """ + + code = 413 + description = "The data value transmitted exceeds the capacity limit." + + +class RequestURITooLarge(HTTPException): + """*414* `Request URI Too Large` + + Like *413* but for too long URLs. + """ + + code = 414 + description = ( + "The length of the requested URL exceeds the capacity limit for" + " this server. The request cannot be processed." + ) + + +class UnsupportedMediaType(HTTPException): + """*415* `Unsupported Media Type` + + The status code returned if the server is unable to handle the media type + the client transmitted. + """ + + code = 415 + description = ( + "The server does not support the media type transmitted in the request." + ) + + +class RequestedRangeNotSatisfiable(HTTPException): + """*416* `Requested Range Not Satisfiable` + + The client asked for an invalid part of the file. + + .. versionadded:: 0.7 + """ + + code = 416 + description = "The server cannot provide the requested range." + + def __init__(self, length=None, units="bytes", description=None): + """Takes an optional `Content-Range` header value based on ``length`` + parameter. + """ + HTTPException.__init__(self, description) + self.length = length + self.units = units + + def get_headers(self, environ=None): + headers = HTTPException.get_headers(self, environ) + if self.length is not None: + headers.append(("Content-Range", "%s */%d" % (self.units, self.length))) + return headers + + +class ExpectationFailed(HTTPException): + """*417* `Expectation Failed` + + The server cannot meet the requirements of the Expect request-header. + + .. versionadded:: 0.7 + """ + + code = 417 + description = "The server could not meet the requirements of the Expect header" + + +class ImATeapot(HTTPException): + """*418* `I'm a teapot` + + The server should return this if it is a teapot and someone attempted + to brew coffee with it. + + .. versionadded:: 0.7 + """ + + code = 418 + description = "This server is a teapot, not a coffee machine" + + +class UnprocessableEntity(HTTPException): + """*422* `Unprocessable Entity` + + Used if the request is well formed, but the instructions are otherwise + incorrect. + """ + + code = 422 + description = ( + "The request was well-formed but was unable to be followed due" + " to semantic errors." + ) + + +class Locked(HTTPException): + """*423* `Locked` + + Used if the resource that is being accessed is locked. + """ + + code = 423 + description = "The resource that is being accessed is locked." + + +class FailedDependency(HTTPException): + """*424* `Failed Dependency` + + Used if the method could not be performed on the resource + because the requested action depended on another action and that action failed. + """ + + code = 424 + description = ( + "The method could not be performed on the resource because the" + " requested action depended on another action and that action" + " failed." + ) + + +class PreconditionRequired(HTTPException): + """*428* `Precondition Required` + + The server requires this request to be conditional, typically to prevent + the lost update problem, which is a race condition between two or more + clients attempting to update a resource through PUT or DELETE. By requiring + each client to include a conditional header ("If-Match" or "If-Unmodified- + Since") with the proper value retained from a recent GET request, the + server ensures that each client has at least seen the previous revision of + the resource. + """ + + code = 428 + description = ( + "This request is required to be conditional; try using" + ' "If-Match" or "If-Unmodified-Since".' + ) + + +class TooManyRequests(HTTPException): + """*429* `Too Many Requests` + + The server is limiting the rate at which this user receives responses, and + this request exceeds that rate. (The server may use any convenient method + to identify users and their request rates). The server may include a + "Retry-After" header to indicate how long the user should wait before + retrying. + """ + + code = 429 + description = "This user has exceeded an allotted request count. Try again later." + + +class RequestHeaderFieldsTooLarge(HTTPException): + """*431* `Request Header Fields Too Large` + + The server refuses to process the request because the header fields are too + large. One or more individual fields may be too large, or the set of all + headers is too large. + """ + + code = 431 + description = "One or more header fields exceeds the maximum size." + + +class UnavailableForLegalReasons(HTTPException): + """*451* `Unavailable For Legal Reasons` + + This status code indicates that the server is denying access to the + resource as a consequence of a legal demand. + """ + + code = 451 + description = "Unavailable for legal reasons." + + +class InternalServerError(HTTPException): + """*500* `Internal Server Error` + + Raise if an internal server error occurred. This is a good fallback if an + unknown error occurred in the dispatcher. + """ + + code = 500 + description = ( + "The server encountered an internal error and was unable to" + " complete your request. Either the server is overloaded or" + " there is an error in the application." + ) + + +class NotImplemented(HTTPException): + """*501* `Not Implemented` + + Raise if the application does not support the action requested by the + browser. + """ + + code = 501 + description = "The server does not support the action requested by the browser." + + +class BadGateway(HTTPException): + """*502* `Bad Gateway` + + If you do proxying in your application you should return this status code + if you received an invalid response from the upstream server it accessed + in attempting to fulfill the request. + """ + + code = 502 + description = ( + "The proxy server received an invalid response from an upstream server." + ) + + +class ServiceUnavailable(HTTPException): + """*503* `Service Unavailable` + + Status code you should return if a service is temporarily unavailable. + """ + + code = 503 + description = ( + "The server is temporarily unable to service your request due" + " to maintenance downtime or capacity problems. Please try" + " again later." + ) + + +class GatewayTimeout(HTTPException): + """*504* `Gateway Timeout` + + Status code you should return if a connection to an upstream server + times out. + """ + + code = 504 + description = "The connection to an upstream server timed out." + + +class HTTPVersionNotSupported(HTTPException): + """*505* `HTTP Version Not Supported` + + The server does not support the HTTP protocol version used in the request. + """ + + code = 505 + description = ( + "The server does not support the HTTP protocol version used in the request." + ) + + +default_exceptions = {} +__all__ = ["HTTPException"] + + +def _find_exceptions(): + for _name, obj in iteritems(globals()): + try: + is_http_exception = issubclass(obj, HTTPException) + except TypeError: + is_http_exception = False + if not is_http_exception or obj.code is None: + continue + __all__.append(obj.__name__) + old_obj = default_exceptions.get(obj.code, None) + if old_obj is not None and issubclass(obj, old_obj): + continue + default_exceptions[obj.code] = obj + + +_find_exceptions() +del _find_exceptions + + +class Aborter(object): + """When passed a dict of code -> exception items it can be used as + callable that raises exceptions. If the first argument to the + callable is an integer it will be looked up in the mapping, if it's + a WSGI application it will be raised in a proxy exception. + + The rest of the arguments are forwarded to the exception constructor. + """ + + def __init__(self, mapping=None, extra=None): + if mapping is None: + mapping = default_exceptions + self.mapping = dict(mapping) + if extra is not None: + self.mapping.update(extra) + + def __call__(self, code, *args, **kwargs): + if not args and not kwargs and not isinstance(code, integer_types): + raise HTTPException(response=code) + if code not in self.mapping: + raise LookupError("no exception for %r" % code) + raise self.mapping[code](*args, **kwargs) + + +def abort(status, *args, **kwargs): + """Raises an :py:exc:`HTTPException` for the given status code or WSGI + application:: + + abort(404) # 404 Not Found + abort(Response('Hello World')) + + Can be passed a WSGI application or a status code. If a status code is + given it's looked up in the list of exceptions and will raise that + exception, if passed a WSGI application it will wrap it in a proxy WSGI + exception and raise that:: + + abort(404) + abort(Response('Hello World')) + + """ + return _aborter(status, *args, **kwargs) + + +_aborter = Aborter() + + +#: an exception that is used internally to signal both a key error and a +#: bad request. Used by a lot of the datastructures. +BadRequestKeyError = BadRequest.wrap(KeyError) + +# imported here because of circular dependencies of werkzeug.utils +from .http import HTTP_STATUS_CODES +from .utils import escape diff --git a/python/werkzeug/filesystem.py b/python/werkzeug/filesystem.py new file mode 100644 index 0000000..d016cae --- /dev/null +++ b/python/werkzeug/filesystem.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.filesystem + ~~~~~~~~~~~~~~~~~~~ + + Various utilities for the local filesystem. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import sys +import warnings + +# We do not trust traditional unixes. +has_likely_buggy_unicode_filesystem = ( + sys.platform.startswith("linux") or "bsd" in sys.platform +) + + +def _is_ascii_encoding(encoding): + """Given an encoding this figures out if the encoding is actually ASCII (which + is something we don't actually want in most cases). This is necessary + because ASCII comes under many names such as ANSI_X3.4-1968. + """ + if encoding is None: + return False + try: + return codecs.lookup(encoding).name == "ascii" + except LookupError: + return False + + +class BrokenFilesystemWarning(RuntimeWarning, UnicodeWarning): + """The warning used by Werkzeug to signal a broken filesystem. Will only be + used once per runtime.""" + + +_warned_about_filesystem_encoding = False + + +def get_filesystem_encoding(): + """Returns the filesystem encoding that should be used. Note that this is + different from the Python understanding of the filesystem encoding which + might be deeply flawed. Do not use this value against Python's unicode APIs + because it might be different. See :ref:`filesystem-encoding` for the exact + behavior. + + The concept of a filesystem encoding in generally is not something you + should rely on. As such if you ever need to use this function except for + writing wrapper code reconsider. + """ + global _warned_about_filesystem_encoding + rv = sys.getfilesystemencoding() + if has_likely_buggy_unicode_filesystem and not rv or _is_ascii_encoding(rv): + if not _warned_about_filesystem_encoding: + warnings.warn( + "Detected a misconfigured UNIX filesystem: Will use" + " UTF-8 as filesystem encoding instead of {0!r}".format(rv), + BrokenFilesystemWarning, + ) + _warned_about_filesystem_encoding = True + return "utf-8" + return rv diff --git a/python/werkzeug/formparser.py b/python/werkzeug/formparser.py new file mode 100644 index 0000000..0ddc5c8 --- /dev/null +++ b/python/werkzeug/formparser.py @@ -0,0 +1,586 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.formparser + ~~~~~~~~~~~~~~~~~~~ + + This module implements the form parsing. It supports url-encoded forms + as well as non-nested multipart uploads. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import re +from functools import update_wrapper +from itertools import chain +from itertools import repeat +from itertools import tee + +from ._compat import BytesIO +from ._compat import text_type +from ._compat import to_native +from .datastructures import FileStorage +from .datastructures import Headers +from .datastructures import MultiDict +from .http import parse_options_header +from .urls import url_decode_stream +from .wsgi import get_content_length +from .wsgi import get_input_stream +from .wsgi import make_line_iter + +# there are some platforms where SpooledTemporaryFile is not available. +# In that case we need to provide a fallback. +try: + from tempfile import SpooledTemporaryFile +except ImportError: + from tempfile import TemporaryFile + + SpooledTemporaryFile = None + + +#: an iterator that yields empty strings +_empty_string_iter = repeat("") + +#: a regular expression for multipart boundaries +_multipart_boundary_re = re.compile("^[ -~]{0,200}[!-~]$") + +#: supported http encodings that are also available in python we support +#: for multipart messages. +_supported_multipart_encodings = frozenset(["base64", "quoted-printable"]) + + +def default_stream_factory( + total_content_length, filename, content_type, content_length=None +): + """The stream factory that is used per default.""" + max_size = 1024 * 500 + if SpooledTemporaryFile is not None: + return SpooledTemporaryFile(max_size=max_size, mode="wb+") + if total_content_length is None or total_content_length > max_size: + return TemporaryFile("wb+") + return BytesIO() + + +def parse_form_data( + environ, + stream_factory=None, + charset="utf-8", + errors="replace", + max_form_memory_size=None, + max_content_length=None, + cls=None, + silent=True, +): + """Parse the form data in the environ and return it as tuple in the form + ``(stream, form, files)``. You should only call this method if the + transport method is `POST`, `PUT`, or `PATCH`. + + If the mimetype of the data transmitted is `multipart/form-data` the + files multidict will be filled with `FileStorage` objects. If the + mimetype is unknown the input stream is wrapped and returned as first + argument, else the stream is empty. + + This is a shortcut for the common usage of :class:`FormDataParser`. + + Have a look at :ref:`dealing-with-request-data` for more details. + + .. versionadded:: 0.5 + The `max_form_memory_size`, `max_content_length` and + `cls` parameters were added. + + .. versionadded:: 0.5.1 + The optional `silent` flag was added. + + :param environ: the WSGI environment to be used for parsing. + :param stream_factory: An optional callable that returns a new read and + writeable file descriptor. This callable works + the same as :meth:`~BaseResponse._get_file_stream`. + :param charset: The character set for URL and url encoded form data. + :param errors: The encoding error behavior. + :param max_form_memory_size: the maximum number of bytes to be accepted for + in-memory stored form data. If the data + exceeds the value specified an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param max_content_length: If this is provided and the transmitted data + is longer than this value an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param silent: If set to False parsing errors will not be caught. + :return: A tuple in the form ``(stream, form, files)``. + """ + return FormDataParser( + stream_factory, + charset, + errors, + max_form_memory_size, + max_content_length, + cls, + silent, + ).parse_from_environ(environ) + + +def exhaust_stream(f): + """Helper decorator for methods that exhausts the stream on return.""" + + def wrapper(self, stream, *args, **kwargs): + try: + return f(self, stream, *args, **kwargs) + finally: + exhaust = getattr(stream, "exhaust", None) + if exhaust is not None: + exhaust() + else: + while 1: + chunk = stream.read(1024 * 64) + if not chunk: + break + + return update_wrapper(wrapper, f) + + +class FormDataParser(object): + """This class implements parsing of form data for Werkzeug. By itself + it can parse multipart and url encoded form data. It can be subclassed + and extended but for most mimetypes it is a better idea to use the + untouched stream and expose it as separate attributes on a request + object. + + .. versionadded:: 0.8 + + :param stream_factory: An optional callable that returns a new read and + writeable file descriptor. This callable works + the same as :meth:`~BaseResponse._get_file_stream`. + :param charset: The character set for URL and url encoded form data. + :param errors: The encoding error behavior. + :param max_form_memory_size: the maximum number of bytes to be accepted for + in-memory stored form data. If the data + exceeds the value specified an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param max_content_length: If this is provided and the transmitted data + is longer than this value an + :exc:`~exceptions.RequestEntityTooLarge` + exception is raised. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param silent: If set to False parsing errors will not be caught. + """ + + def __init__( + self, + stream_factory=None, + charset="utf-8", + errors="replace", + max_form_memory_size=None, + max_content_length=None, + cls=None, + silent=True, + ): + if stream_factory is None: + stream_factory = default_stream_factory + self.stream_factory = stream_factory + self.charset = charset + self.errors = errors + self.max_form_memory_size = max_form_memory_size + self.max_content_length = max_content_length + if cls is None: + cls = MultiDict + self.cls = cls + self.silent = silent + + def get_parse_func(self, mimetype, options): + return self.parse_functions.get(mimetype) + + def parse_from_environ(self, environ): + """Parses the information from the environment as form data. + + :param environ: the WSGI environment to be used for parsing. + :return: A tuple in the form ``(stream, form, files)``. + """ + content_type = environ.get("CONTENT_TYPE", "") + content_length = get_content_length(environ) + mimetype, options = parse_options_header(content_type) + return self.parse(get_input_stream(environ), mimetype, content_length, options) + + def parse(self, stream, mimetype, content_length, options=None): + """Parses the information from the given stream, mimetype, + content length and mimetype parameters. + + :param stream: an input stream + :param mimetype: the mimetype of the data + :param content_length: the content length of the incoming data + :param options: optional mimetype parameters (used for + the multipart boundary for instance) + :return: A tuple in the form ``(stream, form, files)``. + """ + if ( + self.max_content_length is not None + and content_length is not None + and content_length > self.max_content_length + ): + raise exceptions.RequestEntityTooLarge() + if options is None: + options = {} + + parse_func = self.get_parse_func(mimetype, options) + if parse_func is not None: + try: + return parse_func(self, stream, mimetype, content_length, options) + except ValueError: + if not self.silent: + raise + + return stream, self.cls(), self.cls() + + @exhaust_stream + def _parse_multipart(self, stream, mimetype, content_length, options): + parser = MultiPartParser( + self.stream_factory, + self.charset, + self.errors, + max_form_memory_size=self.max_form_memory_size, + cls=self.cls, + ) + boundary = options.get("boundary") + if boundary is None: + raise ValueError("Missing boundary") + if isinstance(boundary, text_type): + boundary = boundary.encode("ascii") + form, files = parser.parse(stream, boundary, content_length) + return stream, form, files + + @exhaust_stream + def _parse_urlencoded(self, stream, mimetype, content_length, options): + if ( + self.max_form_memory_size is not None + and content_length is not None + and content_length > self.max_form_memory_size + ): + raise exceptions.RequestEntityTooLarge() + form = url_decode_stream(stream, self.charset, errors=self.errors, cls=self.cls) + return stream, form, self.cls() + + #: mapping of mimetypes to parsing functions + parse_functions = { + "multipart/form-data": _parse_multipart, + "application/x-www-form-urlencoded": _parse_urlencoded, + "application/x-url-encoded": _parse_urlencoded, + } + + +def is_valid_multipart_boundary(boundary): + """Checks if the string given is a valid multipart boundary.""" + return _multipart_boundary_re.match(boundary) is not None + + +def _line_parse(line): + """Removes line ending characters and returns a tuple (`stripped_line`, + `is_terminated`). + """ + if line[-2:] in ["\r\n", b"\r\n"]: + return line[:-2], True + elif line[-1:] in ["\r", "\n", b"\r", b"\n"]: + return line[:-1], True + return line, False + + +def parse_multipart_headers(iterable): + """Parses multipart headers from an iterable that yields lines (including + the trailing newline symbol). The iterable has to be newline terminated. + + The iterable will stop at the line where the headers ended so it can be + further consumed. + + :param iterable: iterable of strings that are newline terminated + """ + result = [] + for line in iterable: + line = to_native(line) + line, line_terminated = _line_parse(line) + if not line_terminated: + raise ValueError("unexpected end of line in multipart header") + if not line: + break + elif line[0] in " \t" and result: + key, value = result[-1] + result[-1] = (key, value + "\n " + line[1:]) + else: + parts = line.split(":", 1) + if len(parts) == 2: + result.append((parts[0].strip(), parts[1].strip())) + + # we link the list to the headers, no need to create a copy, the + # list was not shared anyways. + return Headers(result) + + +_begin_form = "begin_form" +_begin_file = "begin_file" +_cont = "cont" +_end = "end" + + +class MultiPartParser(object): + def __init__( + self, + stream_factory=None, + charset="utf-8", + errors="replace", + max_form_memory_size=None, + cls=None, + buffer_size=64 * 1024, + ): + self.charset = charset + self.errors = errors + self.max_form_memory_size = max_form_memory_size + self.stream_factory = ( + default_stream_factory if stream_factory is None else stream_factory + ) + self.cls = MultiDict if cls is None else cls + + # make sure the buffer size is divisible by four so that we can base64 + # decode chunk by chunk + assert buffer_size % 4 == 0, "buffer size has to be divisible by 4" + # also the buffer size has to be at least 1024 bytes long or long headers + # will freak out the system + assert buffer_size >= 1024, "buffer size has to be at least 1KB" + + self.buffer_size = buffer_size + + def _fix_ie_filename(self, filename): + """Internet Explorer 6 transmits the full file name if a file is + uploaded. This function strips the full path if it thinks the + filename is Windows-like absolute. + """ + if filename[1:3] == ":\\" or filename[:2] == "\\\\": + return filename.split("\\")[-1] + return filename + + def _find_terminator(self, iterator): + """The terminator might have some additional newlines before it. + There is at least one application that sends additional newlines + before headers (the python setuptools package). + """ + for line in iterator: + if not line: + break + line = line.strip() + if line: + return line + return b"" + + def fail(self, message): + raise ValueError(message) + + def get_part_encoding(self, headers): + transfer_encoding = headers.get("content-transfer-encoding") + if ( + transfer_encoding is not None + and transfer_encoding in _supported_multipart_encodings + ): + return transfer_encoding + + def get_part_charset(self, headers): + # Figure out input charset for current part + content_type = headers.get("content-type") + if content_type: + mimetype, ct_params = parse_options_header(content_type) + return ct_params.get("charset", self.charset) + return self.charset + + def start_file_streaming(self, filename, headers, total_content_length): + if isinstance(filename, bytes): + filename = filename.decode(self.charset, self.errors) + filename = self._fix_ie_filename(filename) + content_type = headers.get("content-type") + try: + content_length = int(headers["content-length"]) + except (KeyError, ValueError): + content_length = 0 + container = self.stream_factory( + total_content_length=total_content_length, + filename=filename, + content_type=content_type, + content_length=content_length, + ) + return filename, container + + def in_memory_threshold_reached(self, bytes): + raise exceptions.RequestEntityTooLarge() + + def validate_boundary(self, boundary): + if not boundary: + self.fail("Missing boundary") + if not is_valid_multipart_boundary(boundary): + self.fail("Invalid boundary: %s" % boundary) + if len(boundary) > self.buffer_size: # pragma: no cover + # this should never happen because we check for a minimum size + # of 1024 and boundaries may not be longer than 200. The only + # situation when this happens is for non debug builds where + # the assert is skipped. + self.fail("Boundary longer than buffer size") + + def parse_lines(self, file, boundary, content_length, cap_at_buffer=True): + """Generate parts of + ``('begin_form', (headers, name))`` + ``('begin_file', (headers, name, filename))`` + ``('cont', bytestring)`` + ``('end', None)`` + + Always obeys the grammar + parts = ( begin_form cont* end | + begin_file cont* end )* + """ + next_part = b"--" + boundary + last_part = next_part + b"--" + + iterator = chain( + make_line_iter( + file, + limit=content_length, + buffer_size=self.buffer_size, + cap_at_buffer=cap_at_buffer, + ), + _empty_string_iter, + ) + + terminator = self._find_terminator(iterator) + + if terminator == last_part: + return + elif terminator != next_part: + self.fail("Expected boundary at start of multipart data") + + while terminator != last_part: + headers = parse_multipart_headers(iterator) + + disposition = headers.get("content-disposition") + if disposition is None: + self.fail("Missing Content-Disposition header") + disposition, extra = parse_options_header(disposition) + transfer_encoding = self.get_part_encoding(headers) + name = extra.get("name") + filename = extra.get("filename") + + # if no content type is given we stream into memory. A list is + # used as a temporary container. + if filename is None: + yield _begin_form, (headers, name) + + # otherwise we parse the rest of the headers and ask the stream + # factory for something we can write in. + else: + yield _begin_file, (headers, name, filename) + + buf = b"" + for line in iterator: + if not line: + self.fail("unexpected end of stream") + + if line[:2] == b"--": + terminator = line.rstrip() + if terminator in (next_part, last_part): + break + + if transfer_encoding is not None: + if transfer_encoding == "base64": + transfer_encoding = "base64_codec" + try: + line = codecs.decode(line, transfer_encoding) + except Exception: + self.fail("could not decode transfer encoded chunk") + + # we have something in the buffer from the last iteration. + # this is usually a newline delimiter. + if buf: + yield _cont, buf + buf = b"" + + # If the line ends with windows CRLF we write everything except + # the last two bytes. In all other cases however we write + # everything except the last byte. If it was a newline, that's + # fine, otherwise it does not matter because we will write it + # the next iteration. this ensures we do not write the + # final newline into the stream. That way we do not have to + # truncate the stream. However we do have to make sure that + # if something else than a newline is in there we write it + # out. + if line[-2:] == b"\r\n": + buf = b"\r\n" + cutoff = -2 + else: + buf = line[-1:] + cutoff = -1 + yield _cont, line[:cutoff] + + else: # pragma: no cover + raise ValueError("unexpected end of part") + + # if we have a leftover in the buffer that is not a newline + # character we have to flush it, otherwise we will chop of + # certain values. + if buf not in (b"", b"\r", b"\n", b"\r\n"): + yield _cont, buf + + yield _end, None + + def parse_parts(self, file, boundary, content_length): + """Generate ``('file', (name, val))`` and + ``('form', (name, val))`` parts. + """ + in_memory = 0 + + for ellt, ell in self.parse_lines(file, boundary, content_length): + if ellt == _begin_file: + headers, name, filename = ell + is_file = True + guard_memory = False + filename, container = self.start_file_streaming( + filename, headers, content_length + ) + _write = container.write + + elif ellt == _begin_form: + headers, name = ell + is_file = False + container = [] + _write = container.append + guard_memory = self.max_form_memory_size is not None + + elif ellt == _cont: + _write(ell) + # if we write into memory and there is a memory size limit we + # count the number of bytes in memory and raise an exception if + # there is too much data in memory. + if guard_memory: + in_memory += len(ell) + if in_memory > self.max_form_memory_size: + self.in_memory_threshold_reached(in_memory) + + elif ellt == _end: + if is_file: + container.seek(0) + yield ( + "file", + (name, FileStorage(container, filename, name, headers=headers)), + ) + else: + part_charset = self.get_part_charset(headers) + yield ( + "form", + (name, b"".join(container).decode(part_charset, self.errors)), + ) + + def parse(self, file, boundary, content_length): + formstream, filestream = tee( + self.parse_parts(file, boundary, content_length), 2 + ) + form = (p[1] for p in formstream if p[0] == "form") + files = (p[1] for p in filestream if p[0] == "file") + return self.cls(form), self.cls(files) + + +from . import exceptions diff --git a/python/werkzeug/http.py b/python/werkzeug/http.py new file mode 100644 index 0000000..af32007 --- /dev/null +++ b/python/werkzeug/http.py @@ -0,0 +1,1303 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.http + ~~~~~~~~~~~~~ + + Werkzeug comes with a bunch of utilities that help Werkzeug to deal with + HTTP data. Most of the classes and functions provided by this module are + used by the wrappers, but they are useful on their own, too, especially if + the response and request objects are not used. + + This covers some of the more HTTP centric features of WSGI, some other + utilities such as cookie handling are documented in the `werkzeug.utils` + module. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import base64 +import re +import warnings +from datetime import datetime +from datetime import timedelta +from hashlib import md5 +from time import gmtime +from time import time + +from ._compat import integer_types +from ._compat import iteritems +from ._compat import PY2 +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_unicode +from ._compat import try_coerce_native +from ._internal import _cookie_parse_impl +from ._internal import _cookie_quote +from ._internal import _make_cookie_domain + +try: + from email.utils import parsedate_tz +except ImportError: + from email.Utils import parsedate_tz + +try: + from urllib.request import parse_http_list as _parse_list_header + from urllib.parse import unquote_to_bytes as _unquote +except ImportError: + from urllib2 import parse_http_list as _parse_list_header + from urllib2 import unquote as _unquote + +_cookie_charset = "latin1" +_basic_auth_charset = "utf-8" +# for explanation of "media-range", etc. see Sections 5.3.{1,2} of RFC 7231 +_accept_re = re.compile( + r""" + ( # media-range capturing-parenthesis + [^\s;,]+ # type/subtype + (?:[ \t]*;[ \t]* # ";" + (?: # parameter non-capturing-parenthesis + [^\s;,q][^\s;,]* # token that doesn't start with "q" + | # or + q[^\s;,=][^\s;,]* # token that is more than just "q" + ) + )* # zero or more parameters + ) # end of media-range + (?:[ \t]*;[ \t]*q= # weight is a "q" parameter + (\d*(?:\.\d+)?) # qvalue capturing-parentheses + [^,]* # "extension" accept params: who cares? + )? # accept params are optional + """, + re.VERBOSE, +) +_token_chars = frozenset( + "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~" +) +_etag_re = re.compile(r'([Ww]/)?(?:"(.*?)"|(.*?))(?:\s*,\s*|$)') +_unsafe_header_chars = set('()<>@,;:"/[]?={} \t') +_option_header_piece_re = re.compile( + r""" + ;\s*,?\s* # newlines were replaced with commas + (?P<key> + "[^"\\]*(?:\\.[^"\\]*)*" # quoted string + | + [^\s;,=*]+ # token + ) + (?:\*(?P<count>\d+))? # *1, optional continuation index + \s* + (?: # optionally followed by =value + (?: # equals sign, possibly with encoding + \*\s*=\s* # * indicates extended notation + (?: # optional encoding + (?P<encoding>[^\s]+?) + '(?P<language>[^\s]*?)' + )? + | + =\s* # basic notation + ) + (?P<value> + "[^"\\]*(?:\\.[^"\\]*)*" # quoted string + | + [^;,]+ # token + )? + )? + \s* + """, + flags=re.VERBOSE, +) +_option_header_start_mime_type = re.compile(r",\s*([^;,\s]+)([;,]\s*.+)?") + +_entity_headers = frozenset( + [ + "allow", + "content-encoding", + "content-language", + "content-length", + "content-location", + "content-md5", + "content-range", + "content-type", + "expires", + "last-modified", + ] +) +_hop_by_hop_headers = frozenset( + [ + "connection", + "keep-alive", + "proxy-authenticate", + "proxy-authorization", + "te", + "trailer", + "transfer-encoding", + "upgrade", + ] +) + + +HTTP_STATUS_CODES = { + 100: "Continue", + 101: "Switching Protocols", + 102: "Processing", + 200: "OK", + 201: "Created", + 202: "Accepted", + 203: "Non Authoritative Information", + 204: "No Content", + 205: "Reset Content", + 206: "Partial Content", + 207: "Multi Status", + 226: "IM Used", # see RFC 3229 + 300: "Multiple Choices", + 301: "Moved Permanently", + 302: "Found", + 303: "See Other", + 304: "Not Modified", + 305: "Use Proxy", + 307: "Temporary Redirect", + 308: "Permanent Redirect", + 400: "Bad Request", + 401: "Unauthorized", + 402: "Payment Required", # unused + 403: "Forbidden", + 404: "Not Found", + 405: "Method Not Allowed", + 406: "Not Acceptable", + 407: "Proxy Authentication Required", + 408: "Request Timeout", + 409: "Conflict", + 410: "Gone", + 411: "Length Required", + 412: "Precondition Failed", + 413: "Request Entity Too Large", + 414: "Request URI Too Long", + 415: "Unsupported Media Type", + 416: "Requested Range Not Satisfiable", + 417: "Expectation Failed", + 418: "I'm a teapot", # see RFC 2324 + 421: "Misdirected Request", # see RFC 7540 + 422: "Unprocessable Entity", + 423: "Locked", + 424: "Failed Dependency", + 426: "Upgrade Required", + 428: "Precondition Required", # see RFC 6585 + 429: "Too Many Requests", + 431: "Request Header Fields Too Large", + 449: "Retry With", # proprietary MS extension + 451: "Unavailable For Legal Reasons", + 500: "Internal Server Error", + 501: "Not Implemented", + 502: "Bad Gateway", + 503: "Service Unavailable", + 504: "Gateway Timeout", + 505: "HTTP Version Not Supported", + 507: "Insufficient Storage", + 510: "Not Extended", +} + + +def wsgi_to_bytes(data): + """coerce wsgi unicode represented bytes to real ones""" + if isinstance(data, bytes): + return data + return data.encode("latin1") # XXX: utf8 fallback? + + +def bytes_to_wsgi(data): + assert isinstance(data, bytes), "data must be bytes" + if isinstance(data, str): + return data + else: + return data.decode("latin1") + + +def quote_header_value(value, extra_chars="", allow_token=True): + """Quote a header value if necessary. + + .. versionadded:: 0.5 + + :param value: the value to quote. + :param extra_chars: a list of extra characters to skip quoting. + :param allow_token: if this is enabled token values are returned + unchanged. + """ + if isinstance(value, bytes): + value = bytes_to_wsgi(value) + value = str(value) + if allow_token: + token_chars = _token_chars | set(extra_chars) + if set(value).issubset(token_chars): + return value + return '"%s"' % value.replace("\\", "\\\\").replace('"', '\\"') + + +def unquote_header_value(value, is_filename=False): + r"""Unquotes a header value. (Reversal of :func:`quote_header_value`). + This does not use the real unquoting but what browsers are actually + using for quoting. + + .. versionadded:: 0.5 + + :param value: the header value to unquote. + """ + if value and value[0] == value[-1] == '"': + # this is not the real unquoting, but fixing this so that the + # RFC is met will result in bugs with internet explorer and + # probably some other browsers as well. IE for example is + # uploading files with "C:\foo\bar.txt" as filename + value = value[1:-1] + + # if this is a filename and the starting characters look like + # a UNC path, then just return the value without quotes. Using the + # replace sequence below on a UNC path has the effect of turning + # the leading double slash into a single slash and then + # _fix_ie_filename() doesn't work correctly. See #458. + if not is_filename or value[:2] != "\\\\": + return value.replace("\\\\", "\\").replace('\\"', '"') + return value + + +def dump_options_header(header, options): + """The reverse function to :func:`parse_options_header`. + + :param header: the header to dump + :param options: a dict of options to append. + """ + segments = [] + if header is not None: + segments.append(header) + for key, value in iteritems(options): + if value is None: + segments.append(key) + else: + segments.append("%s=%s" % (key, quote_header_value(value))) + return "; ".join(segments) + + +def dump_header(iterable, allow_token=True): + """Dump an HTTP header again. This is the reversal of + :func:`parse_list_header`, :func:`parse_set_header` and + :func:`parse_dict_header`. This also quotes strings that include an + equals sign unless you pass it as dict of key, value pairs. + + >>> dump_header({'foo': 'bar baz'}) + 'foo="bar baz"' + >>> dump_header(('foo', 'bar baz')) + 'foo, "bar baz"' + + :param iterable: the iterable or dict of values to quote. + :param allow_token: if set to `False` tokens as values are disallowed. + See :func:`quote_header_value` for more details. + """ + if isinstance(iterable, dict): + items = [] + for key, value in iteritems(iterable): + if value is None: + items.append(key) + else: + items.append( + "%s=%s" % (key, quote_header_value(value, allow_token=allow_token)) + ) + else: + items = [quote_header_value(x, allow_token=allow_token) for x in iterable] + return ", ".join(items) + + +def parse_list_header(value): + """Parse lists as described by RFC 2068 Section 2. + + In particular, parse comma-separated lists where the elements of + the list may include quoted-strings. A quoted-string could + contain a comma. A non-quoted string could have quotes in the + middle. Quotes are removed automatically after parsing. + + It basically works like :func:`parse_set_header` just that items + may appear multiple times and case sensitivity is preserved. + + The return value is a standard :class:`list`: + + >>> parse_list_header('token, "quoted value"') + ['token', 'quoted value'] + + To create a header from the :class:`list` again, use the + :func:`dump_header` function. + + :param value: a string with a list header. + :return: :class:`list` + """ + result = [] + for item in _parse_list_header(value): + if item[:1] == item[-1:] == '"': + item = unquote_header_value(item[1:-1]) + result.append(item) + return result + + +def parse_dict_header(value, cls=dict): + """Parse lists of key, value pairs as described by RFC 2068 Section 2 and + convert them into a python dict (or any other mapping object created from + the type with a dict like interface provided by the `cls` argument): + + >>> d = parse_dict_header('foo="is a fish", bar="as well"') + >>> type(d) is dict + True + >>> sorted(d.items()) + [('bar', 'as well'), ('foo', 'is a fish')] + + If there is no value for a key it will be `None`: + + >>> parse_dict_header('key_without_value') + {'key_without_value': None} + + To create a header from the :class:`dict` again, use the + :func:`dump_header` function. + + .. versionchanged:: 0.9 + Added support for `cls` argument. + + :param value: a string with a dict header. + :param cls: callable to use for storage of parsed results. + :return: an instance of `cls` + """ + result = cls() + if not isinstance(value, text_type): + # XXX: validate + value = bytes_to_wsgi(value) + for item in _parse_list_header(value): + if "=" not in item: + result[item] = None + continue + name, value = item.split("=", 1) + if value[:1] == value[-1:] == '"': + value = unquote_header_value(value[1:-1]) + result[name] = value + return result + + +def parse_options_header(value, multiple=False): + """Parse a ``Content-Type`` like header into a tuple with the content + type and the options: + + >>> parse_options_header('text/html; charset=utf8') + ('text/html', {'charset': 'utf8'}) + + This should not be used to parse ``Cache-Control`` like headers that use + a slightly different format. For these headers use the + :func:`parse_dict_header` function. + + .. versionchanged:: 0.15 + :rfc:`2231` parameter continuations are handled. + + .. versionadded:: 0.5 + + :param value: the header to parse. + :param multiple: Whether try to parse and return multiple MIME types + :return: (mimetype, options) or (mimetype, options, mimetype, options, …) + if multiple=True + """ + if not value: + return "", {} + + result = [] + + value = "," + value.replace("\n", ",") + while value: + match = _option_header_start_mime_type.match(value) + if not match: + break + result.append(match.group(1)) # mimetype + options = {} + # Parse options + rest = match.group(2) + continued_encoding = None + while rest: + optmatch = _option_header_piece_re.match(rest) + if not optmatch: + break + option, count, encoding, language, option_value = optmatch.groups() + # Continuations don't have to supply the encoding after the + # first line. If we're in a continuation, track the current + # encoding to use for subsequent lines. Reset it when the + # continuation ends. + if not count: + continued_encoding = None + else: + if not encoding: + encoding = continued_encoding + continued_encoding = encoding + option = unquote_header_value(option) + if option_value is not None: + option_value = unquote_header_value(option_value, option == "filename") + if encoding is not None: + option_value = _unquote(option_value).decode(encoding) + if count: + # Continuations append to the existing value. For + # simplicity, this ignores the possibility of + # out-of-order indices, which shouldn't happen anyway. + options[option] = options.get(option, "") + option_value + else: + options[option] = option_value + rest = rest[optmatch.end() :] + result.append(options) + if multiple is False: + return tuple(result) + value = rest + + return tuple(result) if result else ("", {}) + + +def parse_accept_header(value, cls=None): + """Parses an HTTP Accept-* header. This does not implement a complete + valid algorithm but one that supports at least value and quality + extraction. + + Returns a new :class:`Accept` object (basically a list of ``(value, quality)`` + tuples sorted by the quality with some additional accessor methods). + + The second parameter can be a subclass of :class:`Accept` that is created + with the parsed values and returned. + + :param value: the accept header string to be parsed. + :param cls: the wrapper class for the return value (can be + :class:`Accept` or a subclass thereof) + :return: an instance of `cls`. + """ + if cls is None: + cls = Accept + + if not value: + return cls(None) + + result = [] + for match in _accept_re.finditer(value): + quality = match.group(2) + if not quality: + quality = 1 + else: + quality = max(min(float(quality), 1), 0) + result.append((match.group(1), quality)) + return cls(result) + + +def parse_cache_control_header(value, on_update=None, cls=None): + """Parse a cache control header. The RFC differs between response and + request cache control, this method does not. It's your responsibility + to not use the wrong control statements. + + .. versionadded:: 0.5 + The `cls` was added. If not specified an immutable + :class:`~werkzeug.datastructures.RequestCacheControl` is returned. + + :param value: a cache control header to be parsed. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.CacheControl` + object is changed. + :param cls: the class for the returned object. By default + :class:`~werkzeug.datastructures.RequestCacheControl` is used. + :return: a `cls` object. + """ + if cls is None: + cls = RequestCacheControl + if not value: + return cls(None, on_update) + return cls(parse_dict_header(value), on_update) + + +def parse_set_header(value, on_update=None): + """Parse a set-like header and return a + :class:`~werkzeug.datastructures.HeaderSet` object: + + >>> hs = parse_set_header('token, "quoted value"') + + The return value is an object that treats the items case-insensitively + and keeps the order of the items: + + >>> 'TOKEN' in hs + True + >>> hs.index('quoted value') + 1 + >>> hs + HeaderSet(['token', 'quoted value']) + + To create a header from the :class:`HeaderSet` again, use the + :func:`dump_header` function. + + :param value: a set header to be parsed. + :param on_update: an optional callable that is called every time a + value on the :class:`~werkzeug.datastructures.HeaderSet` + object is changed. + :return: a :class:`~werkzeug.datastructures.HeaderSet` + """ + if not value: + return HeaderSet(None, on_update) + return HeaderSet(parse_list_header(value), on_update) + + +def parse_authorization_header(value): + """Parse an HTTP basic/digest authorization header transmitted by the web + browser. The return value is either `None` if the header was invalid or + not given, otherwise an :class:`~werkzeug.datastructures.Authorization` + object. + + :param value: the authorization header to parse. + :return: a :class:`~werkzeug.datastructures.Authorization` object or `None`. + """ + if not value: + return + value = wsgi_to_bytes(value) + try: + auth_type, auth_info = value.split(None, 1) + auth_type = auth_type.lower() + except ValueError: + return + if auth_type == b"basic": + try: + username, password = base64.b64decode(auth_info).split(b":", 1) + except Exception: + return + return Authorization( + "basic", + { + "username": to_unicode(username, _basic_auth_charset), + "password": to_unicode(password, _basic_auth_charset), + }, + ) + elif auth_type == b"digest": + auth_map = parse_dict_header(auth_info) + for key in "username", "realm", "nonce", "uri", "response": + if key not in auth_map: + return + if "qop" in auth_map: + if not auth_map.get("nc") or not auth_map.get("cnonce"): + return + return Authorization("digest", auth_map) + + +def parse_www_authenticate_header(value, on_update=None): + """Parse an HTTP WWW-Authenticate header into a + :class:`~werkzeug.datastructures.WWWAuthenticate` object. + + :param value: a WWW-Authenticate header to parse. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.WWWAuthenticate` + object is changed. + :return: a :class:`~werkzeug.datastructures.WWWAuthenticate` object. + """ + if not value: + return WWWAuthenticate(on_update=on_update) + try: + auth_type, auth_info = value.split(None, 1) + auth_type = auth_type.lower() + except (ValueError, AttributeError): + return WWWAuthenticate(value.strip().lower(), on_update=on_update) + return WWWAuthenticate(auth_type, parse_dict_header(auth_info), on_update) + + +def parse_if_range_header(value): + """Parses an if-range header which can be an etag or a date. Returns + a :class:`~werkzeug.datastructures.IfRange` object. + + .. versionadded:: 0.7 + """ + if not value: + return IfRange() + date = parse_date(value) + if date is not None: + return IfRange(date=date) + # drop weakness information + return IfRange(unquote_etag(value)[0]) + + +def parse_range_header(value, make_inclusive=True): + """Parses a range header into a :class:`~werkzeug.datastructures.Range` + object. If the header is missing or malformed `None` is returned. + `ranges` is a list of ``(start, stop)`` tuples where the ranges are + non-inclusive. + + .. versionadded:: 0.7 + """ + if not value or "=" not in value: + return None + + ranges = [] + last_end = 0 + units, rng = value.split("=", 1) + units = units.strip().lower() + + for item in rng.split(","): + item = item.strip() + if "-" not in item: + return None + if item.startswith("-"): + if last_end < 0: + return None + try: + begin = int(item) + except ValueError: + return None + end = None + last_end = -1 + elif "-" in item: + begin, end = item.split("-", 1) + begin = begin.strip() + end = end.strip() + if not begin.isdigit(): + return None + begin = int(begin) + if begin < last_end or last_end < 0: + return None + if end: + if not end.isdigit(): + return None + end = int(end) + 1 + if begin >= end: + return None + else: + end = None + last_end = end + ranges.append((begin, end)) + + return Range(units, ranges) + + +def parse_content_range_header(value, on_update=None): + """Parses a range header into a + :class:`~werkzeug.datastructures.ContentRange` object or `None` if + parsing is not possible. + + .. versionadded:: 0.7 + + :param value: a content range header to be parsed. + :param on_update: an optional callable that is called every time a value + on the :class:`~werkzeug.datastructures.ContentRange` + object is changed. + """ + if value is None: + return None + try: + units, rangedef = (value or "").strip().split(None, 1) + except ValueError: + return None + + if "/" not in rangedef: + return None + rng, length = rangedef.split("/", 1) + if length == "*": + length = None + elif length.isdigit(): + length = int(length) + else: + return None + + if rng == "*": + return ContentRange(units, None, None, length, on_update=on_update) + elif "-" not in rng: + return None + + start, stop = rng.split("-", 1) + try: + start = int(start) + stop = int(stop) + 1 + except ValueError: + return None + + if is_byte_range_valid(start, stop, length): + return ContentRange(units, start, stop, length, on_update=on_update) + + +def quote_etag(etag, weak=False): + """Quote an etag. + + :param etag: the etag to quote. + :param weak: set to `True` to tag it "weak". + """ + if '"' in etag: + raise ValueError("invalid etag") + etag = '"%s"' % etag + if weak: + etag = "W/" + etag + return etag + + +def unquote_etag(etag): + """Unquote a single etag: + + >>> unquote_etag('W/"bar"') + ('bar', True) + >>> unquote_etag('"bar"') + ('bar', False) + + :param etag: the etag identifier to unquote. + :return: a ``(etag, weak)`` tuple. + """ + if not etag: + return None, None + etag = etag.strip() + weak = False + if etag.startswith(("W/", "w/")): + weak = True + etag = etag[2:] + if etag[:1] == etag[-1:] == '"': + etag = etag[1:-1] + return etag, weak + + +def parse_etags(value): + """Parse an etag header. + + :param value: the tag header to parse + :return: an :class:`~werkzeug.datastructures.ETags` object. + """ + if not value: + return ETags() + strong = [] + weak = [] + end = len(value) + pos = 0 + while pos < end: + match = _etag_re.match(value, pos) + if match is None: + break + is_weak, quoted, raw = match.groups() + if raw == "*": + return ETags(star_tag=True) + elif quoted: + raw = quoted + if is_weak: + weak.append(raw) + else: + strong.append(raw) + pos = match.end() + return ETags(strong, weak) + + +def generate_etag(data): + """Generate an etag for some data.""" + return md5(data).hexdigest() + + +def parse_date(value): + """Parse one of the following date formats into a datetime object: + + .. sourcecode:: text + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + + If parsing fails the return value is `None`. + + :param value: a string with a supported date format. + :return: a :class:`datetime.datetime` object. + """ + if value: + t = parsedate_tz(value.strip()) + if t is not None: + try: + year = t[0] + # unfortunately that function does not tell us if two digit + # years were part of the string, or if they were prefixed + # with two zeroes. So what we do is to assume that 69-99 + # refer to 1900, and everything below to 2000 + if year >= 0 and year <= 68: + year += 2000 + elif year >= 69 and year <= 99: + year += 1900 + return datetime(*((year,) + t[1:7])) - timedelta(seconds=t[-1] or 0) + except (ValueError, OverflowError): + return None + + +def _dump_date(d, delim): + """Used for `http_date` and `cookie_date`.""" + if d is None: + d = gmtime() + elif isinstance(d, datetime): + d = d.utctimetuple() + elif isinstance(d, (integer_types, float)): + d = gmtime(d) + return "%s, %02d%s%s%s%s %02d:%02d:%02d GMT" % ( + ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[d.tm_wday], + d.tm_mday, + delim, + ( + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + )[d.tm_mon - 1], + delim, + str(d.tm_year), + d.tm_hour, + d.tm_min, + d.tm_sec, + ) + + +def cookie_date(expires=None): + """Formats the time to ensure compatibility with Netscape's cookie + standard. + + Accepts a floating point number expressed in seconds since the epoch in, a + datetime object or a timetuple. All times in UTC. The :func:`parse_date` + function can be used to parse such a date. + + Outputs a string in the format ``Wdy, DD-Mon-YYYY HH:MM:SS GMT``. + + :param expires: If provided that date is used, otherwise the current. + """ + return _dump_date(expires, "-") + + +def http_date(timestamp=None): + """Formats the time to match the RFC1123 date format. + + Accepts a floating point number expressed in seconds since the epoch in, a + datetime object or a timetuple. All times in UTC. The :func:`parse_date` + function can be used to parse such a date. + + Outputs a string in the format ``Wdy, DD Mon YYYY HH:MM:SS GMT``. + + :param timestamp: If provided that date is used, otherwise the current. + """ + return _dump_date(timestamp, " ") + + +def parse_age(value=None): + """Parses a base-10 integer count of seconds into a timedelta. + + If parsing fails, the return value is `None`. + + :param value: a string consisting of an integer represented in base-10 + :return: a :class:`datetime.timedelta` object or `None`. + """ + if not value: + return None + try: + seconds = int(value) + except ValueError: + return None + if seconds < 0: + return None + try: + return timedelta(seconds=seconds) + except OverflowError: + return None + + +def dump_age(age=None): + """Formats the duration as a base-10 integer. + + :param age: should be an integer number of seconds, + a :class:`datetime.timedelta` object, or, + if the age is unknown, `None` (default). + """ + if age is None: + return + if isinstance(age, timedelta): + # do the equivalent of Python 2.7's timedelta.total_seconds(), + # but disregarding fractional seconds + age = age.seconds + (age.days * 24 * 3600) + + age = int(age) + if age < 0: + raise ValueError("age cannot be negative") + + return str(age) + + +def is_resource_modified( + environ, etag=None, data=None, last_modified=None, ignore_if_range=True +): + """Convenience method for conditional requests. + + :param environ: the WSGI environment of the request to be checked. + :param etag: the etag for the response for comparison. + :param data: or alternatively the data of the response to automatically + generate an etag using :func:`generate_etag`. + :param last_modified: an optional date of the last modification. + :param ignore_if_range: If `False`, `If-Range` header will be taken into + account. + :return: `True` if the resource was modified, otherwise `False`. + """ + if etag is None and data is not None: + etag = generate_etag(data) + elif data is not None: + raise TypeError("both data and etag given") + if environ["REQUEST_METHOD"] not in ("GET", "HEAD"): + return False + + unmodified = False + if isinstance(last_modified, string_types): + last_modified = parse_date(last_modified) + + # ensure that microsecond is zero because the HTTP spec does not transmit + # that either and we might have some false positives. See issue #39 + if last_modified is not None: + last_modified = last_modified.replace(microsecond=0) + + if_range = None + if not ignore_if_range and "HTTP_RANGE" in environ: + # https://tools.ietf.org/html/rfc7233#section-3.2 + # A server MUST ignore an If-Range header field received in a request + # that does not contain a Range header field. + if_range = parse_if_range_header(environ.get("HTTP_IF_RANGE")) + + if if_range is not None and if_range.date is not None: + modified_since = if_range.date + else: + modified_since = parse_date(environ.get("HTTP_IF_MODIFIED_SINCE")) + + if modified_since and last_modified and last_modified <= modified_since: + unmodified = True + + if etag: + etag, _ = unquote_etag(etag) + if if_range is not None and if_range.etag is not None: + unmodified = parse_etags(if_range.etag).contains(etag) + else: + if_none_match = parse_etags(environ.get("HTTP_IF_NONE_MATCH")) + if if_none_match: + # https://tools.ietf.org/html/rfc7232#section-3.2 + # "A recipient MUST use the weak comparison function when comparing + # entity-tags for If-None-Match" + unmodified = if_none_match.contains_weak(etag) + + # https://tools.ietf.org/html/rfc7232#section-3.1 + # "Origin server MUST use the strong comparison function when + # comparing entity-tags for If-Match" + if_match = parse_etags(environ.get("HTTP_IF_MATCH")) + if if_match: + unmodified = not if_match.is_strong(etag) + + return not unmodified + + +def remove_entity_headers(headers, allowed=("expires", "content-location")): + """Remove all entity headers from a list or :class:`Headers` object. This + operation works in-place. `Expires` and `Content-Location` headers are + by default not removed. The reason for this is :rfc:`2616` section + 10.3.5 which specifies some entity headers that should be sent. + + .. versionchanged:: 0.5 + added `allowed` parameter. + + :param headers: a list or :class:`Headers` object. + :param allowed: a list of headers that should still be allowed even though + they are entity headers. + """ + allowed = set(x.lower() for x in allowed) + headers[:] = [ + (key, value) + for key, value in headers + if not is_entity_header(key) or key.lower() in allowed + ] + + +def remove_hop_by_hop_headers(headers): + """Remove all HTTP/1.1 "Hop-by-Hop" headers from a list or + :class:`Headers` object. This operation works in-place. + + .. versionadded:: 0.5 + + :param headers: a list or :class:`Headers` object. + """ + headers[:] = [ + (key, value) for key, value in headers if not is_hop_by_hop_header(key) + ] + + +def is_entity_header(header): + """Check if a header is an entity header. + + .. versionadded:: 0.5 + + :param header: the header to test. + :return: `True` if it's an entity header, `False` otherwise. + """ + return header.lower() in _entity_headers + + +def is_hop_by_hop_header(header): + """Check if a header is an HTTP/1.1 "Hop-by-Hop" header. + + .. versionadded:: 0.5 + + :param header: the header to test. + :return: `True` if it's an HTTP/1.1 "Hop-by-Hop" header, `False` otherwise. + """ + return header.lower() in _hop_by_hop_headers + + +def parse_cookie(header, charset="utf-8", errors="replace", cls=None): + """Parse a cookie. Either from a string or WSGI environ. + + Per default encoding errors are ignored. If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a + :exc:`HTTPUnicodeError` is raised. + + .. versionchanged:: 0.5 + This function now returns a :class:`TypeConversionDict` instead of a + regular dict. The `cls` parameter was added. + + :param header: the header to be used to parse the cookie. Alternatively + this can be a WSGI environment. + :param charset: the charset for the cookie values. + :param errors: the error behavior for the charset decoding. + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`TypeConversionDict` is + used. + """ + if isinstance(header, dict): + header = header.get("HTTP_COOKIE", "") + elif header is None: + header = "" + + # If the value is an unicode string it's mangled through latin1. This + # is done because on PEP 3333 on Python 3 all headers are assumed latin1 + # which however is incorrect for cookies, which are sent in page encoding. + # As a result we + if isinstance(header, text_type): + header = header.encode("latin1", "replace") + + if cls is None: + cls = TypeConversionDict + + def _parse_pairs(): + for key, val in _cookie_parse_impl(header): + key = to_unicode(key, charset, errors, allow_none_charset=True) + if not key: + continue + val = to_unicode(val, charset, errors, allow_none_charset=True) + yield try_coerce_native(key), val + + return cls(_parse_pairs()) + + +def dump_cookie( + key, + value="", + max_age=None, + expires=None, + path="/", + domain=None, + secure=False, + httponly=False, + charset="utf-8", + sync_expires=True, + max_size=4093, + samesite=None, +): + """Creates a new Set-Cookie header without the ``Set-Cookie`` prefix + The parameters are the same as in the cookie Morsel object in the + Python standard library but it accepts unicode data, too. + + On Python 3 the return value of this function will be a unicode + string, on Python 2 it will be a native string. In both cases the + return value is usually restricted to ascii as the vast majority of + values are properly escaped, but that is no guarantee. If a unicode + string is returned it's tunneled through latin1 as required by + PEP 3333. + + The return value is not ASCII safe if the key contains unicode + characters. This is technically against the specification but + happens in the wild. It's strongly recommended to not use + non-ASCII values for the keys. + + :param max_age: should be a number of seconds, or `None` (default) if + the cookie should last only as long as the client's + browser session. Additionally `timedelta` objects + are accepted, too. + :param expires: should be a `datetime` object or unix timestamp. + :param path: limits the cookie to a given path, per default it will + span the whole domain. + :param domain: Use this if you want to set a cross-domain cookie. For + example, ``domain=".example.com"`` will set a cookie + that is readable by the domain ``www.example.com``, + ``foo.example.com`` etc. Otherwise, a cookie will only + be readable by the domain that set it. + :param secure: The cookie will only be available via HTTPS + :param httponly: disallow JavaScript to access the cookie. This is an + extension to the cookie standard and probably not + supported by all browsers. + :param charset: the encoding for unicode values. + :param sync_expires: automatically set expires if max_age is defined + but expires not. + :param max_size: Warn if the final header value exceeds this size. The + default, 4093, should be safely `supported by most browsers + <cookie_>`_. Set to 0 to disable this check. + :param samesite: Limits the scope of the cookie such that it will only + be attached to requests if those requests are "same-site". + + .. _`cookie`: http://browsercookielimits.squawky.net/ + """ + key = to_bytes(key, charset) + value = to_bytes(value, charset) + + if path is not None: + path = iri_to_uri(path, charset) + domain = _make_cookie_domain(domain) + if isinstance(max_age, timedelta): + max_age = (max_age.days * 60 * 60 * 24) + max_age.seconds + if expires is not None: + if not isinstance(expires, string_types): + expires = cookie_date(expires) + elif max_age is not None and sync_expires: + expires = to_bytes(cookie_date(time() + max_age)) + + samesite = samesite.title() if samesite else None + if samesite not in ("Strict", "Lax", None): + raise ValueError("invalid SameSite value; must be 'Strict', 'Lax' or None") + + buf = [key + b"=" + _cookie_quote(value)] + + # XXX: In theory all of these parameters that are not marked with `None` + # should be quoted. Because stdlib did not quote it before I did not + # want to introduce quoting there now. + for k, v, q in ( + (b"Domain", domain, True), + (b"Expires", expires, False), + (b"Max-Age", max_age, False), + (b"Secure", secure, None), + (b"HttpOnly", httponly, None), + (b"Path", path, False), + (b"SameSite", samesite, False), + ): + if q is None: + if v: + buf.append(k) + continue + + if v is None: + continue + + tmp = bytearray(k) + if not isinstance(v, (bytes, bytearray)): + v = to_bytes(text_type(v), charset) + if q: + v = _cookie_quote(v) + tmp += b"=" + v + buf.append(bytes(tmp)) + + # The return value will be an incorrectly encoded latin1 header on + # Python 3 for consistency with the headers object and a bytestring + # on Python 2 because that's how the API makes more sense. + rv = b"; ".join(buf) + if not PY2: + rv = rv.decode("latin1") + + # Warn if the final value of the cookie is less than the limit. If the + # cookie is too large, then it may be silently ignored, which can be quite + # hard to debug. + cookie_size = len(rv) + + if max_size and cookie_size > max_size: + value_size = len(value) + warnings.warn( + 'The "{key}" cookie is too large: the value was {value_size} bytes' + " but the header required {extra_size} extra bytes. The final size" + " was {cookie_size} bytes but the limit is {max_size} bytes." + " Browsers may silently ignore cookies larger than this.".format( + key=key, + value_size=value_size, + extra_size=cookie_size - value_size, + cookie_size=cookie_size, + max_size=max_size, + ), + stacklevel=2, + ) + + return rv + + +def is_byte_range_valid(start, stop, length): + """Checks if a given byte content range is valid for the given length. + + .. versionadded:: 0.7 + """ + if (start is None) != (stop is None): + return False + elif start is None: + return length is None or length >= 0 + elif length is None: + return 0 <= start < stop + elif start >= stop: + return False + return 0 <= start < length + + +# circular dependency fun +from .datastructures import Accept +from .datastructures import Authorization +from .datastructures import ContentRange +from .datastructures import ETags +from .datastructures import HeaderSet +from .datastructures import IfRange +from .datastructures import Range +from .datastructures import RequestCacheControl +from .datastructures import TypeConversionDict +from .datastructures import WWWAuthenticate +from .urls import iri_to_uri + +# DEPRECATED +from .datastructures import CharsetAccept as _CharsetAccept +from .datastructures import Headers as _Headers +from .datastructures import LanguageAccept as _LanguageAccept +from .datastructures import MIMEAccept as _MIMEAccept + + +class MIMEAccept(_MIMEAccept): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.MIMEAccept' has moved to 'werkzeug" + ".datastructures.MIMEAccept' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(MIMEAccept, self).__init__(*args, **kwargs) + + +class CharsetAccept(_CharsetAccept): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.CharsetAccept' has moved to 'werkzeug" + ".datastructures.CharsetAccept' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(CharsetAccept, self).__init__(*args, **kwargs) + + +class LanguageAccept(_LanguageAccept): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.LanguageAccept' has moved to 'werkzeug" + ".datastructures.LanguageAccept' as of version 0.5. This" + " old import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(LanguageAccept, self).__init__(*args, **kwargs) + + +class Headers(_Headers): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.http.Headers' has moved to 'werkzeug" + ".datastructures.Headers' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(Headers, self).__init__(*args, **kwargs) diff --git a/python/werkzeug/local.py b/python/werkzeug/local.py new file mode 100644 index 0000000..9a6088c --- /dev/null +++ b/python/werkzeug/local.py @@ -0,0 +1,421 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.local + ~~~~~~~~~~~~~~ + + This module implements context-local objects. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import copy +from functools import update_wrapper + +from ._compat import implements_bool +from ._compat import PY2 +from .wsgi import ClosingIterator + +# since each thread has its own greenlet we can just use those as identifiers +# for the context. If greenlets are not available we fall back to the +# current thread ident depending on where it is. +try: + from greenlet import getcurrent as get_ident +except ImportError: + try: + from thread import get_ident + except ImportError: + from _thread import get_ident + + +def release_local(local): + """Releases the contents of the local for the current context. + This makes it possible to use locals without a manager. + + Example:: + + >>> loc = Local() + >>> loc.foo = 42 + >>> release_local(loc) + >>> hasattr(loc, 'foo') + False + + With this function one can release :class:`Local` objects as well + as :class:`LocalStack` objects. However it is not possible to + release data held by proxies that way, one always has to retain + a reference to the underlying local object in order to be able + to release it. + + .. versionadded:: 0.6.1 + """ + local.__release_local__() + + +class Local(object): + __slots__ = ("__storage__", "__ident_func__") + + def __init__(self): + object.__setattr__(self, "__storage__", {}) + object.__setattr__(self, "__ident_func__", get_ident) + + def __iter__(self): + return iter(self.__storage__.items()) + + def __call__(self, proxy): + """Create a proxy for a name.""" + return LocalProxy(self, proxy) + + def __release_local__(self): + self.__storage__.pop(self.__ident_func__(), None) + + def __getattr__(self, name): + try: + return self.__storage__[self.__ident_func__()][name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + ident = self.__ident_func__() + storage = self.__storage__ + try: + storage[ident][name] = value + except KeyError: + storage[ident] = {name: value} + + def __delattr__(self, name): + try: + del self.__storage__[self.__ident_func__()][name] + except KeyError: + raise AttributeError(name) + + +class LocalStack(object): + """This class works similar to a :class:`Local` but keeps a stack + of objects instead. This is best explained with an example:: + + >>> ls = LocalStack() + >>> ls.push(42) + >>> ls.top + 42 + >>> ls.push(23) + >>> ls.top + 23 + >>> ls.pop() + 23 + >>> ls.top + 42 + + They can be force released by using a :class:`LocalManager` or with + the :func:`release_local` function but the correct way is to pop the + item from the stack after using. When the stack is empty it will + no longer be bound to the current context (and as such released). + + By calling the stack without arguments it returns a proxy that resolves to + the topmost item on the stack. + + .. versionadded:: 0.6.1 + """ + + def __init__(self): + self._local = Local() + + def __release_local__(self): + self._local.__release_local__() + + def _get__ident_func__(self): + return self._local.__ident_func__ + + def _set__ident_func__(self, value): + object.__setattr__(self._local, "__ident_func__", value) + + __ident_func__ = property(_get__ident_func__, _set__ident_func__) + del _get__ident_func__, _set__ident_func__ + + def __call__(self): + def _lookup(): + rv = self.top + if rv is None: + raise RuntimeError("object unbound") + return rv + + return LocalProxy(_lookup) + + def push(self, obj): + """Pushes a new item to the stack""" + rv = getattr(self._local, "stack", None) + if rv is None: + self._local.stack = rv = [] + rv.append(obj) + return rv + + def pop(self): + """Removes the topmost item from the stack, will return the + old value or `None` if the stack was already empty. + """ + stack = getattr(self._local, "stack", None) + if stack is None: + return None + elif len(stack) == 1: + release_local(self._local) + return stack[-1] + else: + return stack.pop() + + @property + def top(self): + """The topmost item on the stack. If the stack is empty, + `None` is returned. + """ + try: + return self._local.stack[-1] + except (AttributeError, IndexError): + return None + + +class LocalManager(object): + """Local objects cannot manage themselves. For that you need a local + manager. You can pass a local manager multiple locals or add them later + by appending them to `manager.locals`. Every time the manager cleans up, + it will clean up all the data left in the locals for this context. + + The `ident_func` parameter can be added to override the default ident + function for the wrapped locals. + + .. versionchanged:: 0.6.1 + Instead of a manager the :func:`release_local` function can be used + as well. + + .. versionchanged:: 0.7 + `ident_func` was added. + """ + + def __init__(self, locals=None, ident_func=None): + if locals is None: + self.locals = [] + elif isinstance(locals, Local): + self.locals = [locals] + else: + self.locals = list(locals) + if ident_func is not None: + self.ident_func = ident_func + for local in self.locals: + object.__setattr__(local, "__ident_func__", ident_func) + else: + self.ident_func = get_ident + + def get_ident(self): + """Return the context identifier the local objects use internally for + this context. You cannot override this method to change the behavior + but use it to link other context local objects (such as SQLAlchemy's + scoped sessions) to the Werkzeug locals. + + .. versionchanged:: 0.7 + You can pass a different ident function to the local manager that + will then be propagated to all the locals passed to the + constructor. + """ + return self.ident_func() + + def cleanup(self): + """Manually clean up the data in the locals for this context. Call + this at the end of the request or use `make_middleware()`. + """ + for local in self.locals: + release_local(local) + + def make_middleware(self, app): + """Wrap a WSGI application so that cleaning up happens after + request end. + """ + + def application(environ, start_response): + return ClosingIterator(app(environ, start_response), self.cleanup) + + return application + + def middleware(self, func): + """Like `make_middleware` but for decorating functions. + + Example usage:: + + @manager.middleware + def application(environ, start_response): + ... + + The difference to `make_middleware` is that the function passed + will have all the arguments copied from the inner application + (name, docstring, module). + """ + return update_wrapper(self.make_middleware(func), func) + + def __repr__(self): + return "<%s storages: %d>" % (self.__class__.__name__, len(self.locals)) + + +@implements_bool +class LocalProxy(object): + """Acts as a proxy for a werkzeug local. Forwards all operations to + a proxied object. The only operations not supported for forwarding + are right handed operands and any kind of assignment. + + Example usage:: + + from werkzeug.local import Local + l = Local() + + # these are proxies + request = l('request') + user = l('user') + + + from werkzeug.local import LocalStack + _response_local = LocalStack() + + # this is a proxy + response = _response_local() + + Whenever something is bound to l.user / l.request the proxy objects + will forward all operations. If no object is bound a :exc:`RuntimeError` + will be raised. + + To create proxies to :class:`Local` or :class:`LocalStack` objects, + call the object as shown above. If you want to have a proxy to an + object looked up by a function, you can (as of Werkzeug 0.6.1) pass + a function to the :class:`LocalProxy` constructor:: + + session = LocalProxy(lambda: get_current_request().session) + + .. versionchanged:: 0.6.1 + The class can be instantiated with a callable as well now. + """ + + __slots__ = ("__local", "__dict__", "__name__", "__wrapped__") + + def __init__(self, local, name=None): + object.__setattr__(self, "_LocalProxy__local", local) + object.__setattr__(self, "__name__", name) + if callable(local) and not hasattr(local, "__release_local__"): + # "local" is a callable that is not an instance of Local or + # LocalManager: mark it as a wrapped function. + object.__setattr__(self, "__wrapped__", local) + + def _get_current_object(self): + """Return the current object. This is useful if you want the real + object behind the proxy at a time for performance reasons or because + you want to pass the object into a different context. + """ + if not hasattr(self.__local, "__release_local__"): + return self.__local() + try: + return getattr(self.__local, self.__name__) + except AttributeError: + raise RuntimeError("no object bound to %s" % self.__name__) + + @property + def __dict__(self): + try: + return self._get_current_object().__dict__ + except RuntimeError: + raise AttributeError("__dict__") + + def __repr__(self): + try: + obj = self._get_current_object() + except RuntimeError: + return "<%s unbound>" % self.__class__.__name__ + return repr(obj) + + def __bool__(self): + try: + return bool(self._get_current_object()) + except RuntimeError: + return False + + def __unicode__(self): + try: + return unicode(self._get_current_object()) # noqa + except RuntimeError: + return repr(self) + + def __dir__(self): + try: + return dir(self._get_current_object()) + except RuntimeError: + return [] + + def __getattr__(self, name): + if name == "__members__": + return dir(self._get_current_object()) + return getattr(self._get_current_object(), name) + + def __setitem__(self, key, value): + self._get_current_object()[key] = value + + def __delitem__(self, key): + del self._get_current_object()[key] + + if PY2: + __getslice__ = lambda x, i, j: x._get_current_object()[i:j] + + def __setslice__(self, i, j, seq): + self._get_current_object()[i:j] = seq + + def __delslice__(self, i, j): + del self._get_current_object()[i:j] + + __setattr__ = lambda x, n, v: setattr(x._get_current_object(), n, v) + __delattr__ = lambda x, n: delattr(x._get_current_object(), n) + __str__ = lambda x: str(x._get_current_object()) + __lt__ = lambda x, o: x._get_current_object() < o + __le__ = lambda x, o: x._get_current_object() <= o + __eq__ = lambda x, o: x._get_current_object() == o + __ne__ = lambda x, o: x._get_current_object() != o + __gt__ = lambda x, o: x._get_current_object() > o + __ge__ = lambda x, o: x._get_current_object() >= o + __cmp__ = lambda x, o: cmp(x._get_current_object(), o) # noqa + __hash__ = lambda x: hash(x._get_current_object()) + __call__ = lambda x, *a, **kw: x._get_current_object()(*a, **kw) + __len__ = lambda x: len(x._get_current_object()) + __getitem__ = lambda x, i: x._get_current_object()[i] + __iter__ = lambda x: iter(x._get_current_object()) + __contains__ = lambda x, i: i in x._get_current_object() + __add__ = lambda x, o: x._get_current_object() + o + __sub__ = lambda x, o: x._get_current_object() - o + __mul__ = lambda x, o: x._get_current_object() * o + __floordiv__ = lambda x, o: x._get_current_object() // o + __mod__ = lambda x, o: x._get_current_object() % o + __divmod__ = lambda x, o: x._get_current_object().__divmod__(o) + __pow__ = lambda x, o: x._get_current_object() ** o + __lshift__ = lambda x, o: x._get_current_object() << o + __rshift__ = lambda x, o: x._get_current_object() >> o + __and__ = lambda x, o: x._get_current_object() & o + __xor__ = lambda x, o: x._get_current_object() ^ o + __or__ = lambda x, o: x._get_current_object() | o + __div__ = lambda x, o: x._get_current_object().__div__(o) + __truediv__ = lambda x, o: x._get_current_object().__truediv__(o) + __neg__ = lambda x: -(x._get_current_object()) + __pos__ = lambda x: +(x._get_current_object()) + __abs__ = lambda x: abs(x._get_current_object()) + __invert__ = lambda x: ~(x._get_current_object()) + __complex__ = lambda x: complex(x._get_current_object()) + __int__ = lambda x: int(x._get_current_object()) + __long__ = lambda x: long(x._get_current_object()) # noqa + __float__ = lambda x: float(x._get_current_object()) + __oct__ = lambda x: oct(x._get_current_object()) + __hex__ = lambda x: hex(x._get_current_object()) + __index__ = lambda x: x._get_current_object().__index__() + __coerce__ = lambda x, o: x._get_current_object().__coerce__(x, o) + __enter__ = lambda x: x._get_current_object().__enter__() + __exit__ = lambda x, *a, **kw: x._get_current_object().__exit__(*a, **kw) + __radd__ = lambda x, o: o + x._get_current_object() + __rsub__ = lambda x, o: o - x._get_current_object() + __rmul__ = lambda x, o: o * x._get_current_object() + __rdiv__ = lambda x, o: o / x._get_current_object() + if PY2: + __rtruediv__ = lambda x, o: x._get_current_object().__rtruediv__(o) + else: + __rtruediv__ = __rdiv__ + __rfloordiv__ = lambda x, o: o // x._get_current_object() + __rmod__ = lambda x, o: o % x._get_current_object() + __rdivmod__ = lambda x, o: x._get_current_object().__rdivmod__(o) + __copy__ = lambda x: copy.copy(x._get_current_object()) + __deepcopy__ = lambda x, memo: copy.deepcopy(x._get_current_object(), memo) diff --git a/python/werkzeug/middleware/__init__.py b/python/werkzeug/middleware/__init__.py new file mode 100644 index 0000000..5e049f5 --- /dev/null +++ b/python/werkzeug/middleware/__init__.py @@ -0,0 +1,25 @@ +""" +Middleware +========== + +A WSGI middleware is a WSGI application that wraps another application +in order to observe or change its behavior. Werkzeug provides some +middleware for common use cases. + +.. toctree:: + :maxdepth: 1 + + proxy_fix + shared_data + dispatcher + http_proxy + lint + profiler + +The :doc:`interactive debugger </debug>` is also a middleware that can +be applied manually, although it is typically used automatically with +the :doc:`development server </serving>`. + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" diff --git a/python/werkzeug/middleware/dispatcher.py b/python/werkzeug/middleware/dispatcher.py new file mode 100644 index 0000000..2eb173e --- /dev/null +++ b/python/werkzeug/middleware/dispatcher.py @@ -0,0 +1,66 @@ +""" +Application Dispatcher +====================== + +This middleware creates a single WSGI application that dispatches to +multiple other WSGI applications mounted at different URL paths. + +A common example is writing a Single Page Application, where you have a +backend API and a frontend written in JavaScript that does the routing +in the browser rather than requesting different pages from the server. +The frontend is a single HTML and JS file that should be served for any +path besides "/api". + +This example dispatches to an API app under "/api", an admin app +under "/admin", and an app that serves frontend files for all other +requests:: + + app = DispatcherMiddleware(serve_frontend, { + '/api': api_app, + '/admin': admin_app, + }) + +In production, you might instead handle this at the HTTP server level, +serving files or proxying to application servers based on location. The +API and admin apps would each be deployed with a separate WSGI server, +and the static files would be served directly by the HTTP server. + +.. autoclass:: DispatcherMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" + + +class DispatcherMiddleware(object): + """Combine multiple applications as a single WSGI application. + Requests are dispatched to an application based on the path it is + mounted under. + + :param app: The WSGI application to dispatch to if the request + doesn't match a mounted path. + :param mounts: Maps path prefixes to applications for dispatching. + """ + + def __init__(self, app, mounts=None): + self.app = app + self.mounts = mounts or {} + + def __call__(self, environ, start_response): + script = environ.get("PATH_INFO", "") + path_info = "" + + while "/" in script: + if script in self.mounts: + app = self.mounts[script] + break + + script, last_item = script.rsplit("/", 1) + path_info = "/%s%s" % (last_item, path_info) + else: + app = self.mounts.get(script, self.app) + + original_script_name = environ.get("SCRIPT_NAME", "") + environ["SCRIPT_NAME"] = original_script_name + script + environ["PATH_INFO"] = path_info + return app(environ, start_response) diff --git a/python/werkzeug/middleware/http_proxy.py b/python/werkzeug/middleware/http_proxy.py new file mode 100644 index 0000000..bfdc071 --- /dev/null +++ b/python/werkzeug/middleware/http_proxy.py @@ -0,0 +1,219 @@ +""" +Basic HTTP Proxy +================ + +.. autoclass:: ProxyMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import socket + +from ..datastructures import EnvironHeaders +from ..http import is_hop_by_hop_header +from ..urls import url_parse +from ..urls import url_quote +from ..wsgi import get_input_stream + +try: + from http import client +except ImportError: + import httplib as client + + +class ProxyMiddleware(object): + """Proxy requests under a path to an external server, routing other + requests to the app. + + This middleware can only proxy HTTP requests, as that is the only + protocol handled by the WSGI server. Other protocols, such as + websocket requests, cannot be proxied at this layer. This should + only be used for development, in production a real proxying server + should be used. + + The middleware takes a dict that maps a path prefix to a dict + describing the host to be proxied to:: + + app = ProxyMiddleware(app, { + "/static/": { + "target": "http://127.0.0.1:5001/", + } + }) + + Each host has the following options: + + ``target``: + The target URL to dispatch to. This is required. + ``remove_prefix``: + Whether to remove the prefix from the URL before dispatching it + to the target. The default is ``False``. + ``host``: + ``"<auto>"`` (default): + The host header is automatically rewritten to the URL of the + target. + ``None``: + The host header is unmodified from the client request. + Any other value: + The host header is overwritten with the value. + ``headers``: + A dictionary of headers to be sent with the request to the + target. The default is ``{}``. + ``ssl_context``: + A :class:`ssl.SSLContext` defining how to verify requests if the + target is HTTPS. The default is ``None``. + + In the example above, everything under ``"/static/"`` is proxied to + the server on port 5001. The host header is rewritten to the target, + and the ``"/static/"`` prefix is removed from the URLs. + + :param app: The WSGI application to wrap. + :param targets: Proxy target configurations. See description above. + :param chunk_size: Size of chunks to read from input stream and + write to target. + :param timeout: Seconds before an operation to a target fails. + + .. versionadded:: 0.14 + """ + + def __init__(self, app, targets, chunk_size=2 << 13, timeout=10): + def _set_defaults(opts): + opts.setdefault("remove_prefix", False) + opts.setdefault("host", "<auto>") + opts.setdefault("headers", {}) + opts.setdefault("ssl_context", None) + return opts + + self.app = app + self.targets = dict( + ("/%s/" % k.strip("/"), _set_defaults(v)) for k, v in targets.items() + ) + self.chunk_size = chunk_size + self.timeout = timeout + + def proxy_to(self, opts, path, prefix): + target = url_parse(opts["target"]) + + def application(environ, start_response): + headers = list(EnvironHeaders(environ).items()) + headers[:] = [ + (k, v) + for k, v in headers + if not is_hop_by_hop_header(k) + and k.lower() not in ("content-length", "host") + ] + headers.append(("Connection", "close")) + + if opts["host"] == "<auto>": + headers.append(("Host", target.ascii_host)) + elif opts["host"] is None: + headers.append(("Host", environ["HTTP_HOST"])) + else: + headers.append(("Host", opts["host"])) + + headers.extend(opts["headers"].items()) + remote_path = path + + if opts["remove_prefix"]: + remote_path = "%s/%s" % ( + target.path.rstrip("/"), + remote_path[len(prefix) :].lstrip("/"), + ) + + content_length = environ.get("CONTENT_LENGTH") + chunked = False + + if content_length not in ("", None): + headers.append(("Content-Length", content_length)) + elif content_length is not None: + headers.append(("Transfer-Encoding", "chunked")) + chunked = True + + try: + if target.scheme == "http": + con = client.HTTPConnection( + target.ascii_host, target.port or 80, timeout=self.timeout + ) + elif target.scheme == "https": + con = client.HTTPSConnection( + target.ascii_host, + target.port or 443, + timeout=self.timeout, + context=opts["ssl_context"], + ) + else: + raise RuntimeError( + "Target scheme must be 'http' or 'https', got '{}'.".format( + target.scheme + ) + ) + + con.connect() + remote_url = url_quote(remote_path) + querystring = environ["QUERY_STRING"] + + if querystring: + remote_url = remote_url + "?" + querystring + + con.putrequest(environ["REQUEST_METHOD"], remote_url, skip_host=True) + + for k, v in headers: + if k.lower() == "connection": + v = "close" + + con.putheader(k, v) + + con.endheaders() + stream = get_input_stream(environ) + + while 1: + data = stream.read(self.chunk_size) + + if not data: + break + + if chunked: + con.send(b"%x\r\n%s\r\n" % (len(data), data)) + else: + con.send(data) + + resp = con.getresponse() + except socket.error: + from ..exceptions import BadGateway + + return BadGateway()(environ, start_response) + + start_response( + "%d %s" % (resp.status, resp.reason), + [ + (k.title(), v) + for k, v in resp.getheaders() + if not is_hop_by_hop_header(k) + ], + ) + + def read(): + while 1: + try: + data = resp.read(self.chunk_size) + except socket.error: + break + + if not data: + break + + yield data + + return read() + + return application + + def __call__(self, environ, start_response): + path = environ["PATH_INFO"] + app = self.app + + for prefix, opts in self.targets.items(): + if path.startswith(prefix): + app = self.proxy_to(opts, path, prefix) + break + + return app(environ, start_response) diff --git a/python/werkzeug/middleware/lint.py b/python/werkzeug/middleware/lint.py new file mode 100644 index 0000000..98f9581 --- /dev/null +++ b/python/werkzeug/middleware/lint.py @@ -0,0 +1,408 @@ +""" +WSGI Protocol Linter +==================== + +This module provides a middleware that performs sanity checks on the +behavior of the WSGI server and application. It checks that the +:pep:`3333` WSGI spec is properly implemented. It also warns on some +common HTTP errors such as non-empty responses for 304 status codes. + +.. autoclass:: LintMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from warnings import warn + +from .._compat import implements_iterator +from .._compat import PY2 +from .._compat import string_types +from ..datastructures import Headers +from ..http import is_entity_header +from ..wsgi import FileWrapper + +try: + from urllib.parse import urlparse +except ImportError: + from urlparse import urlparse + + +class WSGIWarning(Warning): + """Warning class for WSGI warnings.""" + + +class HTTPWarning(Warning): + """Warning class for HTTP warnings.""" + + +def check_string(context, obj, stacklevel=3): + if type(obj) is not str: + warn( + "'%s' requires strings, got '%s'" % (context, type(obj).__name__), + WSGIWarning, + ) + + +class InputStream(object): + def __init__(self, stream): + self._stream = stream + + def read(self, *args): + if len(args) == 0: + warn( + "WSGI does not guarantee an EOF marker on the input stream, thus making" + " calls to 'wsgi.input.read()' unsafe. Conforming servers may never" + " return from this call.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) != 1: + warn( + "Too many parameters passed to 'wsgi.input.read()'.", + WSGIWarning, + stacklevel=2, + ) + return self._stream.read(*args) + + def readline(self, *args): + if len(args) == 0: + warn( + "Calls to 'wsgi.input.readline()' without arguments are unsafe. Use" + " 'wsgi.input.read()' instead.", + WSGIWarning, + stacklevel=2, + ) + elif len(args) == 1: + warn( + "'wsgi.input.readline()' was called with a size hint. WSGI does not" + " support this, although it's available on all major servers.", + WSGIWarning, + stacklevel=2, + ) + else: + raise TypeError("Too many arguments passed to 'wsgi.input.readline()'.") + return self._stream.readline(*args) + + def __iter__(self): + try: + return iter(self._stream) + except TypeError: + warn("'wsgi.input' is not iterable.", WSGIWarning, stacklevel=2) + return iter(()) + + def close(self): + warn("The application closed the input stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class ErrorStream(object): + def __init__(self, stream): + self._stream = stream + + def write(self, s): + check_string("wsgi.error.write()", s) + self._stream.write(s) + + def flush(self): + self._stream.flush() + + def writelines(self, seq): + for line in seq: + self.write(line) + + def close(self): + warn("The application closed the error stream!", WSGIWarning, stacklevel=2) + self._stream.close() + + +class GuardedWrite(object): + def __init__(self, write, chunks): + self._write = write + self._chunks = chunks + + def __call__(self, s): + check_string("write()", s) + self._write.write(s) + self._chunks.append(len(s)) + + +@implements_iterator +class GuardedIterator(object): + def __init__(self, iterator, headers_set, chunks): + self._iterator = iterator + if PY2: + self._next = iter(iterator).next + else: + self._next = iter(iterator).__next__ + self.closed = False + self.headers_set = headers_set + self.chunks = chunks + + def __iter__(self): + return self + + def __next__(self): + if self.closed: + warn("Iterated over closed 'app_iter'.", WSGIWarning, stacklevel=2) + + rv = self._next() + + if not self.headers_set: + warn( + "The application returned before it started the response.", + WSGIWarning, + stacklevel=2, + ) + + check_string("application iterator items", rv) + self.chunks.append(len(rv)) + return rv + + def close(self): + self.closed = True + + if hasattr(self._iterator, "close"): + self._iterator.close() + + if self.headers_set: + status_code, headers = self.headers_set + bytes_sent = sum(self.chunks) + content_length = headers.get("content-length", type=int) + + if status_code == 304: + for key, _value in headers: + key = key.lower() + if key not in ("expires", "content-location") and is_entity_header( + key + ): + warn( + "Entity header %r found in 304 response." % key, HTTPWarning + ) + if bytes_sent: + warn("304 responses must not have a body.", HTTPWarning) + elif 100 <= status_code < 200 or status_code == 204: + if content_length != 0: + warn( + "%r responses must have an empty content length." % status_code, + HTTPWarning, + ) + if bytes_sent: + warn( + "%r responses must not have a body." % status_code, HTTPWarning + ) + elif content_length is not None and content_length != bytes_sent: + warn( + "Content-Length and the number of bytes sent to the client do not" + " match.", + WSGIWarning, + ) + + def __del__(self): + if not self.closed: + try: + warn( + "Iterator was garbage collected before it was closed.", WSGIWarning + ) + except Exception: + pass + + +class LintMiddleware(object): + """Warns about common errors in the WSGI and HTTP behavior of the + server and wrapped application. Some of the issues it check are: + + - invalid status codes + - non-bytestrings sent to the WSGI server + - strings returned from the WSGI application + - non-empty conditional responses + - unquoted etags + - relative URLs in the Location header + - unsafe calls to wsgi.input + - unclosed iterators + + Error information is emitted using the :mod:`warnings` module. + + :param app: The WSGI application to wrap. + + .. code-block:: python + + from werkzeug.middleware.lint import LintMiddleware + app = LintMiddleware(app) + """ + + def __init__(self, app): + self.app = app + + def check_environ(self, environ): + if type(environ) is not dict: + warn( + "WSGI environment is not a standard Python dict.", + WSGIWarning, + stacklevel=4, + ) + for key in ( + "REQUEST_METHOD", + "SERVER_NAME", + "SERVER_PORT", + "wsgi.version", + "wsgi.input", + "wsgi.errors", + "wsgi.multithread", + "wsgi.multiprocess", + "wsgi.run_once", + ): + if key not in environ: + warn( + "Required environment key %r not found" % key, + WSGIWarning, + stacklevel=3, + ) + if environ["wsgi.version"] != (1, 0): + warn("Environ is not a WSGI 1.0 environ.", WSGIWarning, stacklevel=3) + + script_name = environ.get("SCRIPT_NAME", "") + path_info = environ.get("PATH_INFO", "") + + if script_name and script_name[0] != "/": + warn( + "'SCRIPT_NAME' does not start with a slash: %r" % script_name, + WSGIWarning, + stacklevel=3, + ) + + if path_info and path_info[0] != "/": + warn( + "'PATH_INFO' does not start with a slash: %r" % path_info, + WSGIWarning, + stacklevel=3, + ) + + def check_start_response(self, status, headers, exc_info): + check_string("status", status) + status_code = status.split(None, 1)[0] + + if len(status_code) != 3 or not status_code.isdigit(): + warn(WSGIWarning("Status code must be three digits"), stacklevel=3) + + if len(status) < 4 or status[3] != " ": + warn( + WSGIWarning( + "Invalid value for status %r. Valid " + "status strings are three digits, a space " + "and a status explanation" + ), + stacklevel=3, + ) + + status_code = int(status_code) + + if status_code < 100: + warn(WSGIWarning("status code < 100 detected"), stacklevel=3) + + if type(headers) is not list: + warn(WSGIWarning("header list is not a list"), stacklevel=3) + + for item in headers: + if type(item) is not tuple or len(item) != 2: + warn(WSGIWarning("Headers must tuple 2-item tuples"), stacklevel=3) + name, value = item + if type(name) is not str or type(value) is not str: + warn(WSGIWarning("header items must be strings"), stacklevel=3) + if name.lower() == "status": + warn( + WSGIWarning( + "The status header is not supported due to " + "conflicts with the CGI spec." + ), + stacklevel=3, + ) + + if exc_info is not None and not isinstance(exc_info, tuple): + warn(WSGIWarning("invalid value for exc_info"), stacklevel=3) + + headers = Headers(headers) + self.check_headers(headers) + + return status_code, headers + + def check_headers(self, headers): + etag = headers.get("etag") + + if etag is not None: + if etag.startswith(("W/", "w/")): + if etag.startswith("w/"): + warn( + HTTPWarning("weak etag indicator should be upcase."), + stacklevel=4, + ) + + etag = etag[2:] + + if not (etag[:1] == etag[-1:] == '"'): + warn(HTTPWarning("unquoted etag emitted."), stacklevel=4) + + location = headers.get("location") + + if location is not None: + if not urlparse(location).netloc: + warn( + HTTPWarning("absolute URLs required for location header"), + stacklevel=4, + ) + + def check_iterator(self, app_iter): + if isinstance(app_iter, string_types): + warn( + "The application returned astring. The response will send one character" + " at a time to the client, which will kill performance. Return a list" + " or iterable instead.", + WSGIWarning, + stacklevel=3, + ) + + def __call__(self, *args, **kwargs): + if len(args) != 2: + warn("A WSGI app takes two arguments.", WSGIWarning, stacklevel=2) + + if kwargs: + warn( + "A WSGI app does not take keyword arguments.", WSGIWarning, stacklevel=2 + ) + + environ, start_response = args + + self.check_environ(environ) + environ["wsgi.input"] = InputStream(environ["wsgi.input"]) + environ["wsgi.errors"] = ErrorStream(environ["wsgi.errors"]) + + # Hook our own file wrapper in so that applications will always + # iterate to the end and we can check the content length. + environ["wsgi.file_wrapper"] = FileWrapper + + headers_set = [] + chunks = [] + + def checking_start_response(*args, **kwargs): + if len(args) not in (2, 3): + warn( + "Invalid number of arguments: %s, expected 2 or 3." % len(args), + WSGIWarning, + stacklevel=2, + ) + + if kwargs: + warn("'start_response' does not take keyword arguments.", WSGIWarning) + + status, headers = args[:2] + + if len(args) == 3: + exc_info = args[2] + else: + exc_info = None + + headers_set[:] = self.check_start_response(status, headers, exc_info) + return GuardedWrite(start_response(status, headers, exc_info), chunks) + + app_iter = self.app(environ, checking_start_response) + self.check_iterator(app_iter) + return GuardedIterator(app_iter, headers_set, chunks) diff --git a/python/werkzeug/middleware/profiler.py b/python/werkzeug/middleware/profiler.py new file mode 100644 index 0000000..32a14d9 --- /dev/null +++ b/python/werkzeug/middleware/profiler.py @@ -0,0 +1,132 @@ +""" +Application Profiler +==================== + +This module provides a middleware that profiles each request with the +:mod:`cProfile` module. This can help identify bottlenecks in your code +that may be slowing down your application. + +.. autoclass:: ProfilerMiddleware + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from __future__ import print_function + +import os.path +import sys +import time +from pstats import Stats + +try: + from cProfile import Profile +except ImportError: + from profile import Profile + + +class ProfilerMiddleware(object): + """Wrap a WSGI application and profile the execution of each + request. Responses are buffered so that timings are more exact. + + If ``stream`` is given, :class:`pstats.Stats` are written to it + after each request. If ``profile_dir`` is given, :mod:`cProfile` + data files are saved to that directory, one file per request. + + The filename can be customized by passing ``filename_format``. If + it is a string, it will be formatted using :meth:`str.format` with + the following fields available: + + - ``{method}`` - The request method; GET, POST, etc. + - ``{path}`` - The request path or 'root' should one not exist. + - ``{elapsed}`` - The elapsed time of the request. + - ``{time}`` - The time of the request. + + If it is a callable, it will be called with the WSGI ``environ`` + dict and should return a filename. + + :param app: The WSGI application to wrap. + :param stream: Write stats to this stream. Disable with ``None``. + :param sort_by: A tuple of columns to sort stats by. See + :meth:`pstats.Stats.sort_stats`. + :param restrictions: A tuple of restrictions to filter stats by. See + :meth:`pstats.Stats.print_stats`. + :param profile_dir: Save profile data files to this directory. + :param filename_format: Format string for profile data file names, + or a callable returning a name. See explanation above. + + .. code-block:: python + + from werkzeug.middleware.profiler import ProfilerMiddleware + app = ProfilerMiddleware(app) + + .. versionchanged:: 0.15 + Stats are written even if ``profile_dir`` is given, and can be + disable by passing ``stream=None``. + + .. versionadded:: 0.15 + Added ``filename_format``. + + .. versionadded:: 0.9 + Added ``restrictions`` and ``profile_dir``. + """ + + def __init__( + self, + app, + stream=sys.stdout, + sort_by=("time", "calls"), + restrictions=(), + profile_dir=None, + filename_format="{method}.{path}.{elapsed:.0f}ms.{time:.0f}.prof", + ): + self._app = app + self._stream = stream + self._sort_by = sort_by + self._restrictions = restrictions + self._profile_dir = profile_dir + self._filename_format = filename_format + + def __call__(self, environ, start_response): + response_body = [] + + def catching_start_response(status, headers, exc_info=None): + start_response(status, headers, exc_info) + return response_body.append + + def runapp(): + app_iter = self._app(environ, catching_start_response) + response_body.extend(app_iter) + + if hasattr(app_iter, "close"): + app_iter.close() + + profile = Profile() + start = time.time() + profile.runcall(runapp) + body = b"".join(response_body) + elapsed = time.time() - start + + if self._profile_dir is not None: + if callable(self._filename_format): + filename = self._filename_format(environ) + else: + filename = self._filename_format.format( + method=environ["REQUEST_METHOD"], + path=( + environ.get("PATH_INFO").strip("/").replace("/", ".") or "root" + ), + elapsed=elapsed * 1000.0, + time=time.time(), + ) + filename = os.path.join(self._profile_dir, filename) + profile.dump_stats(filename) + + if self._stream is not None: + stats = Stats(profile, stream=self._stream) + stats.sort_stats(*self._sort_by) + print("-" * 80, file=self._stream) + print("PATH: {!r}".format(environ.get("PATH_INFO", "")), file=self._stream) + stats.print_stats(*self._restrictions) + print("-" * 80 + "\n", file=self._stream) + + return [body] diff --git a/python/werkzeug/middleware/proxy_fix.py b/python/werkzeug/middleware/proxy_fix.py new file mode 100644 index 0000000..dc1dacc --- /dev/null +++ b/python/werkzeug/middleware/proxy_fix.py @@ -0,0 +1,228 @@ +""" +X-Forwarded-For Proxy Fix +========================= + +This module provides a middleware that adjusts the WSGI environ based on +``X-Forwarded-`` headers that proxies in front of an application may +set. + +When an application is running behind a proxy server, WSGI may see the +request as coming from that server rather than the real client. Proxies +set various headers to track where the request actually came from. + +This middleware should only be applied if the application is actually +behind such a proxy, and should be configured with the number of proxies +that are chained in front of it. Not all proxies set all the headers. +Since incoming headers can be faked, you must set how many proxies are +setting each header so the middleware knows what to trust. + +.. autoclass:: ProxyFix + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import warnings + + +class ProxyFix(object): + """Adjust the WSGI environ based on ``X-Forwarded-`` that proxies in + front of the application may set. + + - ``X-Forwarded-For`` sets ``REMOTE_ADDR``. + - ``X-Forwarded-Proto`` sets ``wsgi.url_scheme``. + - ``X-Forwarded-Host`` sets ``HTTP_HOST``, ``SERVER_NAME``, and + ``SERVER_PORT``. + - ``X-Forwarded-Port`` sets ``HTTP_HOST`` and ``SERVER_PORT``. + - ``X-Forwarded-Prefix`` sets ``SCRIPT_NAME``. + + You must tell the middleware how many proxies set each header so it + knows what values to trust. It is a security issue to trust values + that came from the client rather than a proxy. + + The original values of the headers are stored in the WSGI + environ as ``werkzeug.proxy_fix.orig``, a dict. + + :param app: The WSGI application to wrap. + :param x_for: Number of values to trust for ``X-Forwarded-For``. + :param x_proto: Number of values to trust for ``X-Forwarded-Proto``. + :param x_host: Number of values to trust for ``X-Forwarded-Host``. + :param x_port: Number of values to trust for ``X-Forwarded-Port``. + :param x_prefix: Number of values to trust for + ``X-Forwarded-Prefix``. + :param num_proxies: Deprecated, use ``x_for`` instead. + + .. code-block:: python + + from werkzeug.middleware.proxy_fix import ProxyFix + # App is behind one proxy that sets the -For and -Host headers. + app = ProxyFix(app, x_for=1, x_host=1) + + .. versionchanged:: 0.15 + All headers support multiple values. The ``num_proxies`` + argument is deprecated. Each header is configured with a + separate number of trusted proxies. + + .. versionchanged:: 0.15 + Original WSGI environ values are stored in the + ``werkzeug.proxy_fix.orig`` dict. ``orig_remote_addr``, + ``orig_wsgi_url_scheme``, and ``orig_http_host`` are deprecated + and will be removed in 1.0. + + .. versionchanged:: 0.15 + Support ``X-Forwarded-Port`` and ``X-Forwarded-Prefix``. + + .. versionchanged:: 0.15 + ``X-Fowarded-Host`` and ``X-Forwarded-Port`` modify + ``SERVER_NAME`` and ``SERVER_PORT``. + """ + + def __init__( + self, app, num_proxies=None, x_for=1, x_proto=0, x_host=0, x_port=0, x_prefix=0 + ): + self.app = app + self.x_for = x_for + self.x_proto = x_proto + self.x_host = x_host + self.x_port = x_port + self.x_prefix = x_prefix + self.num_proxies = num_proxies + + @property + def num_proxies(self): + """The number of proxies setting ``X-Forwarded-For`` in front + of the application. + + .. deprecated:: 0.15 + A separate number of trusted proxies is configured for each + header. ``num_proxies`` maps to ``x_for``. This method will + be removed in 1.0. + + :internal: + """ + warnings.warn( + "'num_proxies' is deprecated as of version 0.15 and will be" + " removed in version 1.0. Use 'x_for' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.x_for + + @num_proxies.setter + def num_proxies(self, value): + if value is not None: + warnings.warn( + "'num_proxies' is deprecated as of version 0.15 and" + " will be removed in version 1.0. Use 'x_for' instead.", + DeprecationWarning, + stacklevel=2, + ) + self.x_for = value + + def get_remote_addr(self, forwarded_for): + """Get the real ``remote_addr`` by looking backwards ``x_for`` + number of values in the ``X-Forwarded-For`` header. + + :param forwarded_for: List of values parsed from the + ``X-Forwarded-For`` header. + :return: The real ``remote_addr``, or ``None`` if there were not + at least ``x_for`` values. + + .. deprecated:: 0.15 + This is handled internally for each header. This method will + be removed in 1.0. + + .. versionchanged:: 0.9 + Use ``num_proxies`` instead of always picking the first + value. + + .. versionadded:: 0.8 + """ + warnings.warn( + "'get_remote_addr' is deprecated as of version 0.15 and" + " will be removed in version 1.0. It is now handled" + " internally for each header.", + DeprecationWarning, + ) + return self._get_trusted_comma(self.x_for, ",".join(forwarded_for)) + + def _get_trusted_comma(self, trusted, value): + """Get the real value from a comma-separated header based on the + configured number of trusted proxies. + + :param trusted: Number of values to trust in the header. + :param value: Header value to parse. + :return: The real value, or ``None`` if there are fewer values + than the number of trusted proxies. + + .. versionadded:: 0.15 + """ + if not (trusted and value): + return + values = [x.strip() for x in value.split(",")] + if len(values) >= trusted: + return values[-trusted] + + def __call__(self, environ, start_response): + """Modify the WSGI environ based on the various ``Forwarded`` + headers before calling the wrapped application. Store the + original environ values in ``werkzeug.proxy_fix.orig_{key}``. + """ + environ_get = environ.get + orig_remote_addr = environ_get("REMOTE_ADDR") + orig_wsgi_url_scheme = environ_get("wsgi.url_scheme") + orig_http_host = environ_get("HTTP_HOST") + environ.update( + { + "werkzeug.proxy_fix.orig": { + "REMOTE_ADDR": orig_remote_addr, + "wsgi.url_scheme": orig_wsgi_url_scheme, + "HTTP_HOST": orig_http_host, + "SERVER_NAME": environ_get("SERVER_NAME"), + "SERVER_PORT": environ_get("SERVER_PORT"), + "SCRIPT_NAME": environ_get("SCRIPT_NAME"), + }, + # todo: remove deprecated keys + "werkzeug.proxy_fix.orig_remote_addr": orig_remote_addr, + "werkzeug.proxy_fix.orig_wsgi_url_scheme": orig_wsgi_url_scheme, + "werkzeug.proxy_fix.orig_http_host": orig_http_host, + } + ) + + x_for = self._get_trusted_comma(self.x_for, environ_get("HTTP_X_FORWARDED_FOR")) + if x_for: + environ["REMOTE_ADDR"] = x_for + + x_proto = self._get_trusted_comma( + self.x_proto, environ_get("HTTP_X_FORWARDED_PROTO") + ) + if x_proto: + environ["wsgi.url_scheme"] = x_proto + + x_host = self._get_trusted_comma( + self.x_host, environ_get("HTTP_X_FORWARDED_HOST") + ) + if x_host: + environ["HTTP_HOST"] = x_host + parts = x_host.split(":", 1) + environ["SERVER_NAME"] = parts[0] + if len(parts) == 2: + environ["SERVER_PORT"] = parts[1] + + x_port = self._get_trusted_comma( + self.x_port, environ_get("HTTP_X_FORWARDED_PORT") + ) + if x_port: + host = environ.get("HTTP_HOST") + if host: + parts = host.split(":", 1) + host = parts[0] if len(parts) == 2 else host + environ["HTTP_HOST"] = "%s:%s" % (host, x_port) + environ["SERVER_PORT"] = x_port + + x_prefix = self._get_trusted_comma( + self.x_prefix, environ_get("HTTP_X_FORWARDED_PREFIX") + ) + if x_prefix: + environ["SCRIPT_NAME"] = x_prefix + + return self.app(environ, start_response) diff --git a/python/werkzeug/middleware/shared_data.py b/python/werkzeug/middleware/shared_data.py new file mode 100644 index 0000000..a902281 --- /dev/null +++ b/python/werkzeug/middleware/shared_data.py @@ -0,0 +1,260 @@ +""" +Serve Shared Static Files +========================= + +.. autoclass:: SharedDataMiddleware + :members: is_allowed + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +import mimetypes +import os +import posixpath +from datetime import datetime +from io import BytesIO +from time import mktime +from time import time +from zlib import adler32 + +from .._compat import PY2 +from .._compat import string_types +from ..filesystem import get_filesystem_encoding +from ..http import http_date +from ..http import is_resource_modified +from ..wsgi import get_path_info +from ..wsgi import wrap_file + + +class SharedDataMiddleware(object): + + """A WSGI middleware that provides static content for development + environments or simple server setups. Usage is quite simple:: + + import os + from werkzeug.wsgi import SharedDataMiddleware + + app = SharedDataMiddleware(app, { + '/static': os.path.join(os.path.dirname(__file__), 'static') + }) + + The contents of the folder ``./shared`` will now be available on + ``http://example.com/shared/``. This is pretty useful during development + because a standalone media server is not required. One can also mount + files on the root folder and still continue to use the application because + the shared data middleware forwards all unhandled requests to the + application, even if the requests are below one of the shared folders. + + If `pkg_resources` is available you can also tell the middleware to serve + files from package data:: + + app = SharedDataMiddleware(app, { + '/static': ('myapplication', 'static') + }) + + This will then serve the ``static`` folder in the `myapplication` + Python package. + + The optional `disallow` parameter can be a list of :func:`~fnmatch.fnmatch` + rules for files that are not accessible from the web. If `cache` is set to + `False` no caching headers are sent. + + Currently the middleware does not support non ASCII filenames. If the + encoding on the file system happens to be the encoding of the URI it may + work but this could also be by accident. We strongly suggest using ASCII + only file names for static files. + + The middleware will guess the mimetype using the Python `mimetype` + module. If it's unable to figure out the charset it will fall back + to `fallback_mimetype`. + + .. versionchanged:: 0.5 + The cache timeout is configurable now. + + .. versionadded:: 0.6 + The `fallback_mimetype` parameter was added. + + :param app: the application to wrap. If you don't want to wrap an + application you can pass it :exc:`NotFound`. + :param exports: a list or dict of exported files and folders. + :param disallow: a list of :func:`~fnmatch.fnmatch` rules. + :param fallback_mimetype: the fallback mimetype for unknown files. + :param cache: enable or disable caching headers. + :param cache_timeout: the cache timeout in seconds for the headers. + """ + + def __init__( + self, + app, + exports, + disallow=None, + cache=True, + cache_timeout=60 * 60 * 12, + fallback_mimetype="text/plain", + ): + self.app = app + self.exports = [] + self.cache = cache + self.cache_timeout = cache_timeout + + if hasattr(exports, "items"): + exports = exports.items() + + for key, value in exports: + if isinstance(value, tuple): + loader = self.get_package_loader(*value) + elif isinstance(value, string_types): + if os.path.isfile(value): + loader = self.get_file_loader(value) + else: + loader = self.get_directory_loader(value) + else: + raise TypeError("unknown def %r" % value) + + self.exports.append((key, loader)) + + if disallow is not None: + from fnmatch import fnmatch + + self.is_allowed = lambda x: not fnmatch(x, disallow) + + self.fallback_mimetype = fallback_mimetype + + def is_allowed(self, filename): + """Subclasses can override this method to disallow the access to + certain files. However by providing `disallow` in the constructor + this method is overwritten. + """ + return True + + def _opener(self, filename): + return lambda: ( + open(filename, "rb"), + datetime.utcfromtimestamp(os.path.getmtime(filename)), + int(os.path.getsize(filename)), + ) + + def get_file_loader(self, filename): + return lambda x: (os.path.basename(filename), self._opener(filename)) + + def get_package_loader(self, package, package_path): + from pkg_resources import DefaultProvider, ResourceManager, get_provider + + loadtime = datetime.utcnow() + provider = get_provider(package) + manager = ResourceManager() + filesystem_bound = isinstance(provider, DefaultProvider) + + def loader(path): + if path is None: + return None, None + + path = posixpath.join(package_path, path) + + if not provider.has_resource(path): + return None, None + + basename = posixpath.basename(path) + + if filesystem_bound: + return ( + basename, + self._opener(provider.get_resource_filename(manager, path)), + ) + + s = provider.get_resource_string(manager, path) + return basename, lambda: (BytesIO(s), loadtime, len(s)) + + return loader + + def get_directory_loader(self, directory): + def loader(path): + if path is not None: + path = os.path.join(directory, path) + else: + path = directory + + if os.path.isfile(path): + return os.path.basename(path), self._opener(path) + + return None, None + + return loader + + def generate_etag(self, mtime, file_size, real_filename): + if not isinstance(real_filename, bytes): + real_filename = real_filename.encode(get_filesystem_encoding()) + + return "wzsdm-%d-%s-%s" % ( + mktime(mtime.timetuple()), + file_size, + adler32(real_filename) & 0xFFFFFFFF, + ) + + def __call__(self, environ, start_response): + cleaned_path = get_path_info(environ) + + if PY2: + cleaned_path = cleaned_path.encode(get_filesystem_encoding()) + + # sanitize the path for non unix systems + cleaned_path = cleaned_path.strip("/") + + for sep in os.sep, os.altsep: + if sep and sep != "/": + cleaned_path = cleaned_path.replace(sep, "/") + + path = "/" + "/".join(x for x in cleaned_path.split("/") if x and x != "..") + file_loader = None + + for search_path, loader in self.exports: + if search_path == path: + real_filename, file_loader = loader(None) + + if file_loader is not None: + break + + if not search_path.endswith("/"): + search_path += "/" + + if path.startswith(search_path): + real_filename, file_loader = loader(path[len(search_path) :]) + + if file_loader is not None: + break + + if file_loader is None or not self.is_allowed(real_filename): + return self.app(environ, start_response) + + guessed_type = mimetypes.guess_type(real_filename) + mime_type = guessed_type[0] or self.fallback_mimetype + f, mtime, file_size = file_loader() + + headers = [("Date", http_date())] + + if self.cache: + timeout = self.cache_timeout + etag = self.generate_etag(mtime, file_size, real_filename) + headers += [ + ("Etag", '"%s"' % etag), + ("Cache-Control", "max-age=%d, public" % timeout), + ] + + if not is_resource_modified(environ, etag, last_modified=mtime): + f.close() + start_response("304 Not Modified", headers) + return [] + + headers.append(("Expires", http_date(time() + timeout))) + else: + headers.append(("Cache-Control", "public")) + + headers.extend( + ( + ("Content-Type", mime_type), + ("Content-Length", str(file_size)), + ("Last-Modified", http_date(mtime)), + ) + ) + start_response("200 OK", headers) + return wrap_file(environ, f) diff --git a/python/werkzeug/posixemulation.py b/python/werkzeug/posixemulation.py new file mode 100644 index 0000000..696b456 --- /dev/null +++ b/python/werkzeug/posixemulation.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +r""" + werkzeug.posixemulation + ~~~~~~~~~~~~~~~~~~~~~~~ + + Provides a POSIX emulation for some features that are relevant to + web applications. The main purpose is to simplify support for + systems such as Windows NT that are not 100% POSIX compatible. + + Currently this only implements a :func:`rename` function that + follows POSIX semantics. Eg: if the target file already exists it + will be replaced without asking. + + This module was introduced in 0.6.1 and is not a public interface. + It might become one in later versions of Werkzeug. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import errno +import os +import random +import sys +import time + +from ._compat import to_unicode +from .filesystem import get_filesystem_encoding + +can_rename_open_file = False + +if os.name == "nt": + try: + import ctypes + + _MOVEFILE_REPLACE_EXISTING = 0x1 + _MOVEFILE_WRITE_THROUGH = 0x8 + _MoveFileEx = ctypes.windll.kernel32.MoveFileExW + + def _rename(src, dst): + src = to_unicode(src, get_filesystem_encoding()) + dst = to_unicode(dst, get_filesystem_encoding()) + if _rename_atomic(src, dst): + return True + retry = 0 + rv = False + while not rv and retry < 100: + rv = _MoveFileEx( + src, dst, _MOVEFILE_REPLACE_EXISTING | _MOVEFILE_WRITE_THROUGH + ) + if not rv: + time.sleep(0.001) + retry += 1 + return rv + + # new in Vista and Windows Server 2008 + _CreateTransaction = ctypes.windll.ktmw32.CreateTransaction + _CommitTransaction = ctypes.windll.ktmw32.CommitTransaction + _MoveFileTransacted = ctypes.windll.kernel32.MoveFileTransactedW + _CloseHandle = ctypes.windll.kernel32.CloseHandle + can_rename_open_file = True + + def _rename_atomic(src, dst): + ta = _CreateTransaction(None, 0, 0, 0, 0, 1000, "Werkzeug rename") + if ta == -1: + return False + try: + retry = 0 + rv = False + while not rv and retry < 100: + rv = _MoveFileTransacted( + src, + dst, + None, + None, + _MOVEFILE_REPLACE_EXISTING | _MOVEFILE_WRITE_THROUGH, + ta, + ) + if rv: + rv = _CommitTransaction(ta) + break + else: + time.sleep(0.001) + retry += 1 + return rv + finally: + _CloseHandle(ta) + + except Exception: + + def _rename(src, dst): + return False + + def _rename_atomic(src, dst): + return False + + def rename(src, dst): + # Try atomic or pseudo-atomic rename + if _rename(src, dst): + return + # Fall back to "move away and replace" + try: + os.rename(src, dst) + except OSError as e: + if e.errno != errno.EEXIST: + raise + old = "%s-%08x" % (dst, random.randint(0, sys.maxsize)) + os.rename(dst, old) + os.rename(src, dst) + try: + os.unlink(old) + except Exception: + pass + + +else: + rename = os.rename + can_rename_open_file = True diff --git a/python/werkzeug/routing.py b/python/werkzeug/routing.py new file mode 100644 index 0000000..6b1dd98 --- /dev/null +++ b/python/werkzeug/routing.py @@ -0,0 +1,2026 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.routing + ~~~~~~~~~~~~~~~~ + + When it comes to combining multiple controller or view functions (however + you want to call them) you need a dispatcher. A simple way would be + applying regular expression tests on the ``PATH_INFO`` and calling + registered callback functions that return the value then. + + This module implements a much more powerful system than simple regular + expression matching because it can also convert values in the URLs and + build URLs. + + Here a simple example that creates an URL map for an application with + two subdomains (www and kb) and some URL rules: + + >>> m = Map([ + ... # Static URLs + ... Rule('/', endpoint='static/index'), + ... Rule('/about', endpoint='static/about'), + ... Rule('/help', endpoint='static/help'), + ... # Knowledge Base + ... Subdomain('kb', [ + ... Rule('/', endpoint='kb/index'), + ... Rule('/browse/', endpoint='kb/browse'), + ... Rule('/browse/<int:id>/', endpoint='kb/browse'), + ... Rule('/browse/<int:id>/<int:page>', endpoint='kb/browse') + ... ]) + ... ], default_subdomain='www') + + If the application doesn't use subdomains it's perfectly fine to not set + the default subdomain and not use the `Subdomain` rule factory. The endpoint + in the rules can be anything, for example import paths or unique + identifiers. The WSGI application can use those endpoints to get the + handler for that URL. It doesn't have to be a string at all but it's + recommended. + + Now it's possible to create a URL adapter for one of the subdomains and + build URLs: + + >>> c = m.bind('example.com') + >>> c.build("kb/browse", dict(id=42)) + 'http://kb.example.com/browse/42/' + >>> c.build("kb/browse", dict()) + 'http://kb.example.com/browse/' + >>> c.build("kb/browse", dict(id=42, page=3)) + 'http://kb.example.com/browse/42/3' + >>> c.build("static/about") + '/about' + >>> c.build("static/index", force_external=True) + 'http://www.example.com/' + + >>> c = m.bind('example.com', subdomain='kb') + >>> c.build("static/about") + 'http://www.example.com/about' + + The first argument to bind is the server name *without* the subdomain. + Per default it will assume that the script is mounted on the root, but + often that's not the case so you can provide the real mount point as + second argument: + + >>> c = m.bind('example.com', '/applications/example') + + The third argument can be the subdomain, if not given the default + subdomain is used. For more details about binding have a look at the + documentation of the `MapAdapter`. + + And here is how you can match URLs: + + >>> c = m.bind('example.com') + >>> c.match("/") + ('static/index', {}) + >>> c.match("/about") + ('static/about', {}) + >>> c = m.bind('example.com', '/', 'kb') + >>> c.match("/") + ('kb/index', {}) + >>> c.match("/browse/42/23") + ('kb/browse', {'id': 42, 'page': 23}) + + If matching fails you get a `NotFound` exception, if the rule thinks + it's a good idea to redirect (for example because the URL was defined + to have a slash at the end but the request was missing that slash) it + will raise a `RequestRedirect` exception. Both are subclasses of the + `HTTPException` so you can use those errors as responses in the + application. + + If matching succeeded but the URL rule was incompatible to the given + method (for example there were only rules for `GET` and `HEAD` and + routing system tried to match a `POST` request) a `MethodNotAllowed` + exception is raised. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import ast +import difflib +import posixpath +import re +import uuid +from pprint import pformat +from threading import Lock + +from ._compat import implements_to_string +from ._compat import iteritems +from ._compat import itervalues +from ._compat import native_string_result +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_unicode +from ._compat import wsgi_decoding_dance +from ._internal import _encode_idna +from ._internal import _get_environ +from .datastructures import ImmutableDict +from .datastructures import MultiDict +from .exceptions import BadHost +from .exceptions import HTTPException +from .exceptions import MethodNotAllowed +from .exceptions import NotFound +from .urls import _fast_url_quote +from .urls import url_encode +from .urls import url_join +from .urls import url_quote +from .utils import cached_property +from .utils import format_string +from .utils import redirect +from .wsgi import get_host + +_rule_re = re.compile( + r""" + (?P<static>[^<]*) # static rule data + < + (?: + (?P<converter>[a-zA-Z_][a-zA-Z0-9_]*) # converter name + (?:\((?P<args>.*?)\))? # converter arguments + \: # variable delimiter + )? + (?P<variable>[a-zA-Z_][a-zA-Z0-9_]*) # variable name + > + """, + re.VERBOSE, +) +_simple_rule_re = re.compile(r"<([^>]+)>") +_converter_args_re = re.compile( + r""" + ((?P<name>\w+)\s*=\s*)? + (?P<value> + True|False| + \d+.\d+| + \d+.| + \d+| + [\w\d_.]+| + [urUR]?(?P<stringval>"[^"]*?"|'[^']*') + )\s*, + """, + re.VERBOSE | re.UNICODE, +) + + +_PYTHON_CONSTANTS = {"None": None, "True": True, "False": False} + + +def _pythonize(value): + if value in _PYTHON_CONSTANTS: + return _PYTHON_CONSTANTS[value] + for convert in int, float: + try: + return convert(value) + except ValueError: + pass + if value[:1] == value[-1:] and value[0] in "\"'": + value = value[1:-1] + return text_type(value) + + +def parse_converter_args(argstr): + argstr += "," + args = [] + kwargs = {} + + for item in _converter_args_re.finditer(argstr): + value = item.group("stringval") + if value is None: + value = item.group("value") + value = _pythonize(value) + if not item.group("name"): + args.append(value) + else: + name = item.group("name") + kwargs[name] = value + + return tuple(args), kwargs + + +def parse_rule(rule): + """Parse a rule and return it as generator. Each iteration yields tuples + in the form ``(converter, arguments, variable)``. If the converter is + `None` it's a static url part, otherwise it's a dynamic one. + + :internal: + """ + pos = 0 + end = len(rule) + do_match = _rule_re.match + used_names = set() + while pos < end: + m = do_match(rule, pos) + if m is None: + break + data = m.groupdict() + if data["static"]: + yield None, None, data["static"] + variable = data["variable"] + converter = data["converter"] or "default" + if variable in used_names: + raise ValueError("variable name %r used twice." % variable) + used_names.add(variable) + yield converter, data["args"] or None, variable + pos = m.end() + if pos < end: + remaining = rule[pos:] + if ">" in remaining or "<" in remaining: + raise ValueError("malformed url rule: %r" % rule) + yield None, None, remaining + + +class RoutingException(Exception): + """Special exceptions that require the application to redirect, notifying + about missing urls, etc. + + :internal: + """ + + +class RequestRedirect(HTTPException, RoutingException): + """Raise if the map requests a redirect. This is for example the case if + `strict_slashes` are activated and an url that requires a trailing slash. + + The attribute `new_url` contains the absolute destination url. + """ + + code = 308 + + def __init__(self, new_url): + RoutingException.__init__(self, new_url) + self.new_url = new_url + + def get_response(self, environ): + return redirect(self.new_url, self.code) + + +class RequestSlash(RoutingException): + """Internal exception.""" + + +class RequestAliasRedirect(RoutingException): # noqa: B903 + """This rule is an alias and wants to redirect to the canonical URL.""" + + def __init__(self, matched_values): + self.matched_values = matched_values + + +@implements_to_string +class BuildError(RoutingException, LookupError): + """Raised if the build system cannot find a URL for an endpoint with the + values provided. + """ + + def __init__(self, endpoint, values, method, adapter=None): + LookupError.__init__(self, endpoint, values, method) + self.endpoint = endpoint + self.values = values + self.method = method + self.adapter = adapter + + @cached_property + def suggested(self): + return self.closest_rule(self.adapter) + + def closest_rule(self, adapter): + def _score_rule(rule): + return sum( + [ + 0.98 + * difflib.SequenceMatcher( + None, rule.endpoint, self.endpoint + ).ratio(), + 0.01 * bool(set(self.values or ()).issubset(rule.arguments)), + 0.01 * bool(rule.methods and self.method in rule.methods), + ] + ) + + if adapter and adapter.map._rules: + return max(adapter.map._rules, key=_score_rule) + + def __str__(self): + message = [] + message.append("Could not build url for endpoint %r" % self.endpoint) + if self.method: + message.append(" (%r)" % self.method) + if self.values: + message.append(" with values %r" % sorted(self.values.keys())) + message.append(".") + if self.suggested: + if self.endpoint == self.suggested.endpoint: + if self.method and self.method not in self.suggested.methods: + message.append( + " Did you mean to use methods %r?" + % sorted(self.suggested.methods) + ) + missing_values = self.suggested.arguments.union( + set(self.suggested.defaults or ()) + ) - set(self.values.keys()) + if missing_values: + message.append( + " Did you forget to specify values %r?" % sorted(missing_values) + ) + else: + message.append(" Did you mean %r instead?" % self.suggested.endpoint) + return u"".join(message) + + +class ValidationError(ValueError): + """Validation error. If a rule converter raises this exception the rule + does not match the current URL and the next URL is tried. + """ + + +class RuleFactory(object): + """As soon as you have more complex URL setups it's a good idea to use rule + factories to avoid repetitive tasks. Some of them are builtin, others can + be added by subclassing `RuleFactory` and overriding `get_rules`. + """ + + def get_rules(self, map): + """Subclasses of `RuleFactory` have to override this method and return + an iterable of rules.""" + raise NotImplementedError() + + +class Subdomain(RuleFactory): + """All URLs provided by this factory have the subdomain set to a + specific domain. For example if you want to use the subdomain for + the current language this can be a good setup:: + + url_map = Map([ + Rule('/', endpoint='#select_language'), + Subdomain('<string(length=2):lang_code>', [ + Rule('/', endpoint='index'), + Rule('/about', endpoint='about'), + Rule('/help', endpoint='help') + ]) + ]) + + All the rules except for the ``'#select_language'`` endpoint will now + listen on a two letter long subdomain that holds the language code + for the current request. + """ + + def __init__(self, subdomain, rules): + self.subdomain = subdomain + self.rules = rules + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.subdomain = self.subdomain + yield rule + + +class Submount(RuleFactory): + """Like `Subdomain` but prefixes the URL rule with a given string:: + + url_map = Map([ + Rule('/', endpoint='index'), + Submount('/blog', [ + Rule('/', endpoint='blog/index'), + Rule('/entry/<entry_slug>', endpoint='blog/show') + ]) + ]) + + Now the rule ``'blog/show'`` matches ``/blog/entry/<entry_slug>``. + """ + + def __init__(self, path, rules): + self.path = path.rstrip("/") + self.rules = rules + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.rule = self.path + rule.rule + yield rule + + +class EndpointPrefix(RuleFactory): + """Prefixes all endpoints (which must be strings for this factory) with + another string. This can be useful for sub applications:: + + url_map = Map([ + Rule('/', endpoint='index'), + EndpointPrefix('blog/', [Submount('/blog', [ + Rule('/', endpoint='index'), + Rule('/entry/<entry_slug>', endpoint='show') + ])]) + ]) + """ + + def __init__(self, prefix, rules): + self.prefix = prefix + self.rules = rules + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + rule = rule.empty() + rule.endpoint = self.prefix + rule.endpoint + yield rule + + +class RuleTemplate(object): + """Returns copies of the rules wrapped and expands string templates in + the endpoint, rule, defaults or subdomain sections. + + Here a small example for such a rule template:: + + from werkzeug.routing import Map, Rule, RuleTemplate + + resource = RuleTemplate([ + Rule('/$name/', endpoint='$name.list'), + Rule('/$name/<int:id>', endpoint='$name.show') + ]) + + url_map = Map([resource(name='user'), resource(name='page')]) + + When a rule template is called the keyword arguments are used to + replace the placeholders in all the string parameters. + """ + + def __init__(self, rules): + self.rules = list(rules) + + def __call__(self, *args, **kwargs): + return RuleTemplateFactory(self.rules, dict(*args, **kwargs)) + + +class RuleTemplateFactory(RuleFactory): + """A factory that fills in template variables into rules. Used by + `RuleTemplate` internally. + + :internal: + """ + + def __init__(self, rules, context): + self.rules = rules + self.context = context + + def get_rules(self, map): + for rulefactory in self.rules: + for rule in rulefactory.get_rules(map): + new_defaults = subdomain = None + if rule.defaults: + new_defaults = {} + for key, value in iteritems(rule.defaults): + if isinstance(value, string_types): + value = format_string(value, self.context) + new_defaults[key] = value + if rule.subdomain is not None: + subdomain = format_string(rule.subdomain, self.context) + new_endpoint = rule.endpoint + if isinstance(new_endpoint, string_types): + new_endpoint = format_string(new_endpoint, self.context) + yield Rule( + format_string(rule.rule, self.context), + new_defaults, + subdomain, + rule.methods, + rule.build_only, + new_endpoint, + rule.strict_slashes, + ) + + +def _prefix_names(src): + """ast parse and prefix names with `.` to avoid collision with user vars""" + tree = ast.parse(src).body[0] + if isinstance(tree, ast.Expr): + tree = tree.value + for node in ast.walk(tree): + if isinstance(node, ast.Name): + node.id = "." + node.id + return tree + + +_CALL_CONVERTER_CODE_FMT = "self._converters[{elem!r}].to_url()" +_IF_KWARGS_URL_ENCODE_CODE = """\ +if kwargs: + q = '?' + params = self._encode_query_vars(kwargs) +else: + q = params = '' +""" +_IF_KWARGS_URL_ENCODE_AST = _prefix_names(_IF_KWARGS_URL_ENCODE_CODE) +_URL_ENCODE_AST_NAMES = (_prefix_names("q"), _prefix_names("params")) + + +@implements_to_string +class Rule(RuleFactory): + """A Rule represents one URL pattern. There are some options for `Rule` + that change the way it behaves and are passed to the `Rule` constructor. + Note that besides the rule-string all arguments *must* be keyword arguments + in order to not break the application on Werkzeug upgrades. + + `string` + Rule strings basically are just normal URL paths with placeholders in + the format ``<converter(arguments):name>`` where the converter and the + arguments are optional. If no converter is defined the `default` + converter is used which means `string` in the normal configuration. + + URL rules that end with a slash are branch URLs, others are leaves. + If you have `strict_slashes` enabled (which is the default), all + branch URLs that are matched without a trailing slash will trigger a + redirect to the same URL with the missing slash appended. + + The converters are defined on the `Map`. + + `endpoint` + The endpoint for this rule. This can be anything. A reference to a + function, a string, a number etc. The preferred way is using a string + because the endpoint is used for URL generation. + + `defaults` + An optional dict with defaults for other rules with the same endpoint. + This is a bit tricky but useful if you want to have unique URLs:: + + url_map = Map([ + Rule('/all/', defaults={'page': 1}, endpoint='all_entries'), + Rule('/all/page/<int:page>', endpoint='all_entries') + ]) + + If a user now visits ``http://example.com/all/page/1`` he will be + redirected to ``http://example.com/all/``. If `redirect_defaults` is + disabled on the `Map` instance this will only affect the URL + generation. + + `subdomain` + The subdomain rule string for this rule. If not specified the rule + only matches for the `default_subdomain` of the map. If the map is + not bound to a subdomain this feature is disabled. + + Can be useful if you want to have user profiles on different subdomains + and all subdomains are forwarded to your application:: + + url_map = Map([ + Rule('/', subdomain='<username>', endpoint='user/homepage'), + Rule('/stats', subdomain='<username>', endpoint='user/stats') + ]) + + `methods` + A sequence of http methods this rule applies to. If not specified, all + methods are allowed. For example this can be useful if you want different + endpoints for `POST` and `GET`. If methods are defined and the path + matches but the method matched against is not in this list or in the + list of another rule for that path the error raised is of the type + `MethodNotAllowed` rather than `NotFound`. If `GET` is present in the + list of methods and `HEAD` is not, `HEAD` is added automatically. + + .. versionchanged:: 0.6.1 + `HEAD` is now automatically added to the methods if `GET` is + present. The reason for this is that existing code often did not + work properly in servers not rewriting `HEAD` to `GET` + automatically and it was not documented how `HEAD` should be + treated. This was considered a bug in Werkzeug because of that. + + `strict_slashes` + Override the `Map` setting for `strict_slashes` only for this rule. If + not specified the `Map` setting is used. + + `build_only` + Set this to True and the rule will never match but will create a URL + that can be build. This is useful if you have resources on a subdomain + or folder that are not handled by the WSGI application (like static data) + + `redirect_to` + If given this must be either a string or callable. In case of a + callable it's called with the url adapter that triggered the match and + the values of the URL as keyword arguments and has to return the target + for the redirect, otherwise it has to be a string with placeholders in + rule syntax:: + + def foo_with_slug(adapter, id): + # ask the database for the slug for the old id. this of + # course has nothing to do with werkzeug. + return 'foo/' + Foo.get_slug_for_id(id) + + url_map = Map([ + Rule('/foo/<slug>', endpoint='foo'), + Rule('/some/old/url/<slug>', redirect_to='foo/<slug>'), + Rule('/other/old/url/<int:id>', redirect_to=foo_with_slug) + ]) + + When the rule is matched the routing system will raise a + `RequestRedirect` exception with the target for the redirect. + + Keep in mind that the URL will be joined against the URL root of the + script so don't use a leading slash on the target URL unless you + really mean root of that domain. + + `alias` + If enabled this rule serves as an alias for another rule with the same + endpoint and arguments. + + `host` + If provided and the URL map has host matching enabled this can be + used to provide a match rule for the whole host. This also means + that the subdomain feature is disabled. + + .. versionadded:: 0.7 + The `alias` and `host` parameters were added. + """ + + def __init__( + self, + string, + defaults=None, + subdomain=None, + methods=None, + build_only=False, + endpoint=None, + strict_slashes=None, + redirect_to=None, + alias=False, + host=None, + ): + if not string.startswith("/"): + raise ValueError("urls must start with a leading slash") + self.rule = string + self.is_leaf = not string.endswith("/") + + self.map = None + self.strict_slashes = strict_slashes + self.subdomain = subdomain + self.host = host + self.defaults = defaults + self.build_only = build_only + self.alias = alias + if methods is None: + self.methods = None + else: + if isinstance(methods, str): + raise TypeError("param `methods` should be `Iterable[str]`, not `str`") + self.methods = set([x.upper() for x in methods]) + if "HEAD" not in self.methods and "GET" in self.methods: + self.methods.add("HEAD") + self.endpoint = endpoint + self.redirect_to = redirect_to + + if defaults: + self.arguments = set(map(str, defaults)) + else: + self.arguments = set() + self._trace = self._converters = self._regex = self._argument_weights = None + + def empty(self): + """ + Return an unbound copy of this rule. + + This can be useful if want to reuse an already bound URL for another + map. See ``get_empty_kwargs`` to override what keyword arguments are + provided to the new copy. + """ + return type(self)(self.rule, **self.get_empty_kwargs()) + + def get_empty_kwargs(self): + """ + Provides kwargs for instantiating empty copy with empty() + + Use this method to provide custom keyword arguments to the subclass of + ``Rule`` when calling ``some_rule.empty()``. Helpful when the subclass + has custom keyword arguments that are needed at instantiation. + + Must return a ``dict`` that will be provided as kwargs to the new + instance of ``Rule``, following the initial ``self.rule`` value which + is always provided as the first, required positional argument. + """ + defaults = None + if self.defaults: + defaults = dict(self.defaults) + return dict( + defaults=defaults, + subdomain=self.subdomain, + methods=self.methods, + build_only=self.build_only, + endpoint=self.endpoint, + strict_slashes=self.strict_slashes, + redirect_to=self.redirect_to, + alias=self.alias, + host=self.host, + ) + + def get_rules(self, map): + yield self + + def refresh(self): + """Rebinds and refreshes the URL. Call this if you modified the + rule in place. + + :internal: + """ + self.bind(self.map, rebind=True) + + def bind(self, map, rebind=False): + """Bind the url to a map and create a regular expression based on + the information from the rule itself and the defaults from the map. + + :internal: + """ + if self.map is not None and not rebind: + raise RuntimeError("url rule %r already bound to map %r" % (self, self.map)) + self.map = map + if self.strict_slashes is None: + self.strict_slashes = map.strict_slashes + if self.subdomain is None: + self.subdomain = map.default_subdomain + self.compile() + + def get_converter(self, variable_name, converter_name, args, kwargs): + """Looks up the converter for the given parameter. + + .. versionadded:: 0.9 + """ + if converter_name not in self.map.converters: + raise LookupError("the converter %r does not exist" % converter_name) + return self.map.converters[converter_name](self.map, *args, **kwargs) + + def _encode_query_vars(self, query_vars): + return url_encode( + query_vars, + charset=self.map.charset, + sort=self.map.sort_parameters, + key=self.map.sort_key, + ) + + def compile(self): + """Compiles the regular expression and stores it.""" + assert self.map is not None, "rule not bound" + + if self.map.host_matching: + domain_rule = self.host or "" + else: + domain_rule = self.subdomain or "" + + self._trace = [] + self._converters = {} + self._static_weights = [] + self._argument_weights = [] + regex_parts = [] + + def _build_regex(rule): + index = 0 + for converter, arguments, variable in parse_rule(rule): + if converter is None: + regex_parts.append(re.escape(variable)) + self._trace.append((False, variable)) + for part in variable.split("/"): + if part: + self._static_weights.append((index, -len(part))) + else: + if arguments: + c_args, c_kwargs = parse_converter_args(arguments) + else: + c_args = () + c_kwargs = {} + convobj = self.get_converter(variable, converter, c_args, c_kwargs) + regex_parts.append("(?P<%s>%s)" % (variable, convobj.regex)) + self._converters[variable] = convobj + self._trace.append((True, variable)) + self._argument_weights.append(convobj.weight) + self.arguments.add(str(variable)) + index = index + 1 + + _build_regex(domain_rule) + regex_parts.append("\\|") + self._trace.append((False, "|")) + _build_regex(self.rule if self.is_leaf else self.rule.rstrip("/")) + if not self.is_leaf: + self._trace.append((False, "/")) + + self._build = self._compile_builder(False).__get__(self, None) + self._build_unknown = self._compile_builder(True).__get__(self, None) + + if self.build_only: + return + regex = r"^%s%s$" % ( + u"".join(regex_parts), + (not self.is_leaf or not self.strict_slashes) + and "(?<!/)(?P<__suffix__>/?)" + or "", + ) + self._regex = re.compile(regex, re.UNICODE) + + def match(self, path, method=None): + """Check if the rule matches a given path. Path is a string in the + form ``"subdomain|/path"`` and is assembled by the map. If + the map is doing host matching the subdomain part will be the host + instead. + + If the rule matches a dict with the converted values is returned, + otherwise the return value is `None`. + + :internal: + """ + if not self.build_only: + m = self._regex.search(path) + if m is not None: + groups = m.groupdict() + # we have a folder like part of the url without a trailing + # slash and strict slashes enabled. raise an exception that + # tells the map to redirect to the same url but with a + # trailing slash + if ( + self.strict_slashes + and not self.is_leaf + and not groups.pop("__suffix__") + and ( + method is None or self.methods is None or method in self.methods + ) + ): + raise RequestSlash() + # if we are not in strict slashes mode we have to remove + # a __suffix__ + elif not self.strict_slashes: + del groups["__suffix__"] + + result = {} + for name, value in iteritems(groups): + try: + value = self._converters[name].to_python(value) + except ValidationError: + return + result[str(name)] = value + if self.defaults: + result.update(self.defaults) + + if self.alias and self.map.redirect_defaults: + raise RequestAliasRedirect(result) + + return result + + @staticmethod + def _get_func_code(code, name): + globs, locs = {}, {} + exec(code, globs, locs) + return locs[name] + + def _compile_builder(self, append_unknown=True): + defaults = self.defaults or {} + dom_ops = [] + url_ops = [] + + opl = dom_ops + for is_dynamic, data in self._trace: + if data == "|" and opl is dom_ops: + opl = url_ops + continue + # this seems like a silly case to ever come up but: + # if a default is given for a value that appears in the rule, + # resolve it to a constant ahead of time + if is_dynamic and data in defaults: + data = self._converters[data].to_url(defaults[data]) + opl.append((False, data)) + elif not is_dynamic: + opl.append( + (False, url_quote(to_bytes(data, self.map.charset), safe="/:|+")) + ) + else: + opl.append((True, data)) + + def _convert(elem): + ret = _prefix_names(_CALL_CONVERTER_CODE_FMT.format(elem=elem)) + ret.args = [ast.Name(str(elem), ast.Load())] # str for py2 + return ret + + def _parts(ops): + parts = [ + _convert(elem) if is_dynamic else ast.Str(s=elem) + for is_dynamic, elem in ops + ] + parts = parts or [ast.Str("")] + # constant fold + ret = [parts[0]] + for p in parts[1:]: + if isinstance(p, ast.Str) and isinstance(ret[-1], ast.Str): + ret[-1] = ast.Str(ret[-1].s + p.s) + else: + ret.append(p) + return ret + + dom_parts = _parts(dom_ops) + url_parts = _parts(url_ops) + if not append_unknown: + body = [] + else: + body = [_IF_KWARGS_URL_ENCODE_AST] + url_parts.extend(_URL_ENCODE_AST_NAMES) + + def _join(parts): + if len(parts) == 1: # shortcut + return parts[0] + elif hasattr(ast, "JoinedStr"): # py36+ + return ast.JoinedStr(parts) + else: + call = _prefix_names('"".join()') + call.args = [ast.Tuple(parts, ast.Load())] + return call + + body.append( + ast.Return(ast.Tuple([_join(dom_parts), _join(url_parts)], ast.Load())) + ) + + # str is necessary for python2 + pargs = [ + str(elem) + for is_dynamic, elem in dom_ops + url_ops + if is_dynamic and elem not in defaults + ] + kargs = [str(k) for k in defaults] + + func_ast = _prefix_names("def _(): pass") + func_ast.name = "<builder:{!r}>".format(self.rule) + if hasattr(ast, "arg"): # py3 + func_ast.args.args.append(ast.arg(".self", None)) + for arg in pargs + kargs: + func_ast.args.args.append(ast.arg(arg, None)) + func_ast.args.kwarg = ast.arg(".kwargs", None) + else: + func_ast.args.args.append(ast.Name(".self", ast.Load())) + for arg in pargs + kargs: + func_ast.args.args.append(ast.Name(arg, ast.Load())) + func_ast.args.kwarg = ".kwargs" + for _ in kargs: + func_ast.args.defaults.append(ast.Str("")) + func_ast.body = body + + module = ast.fix_missing_locations(ast.Module([func_ast])) + code = compile(module, "<werkzeug routing>", "exec") + return self._get_func_code(code, func_ast.name) + + def build(self, values, append_unknown=True): + """Assembles the relative url for that rule and the subdomain. + If building doesn't work for some reasons `None` is returned. + + :internal: + """ + try: + if append_unknown: + return self._build_unknown(**values) + else: + return self._build(**values) + except ValidationError: + return None + + def provides_defaults_for(self, rule): + """Check if this rule has defaults for a given rule. + + :internal: + """ + return ( + not self.build_only + and self.defaults + and self.endpoint == rule.endpoint + and self != rule + and self.arguments == rule.arguments + ) + + def suitable_for(self, values, method=None): + """Check if the dict of values has enough data for url generation. + + :internal: + """ + # if a method was given explicitly and that method is not supported + # by this rule, this rule is not suitable. + if ( + method is not None + and self.methods is not None + and method not in self.methods + ): + return False + + defaults = self.defaults or () + + # all arguments required must be either in the defaults dict or + # the value dictionary otherwise it's not suitable + for key in self.arguments: + if key not in defaults and key not in values: + return False + + # in case defaults are given we ensure that either the value was + # skipped or the value is the same as the default value. + if defaults: + for key, value in iteritems(defaults): + if key in values and value != values[key]: + return False + + return True + + def match_compare_key(self): + """The match compare key for sorting. + + Current implementation: + + 1. rules without any arguments come first for performance + reasons only as we expect them to match faster and some + common ones usually don't have any arguments (index pages etc.) + 2. rules with more static parts come first so the second argument + is the negative length of the number of the static weights. + 3. we order by static weights, which is a combination of index + and length + 4. The more complex rules come first so the next argument is the + negative length of the number of argument weights. + 5. lastly we order by the actual argument weights. + + :internal: + """ + return ( + bool(self.arguments), + -len(self._static_weights), + self._static_weights, + -len(self._argument_weights), + self._argument_weights, + ) + + def build_compare_key(self): + """The build compare key for sorting. + + :internal: + """ + return 1 if self.alias else 0, -len(self.arguments), -len(self.defaults or ()) + + def __eq__(self, other): + return self.__class__ is other.__class__ and self._trace == other._trace + + __hash__ = None + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return self.rule + + @native_string_result + def __repr__(self): + if self.map is None: + return u"<%s (unbound)>" % self.__class__.__name__ + tmp = [] + for is_dynamic, data in self._trace: + if is_dynamic: + tmp.append(u"<%s>" % data) + else: + tmp.append(data) + return u"<%s %s%s -> %s>" % ( + self.__class__.__name__, + repr((u"".join(tmp)).lstrip(u"|")).lstrip(u"u"), + self.methods is not None and u" (%s)" % u", ".join(self.methods) or u"", + self.endpoint, + ) + + +class BaseConverter(object): + """Base class for all converters.""" + + regex = "[^/]+" + weight = 100 + + def __init__(self, map): + self.map = map + + def to_python(self, value): + return value + + def to_url(self, value): + if isinstance(value, (bytes, bytearray)): + return _fast_url_quote(value) + return _fast_url_quote(text_type(value).encode(self.map.charset)) + + +class UnicodeConverter(BaseConverter): + """This converter is the default converter and accepts any string but + only one path segment. Thus the string can not include a slash. + + This is the default validator. + + Example:: + + Rule('/pages/<page>'), + Rule('/<string(length=2):lang_code>') + + :param map: the :class:`Map`. + :param minlength: the minimum length of the string. Must be greater + or equal 1. + :param maxlength: the maximum length of the string. + :param length: the exact length of the string. + """ + + def __init__(self, map, minlength=1, maxlength=None, length=None): + BaseConverter.__init__(self, map) + if length is not None: + length = "{%d}" % int(length) + else: + if maxlength is None: + maxlength = "" + else: + maxlength = int(maxlength) + length = "{%s,%s}" % (int(minlength), maxlength) + self.regex = "[^/]" + length + + +class AnyConverter(BaseConverter): + """Matches one of the items provided. Items can either be Python + identifiers or strings:: + + Rule('/<any(about, help, imprint, class, "foo,bar"):page_name>') + + :param map: the :class:`Map`. + :param items: this function accepts the possible items as positional + arguments. + """ + + def __init__(self, map, *items): + BaseConverter.__init__(self, map) + self.regex = "(?:%s)" % "|".join([re.escape(x) for x in items]) + + +class PathConverter(BaseConverter): + """Like the default :class:`UnicodeConverter`, but it also matches + slashes. This is useful for wikis and similar applications:: + + Rule('/<path:wikipage>') + Rule('/<path:wikipage>/edit') + + :param map: the :class:`Map`. + """ + + regex = "[^/].*?" + weight = 200 + + +class NumberConverter(BaseConverter): + """Baseclass for `IntegerConverter` and `FloatConverter`. + + :internal: + """ + + weight = 50 + + def __init__(self, map, fixed_digits=0, min=None, max=None, signed=False): + if signed: + self.regex = self.signed_regex + BaseConverter.__init__(self, map) + self.fixed_digits = fixed_digits + self.min = min + self.max = max + self.signed = signed + + def to_python(self, value): + if self.fixed_digits and len(value) != self.fixed_digits: + raise ValidationError() + value = self.num_convert(value) + if (self.min is not None and value < self.min) or ( + self.max is not None and value > self.max + ): + raise ValidationError() + return value + + def to_url(self, value): + value = self.num_convert(value) + if self.fixed_digits: + value = ("%%0%sd" % self.fixed_digits) % value + return str(value) + + @property + def signed_regex(self): + return r"-?" + self.regex + + +class IntegerConverter(NumberConverter): + """This converter only accepts integer values:: + + Rule("/page/<int:page>") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/page/<int(signed=True):page>") + + :param map: The :class:`Map`. + :param fixed_digits: The number of fixed digits in the URL. If you + set this to ``4`` for example, the rule will only match if the + URL looks like ``/0001/``. The default is variable length. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+" + num_convert = int + + +class FloatConverter(NumberConverter): + """This converter only accepts floating point values:: + + Rule("/probability/<float:probability>") + + By default it only accepts unsigned, positive values. The ``signed`` + parameter will enable signed, negative values. :: + + Rule("/offset/<float(signed=True):offset>") + + :param map: The :class:`Map`. + :param min: The minimal value. + :param max: The maximal value. + :param signed: Allow signed (negative) values. + + .. versionadded:: 0.15 + The ``signed`` parameter. + """ + + regex = r"\d+\.\d+" + num_convert = float + + def __init__(self, map, min=None, max=None, signed=False): + NumberConverter.__init__(self, map, min=min, max=max, signed=signed) + + +class UUIDConverter(BaseConverter): + """This converter only accepts UUID strings:: + + Rule('/object/<uuid:identifier>') + + .. versionadded:: 0.10 + + :param map: the :class:`Map`. + """ + + regex = ( + r"[A-Fa-f0-9]{8}-[A-Fa-f0-9]{4}-" + r"[A-Fa-f0-9]{4}-[A-Fa-f0-9]{4}-[A-Fa-f0-9]{12}" + ) + + def to_python(self, value): + return uuid.UUID(value) + + def to_url(self, value): + return str(value) + + +#: the default converter mapping for the map. +DEFAULT_CONVERTERS = { + "default": UnicodeConverter, + "string": UnicodeConverter, + "any": AnyConverter, + "path": PathConverter, + "int": IntegerConverter, + "float": FloatConverter, + "uuid": UUIDConverter, +} + + +class Map(object): + """The map class stores all the URL rules and some configuration + parameters. Some of the configuration values are only stored on the + `Map` instance since those affect all rules, others are just defaults + and can be overridden for each rule. Note that you have to specify all + arguments besides the `rules` as keyword arguments! + + :param rules: sequence of url rules for this map. + :param default_subdomain: The default subdomain for rules without a + subdomain defined. + :param charset: charset of the url. defaults to ``"utf-8"`` + :param strict_slashes: Take care of trailing slashes. + :param redirect_defaults: This will redirect to the default rule if it + wasn't visited that way. This helps creating + unique URLs. + :param converters: A dict of converters that adds additional converters + to the list of converters. If you redefine one + converter this will override the original one. + :param sort_parameters: If set to `True` the url parameters are sorted. + See `url_encode` for more details. + :param sort_key: The sort key function for `url_encode`. + :param encoding_errors: the error method to use for decoding + :param host_matching: if set to `True` it enables the host matching + feature and disables the subdomain one. If + enabled the `host` parameter to rules is used + instead of the `subdomain` one. + + .. versionadded:: 0.5 + `sort_parameters` and `sort_key` was added. + + .. versionadded:: 0.7 + `encoding_errors` and `host_matching` was added. + """ + + #: A dict of default converters to be used. + default_converters = ImmutableDict(DEFAULT_CONVERTERS) + + def __init__( + self, + rules=None, + default_subdomain="", + charset="utf-8", + strict_slashes=True, + redirect_defaults=True, + converters=None, + sort_parameters=False, + sort_key=None, + encoding_errors="replace", + host_matching=False, + ): + self._rules = [] + self._rules_by_endpoint = {} + self._remap = True + self._remap_lock = Lock() + + self.default_subdomain = default_subdomain + self.charset = charset + self.encoding_errors = encoding_errors + self.strict_slashes = strict_slashes + self.redirect_defaults = redirect_defaults + self.host_matching = host_matching + + self.converters = self.default_converters.copy() + if converters: + self.converters.update(converters) + + self.sort_parameters = sort_parameters + self.sort_key = sort_key + + for rulefactory in rules or (): + self.add(rulefactory) + + def is_endpoint_expecting(self, endpoint, *arguments): + """Iterate over all rules and check if the endpoint expects + the arguments provided. This is for example useful if you have + some URLs that expect a language code and others that do not and + you want to wrap the builder a bit so that the current language + code is automatically added if not provided but endpoints expect + it. + + :param endpoint: the endpoint to check. + :param arguments: this function accepts one or more arguments + as positional arguments. Each one of them is + checked. + """ + self.update() + arguments = set(arguments) + for rule in self._rules_by_endpoint[endpoint]: + if arguments.issubset(rule.arguments): + return True + return False + + def iter_rules(self, endpoint=None): + """Iterate over all rules or the rules of an endpoint. + + :param endpoint: if provided only the rules for that endpoint + are returned. + :return: an iterator + """ + self.update() + if endpoint is not None: + return iter(self._rules_by_endpoint[endpoint]) + return iter(self._rules) + + def add(self, rulefactory): + """Add a new rule or factory to the map and bind it. Requires that the + rule is not bound to another map. + + :param rulefactory: a :class:`Rule` or :class:`RuleFactory` + """ + for rule in rulefactory.get_rules(self): + rule.bind(self) + self._rules.append(rule) + self._rules_by_endpoint.setdefault(rule.endpoint, []).append(rule) + self._remap = True + + def bind( + self, + server_name, + script_name=None, + subdomain=None, + url_scheme="http", + default_method="GET", + path_info=None, + query_args=None, + ): + """Return a new :class:`MapAdapter` with the details specified to the + call. Note that `script_name` will default to ``'/'`` if not further + specified or `None`. The `server_name` at least is a requirement + because the HTTP RFC requires absolute URLs for redirects and so all + redirect exceptions raised by Werkzeug will contain the full canonical + URL. + + If no path_info is passed to :meth:`match` it will use the default path + info passed to bind. While this doesn't really make sense for + manual bind calls, it's useful if you bind a map to a WSGI + environment which already contains the path info. + + `subdomain` will default to the `default_subdomain` for this map if + no defined. If there is no `default_subdomain` you cannot use the + subdomain feature. + + .. versionadded:: 0.7 + `query_args` added + + .. versionadded:: 0.8 + `query_args` can now also be a string. + + .. versionchanged:: 0.15 + ``path_info`` defaults to ``'/'`` if ``None``. + """ + server_name = server_name.lower() + if self.host_matching: + if subdomain is not None: + raise RuntimeError("host matching enabled and a subdomain was provided") + elif subdomain is None: + subdomain = self.default_subdomain + if script_name is None: + script_name = "/" + if path_info is None: + path_info = "/" + try: + server_name = _encode_idna(server_name) + except UnicodeError: + raise BadHost() + return MapAdapter( + self, + server_name, + script_name, + subdomain, + url_scheme, + path_info, + default_method, + query_args, + ) + + def bind_to_environ(self, environ, server_name=None, subdomain=None): + """Like :meth:`bind` but you can pass it an WSGI environment and it + will fetch the information from that dictionary. Note that because of + limitations in the protocol there is no way to get the current + subdomain and real `server_name` from the environment. If you don't + provide it, Werkzeug will use `SERVER_NAME` and `SERVER_PORT` (or + `HTTP_HOST` if provided) as used `server_name` with disabled subdomain + feature. + + If `subdomain` is `None` but an environment and a server name is + provided it will calculate the current subdomain automatically. + Example: `server_name` is ``'example.com'`` and the `SERVER_NAME` + in the wsgi `environ` is ``'staging.dev.example.com'`` the calculated + subdomain will be ``'staging.dev'``. + + If the object passed as environ has an environ attribute, the value of + this attribute is used instead. This allows you to pass request + objects. Additionally `PATH_INFO` added as a default of the + :class:`MapAdapter` so that you don't have to pass the path info to + the match method. + + .. versionchanged:: 0.5 + previously this method accepted a bogus `calculate_subdomain` + parameter that did not have any effect. It was removed because + of that. + + .. versionchanged:: 0.8 + This will no longer raise a ValueError when an unexpected server + name was passed. + + :param environ: a WSGI environment. + :param server_name: an optional server name hint (see above). + :param subdomain: optionally the current subdomain (see above). + """ + environ = _get_environ(environ) + + wsgi_server_name = get_host(environ).lower() + + if server_name is None: + server_name = wsgi_server_name + else: + server_name = server_name.lower() + + if subdomain is None and not self.host_matching: + cur_server_name = wsgi_server_name.split(".") + real_server_name = server_name.split(".") + offset = -len(real_server_name) + if cur_server_name[offset:] != real_server_name: + # This can happen even with valid configs if the server was + # accesssed directly by IP address under some situations. + # Instead of raising an exception like in Werkzeug 0.7 or + # earlier we go by an invalid subdomain which will result + # in a 404 error on matching. + subdomain = "<invalid>" + else: + subdomain = ".".join(filter(None, cur_server_name[:offset])) + + def _get_wsgi_string(name): + val = environ.get(name) + if val is not None: + return wsgi_decoding_dance(val, self.charset) + + script_name = _get_wsgi_string("SCRIPT_NAME") + path_info = _get_wsgi_string("PATH_INFO") + query_args = _get_wsgi_string("QUERY_STRING") + return Map.bind( + self, + server_name, + script_name, + subdomain, + environ["wsgi.url_scheme"], + environ["REQUEST_METHOD"], + path_info, + query_args=query_args, + ) + + def update(self): + """Called before matching and building to keep the compiled rules + in the correct order after things changed. + """ + if not self._remap: + return + + with self._remap_lock: + if not self._remap: + return + + self._rules.sort(key=lambda x: x.match_compare_key()) + for rules in itervalues(self._rules_by_endpoint): + rules.sort(key=lambda x: x.build_compare_key()) + self._remap = False + + def __repr__(self): + rules = self.iter_rules() + return "%s(%s)" % (self.__class__.__name__, pformat(list(rules))) + + +class MapAdapter(object): + + """Returned by :meth:`Map.bind` or :meth:`Map.bind_to_environ` and does + the URL matching and building based on runtime information. + """ + + def __init__( + self, + map, + server_name, + script_name, + subdomain, + url_scheme, + path_info, + default_method, + query_args=None, + ): + self.map = map + self.server_name = to_unicode(server_name) + script_name = to_unicode(script_name) + if not script_name.endswith(u"/"): + script_name += u"/" + self.script_name = script_name + self.subdomain = to_unicode(subdomain) + self.url_scheme = to_unicode(url_scheme) + self.path_info = to_unicode(path_info) + self.default_method = to_unicode(default_method) + self.query_args = query_args + + def dispatch( + self, view_func, path_info=None, method=None, catch_http_exceptions=False + ): + """Does the complete dispatching process. `view_func` is called with + the endpoint and a dict with the values for the view. It should + look up the view function, call it, and return a response object + or WSGI application. http exceptions are not caught by default + so that applications can display nicer error messages by just + catching them by hand. If you want to stick with the default + error messages you can pass it ``catch_http_exceptions=True`` and + it will catch the http exceptions. + + Here a small example for the dispatch usage:: + + from werkzeug.wrappers import Request, Response + from werkzeug.wsgi import responder + from werkzeug.routing import Map, Rule + + def on_index(request): + return Response('Hello from the index') + + url_map = Map([Rule('/', endpoint='index')]) + views = {'index': on_index} + + @responder + def application(environ, start_response): + request = Request(environ) + urls = url_map.bind_to_environ(environ) + return urls.dispatch(lambda e, v: views[e](request, **v), + catch_http_exceptions=True) + + Keep in mind that this method might return exception objects, too, so + use :class:`Response.force_type` to get a response object. + + :param view_func: a function that is called with the endpoint as + first argument and the value dict as second. Has + to dispatch to the actual view function with this + information. (see above) + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param catch_http_exceptions: set to `True` to catch any of the + werkzeug :class:`HTTPException`\\s. + """ + try: + try: + endpoint, args = self.match(path_info, method) + except RequestRedirect as e: + return e + return view_func(endpoint, args) + except HTTPException as e: + if catch_http_exceptions: + return e + raise + + def match(self, path_info=None, method=None, return_rule=False, query_args=None): + """The usage is simple: you just pass the match method the current + path info as well as the method (which defaults to `GET`). The + following things can then happen: + + - you receive a `NotFound` exception that indicates that no URL is + matching. A `NotFound` exception is also a WSGI application you + can call to get a default page not found page (happens to be the + same object as `werkzeug.exceptions.NotFound`) + + - you receive a `MethodNotAllowed` exception that indicates that there + is a match for this URL but not for the current request method. + This is useful for RESTful applications. + + - you receive a `RequestRedirect` exception with a `new_url` + attribute. This exception is used to notify you about a request + Werkzeug requests from your WSGI application. This is for example the + case if you request ``/foo`` although the correct URL is ``/foo/`` + You can use the `RequestRedirect` instance as response-like object + similar to all other subclasses of `HTTPException`. + + - you get a tuple in the form ``(endpoint, arguments)`` if there is + a match (unless `return_rule` is True, in which case you get a tuple + in the form ``(rule, arguments)``) + + If the path info is not passed to the match method the default path + info of the map is used (defaults to the root URL if not defined + explicitly). + + All of the exceptions raised are subclasses of `HTTPException` so they + can be used as WSGI responses. They will all render generic error or + redirect pages. + + Here is a small example for matching: + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/<int:id>', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.match("/", "GET") + ('index', {}) + >>> urls.match("/downloads/42") + ('downloads/show', {'id': 42}) + + And here is what happens on redirect and missing URLs: + + >>> urls.match("/downloads") + Traceback (most recent call last): + ... + RequestRedirect: http://example.com/downloads/ + >>> urls.match("/missing") + Traceback (most recent call last): + ... + NotFound: 404 Not Found + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + :param return_rule: return the rule that matched instead of just the + endpoint (defaults to `False`). + :param query_args: optional query arguments that are used for + automatic redirects as string or dictionary. It's + currently not possible to use the query arguments + for URL matching. + + .. versionadded:: 0.6 + `return_rule` was added. + + .. versionadded:: 0.7 + `query_args` was added. + + .. versionchanged:: 0.8 + `query_args` can now also be a string. + """ + self.map.update() + if path_info is None: + path_info = self.path_info + else: + path_info = to_unicode(path_info, self.map.charset) + if query_args is None: + query_args = self.query_args + method = (method or self.default_method).upper() + + path = u"%s|%s" % ( + self.map.host_matching and self.server_name or self.subdomain, + path_info and "/%s" % path_info.lstrip("/"), + ) + + have_match_for = set() + for rule in self.map._rules: + try: + rv = rule.match(path, method) + except RequestSlash: + raise RequestRedirect( + self.make_redirect_url( + url_quote(path_info, self.map.charset, safe="/:|+") + "/", + query_args, + ) + ) + except RequestAliasRedirect as e: + raise RequestRedirect( + self.make_alias_redirect_url( + path, rule.endpoint, e.matched_values, method, query_args + ) + ) + if rv is None: + continue + if rule.methods is not None and method not in rule.methods: + have_match_for.update(rule.methods) + continue + + if self.map.redirect_defaults: + redirect_url = self.get_default_redirect(rule, method, rv, query_args) + if redirect_url is not None: + raise RequestRedirect(redirect_url) + + if rule.redirect_to is not None: + if isinstance(rule.redirect_to, string_types): + + def _handle_match(match): + value = rv[match.group(1)] + return rule._converters[match.group(1)].to_url(value) + + redirect_url = _simple_rule_re.sub(_handle_match, rule.redirect_to) + else: + redirect_url = rule.redirect_to(self, **rv) + raise RequestRedirect( + str( + url_join( + "%s://%s%s%s" + % ( + self.url_scheme or "http", + self.subdomain + "." if self.subdomain else "", + self.server_name, + self.script_name, + ), + redirect_url, + ) + ) + ) + + if return_rule: + return rule, rv + else: + return rule.endpoint, rv + + if have_match_for: + raise MethodNotAllowed(valid_methods=list(have_match_for)) + raise NotFound() + + def test(self, path_info=None, method=None): + """Test if a rule would match. Works like `match` but returns `True` + if the URL matches, or `False` if it does not exist. + + :param path_info: the path info to use for matching. Overrides the + path info specified on binding. + :param method: the HTTP method used for matching. Overrides the + method specified on binding. + """ + try: + self.match(path_info, method) + except RequestRedirect: + pass + except HTTPException: + return False + return True + + def allowed_methods(self, path_info=None): + """Returns the valid methods that match for a given path. + + .. versionadded:: 0.7 + """ + try: + self.match(path_info, method="--") + except MethodNotAllowed as e: + return e.valid_methods + except HTTPException: + pass + return [] + + def get_host(self, domain_part): + """Figures out the full host name for the given domain part. The + domain part is a subdomain in case host matching is disabled or + a full host name. + """ + if self.map.host_matching: + if domain_part is None: + return self.server_name + return to_unicode(domain_part, "ascii") + subdomain = domain_part + if subdomain is None: + subdomain = self.subdomain + else: + subdomain = to_unicode(subdomain, "ascii") + return (subdomain + u"." if subdomain else u"") + self.server_name + + def get_default_redirect(self, rule, method, values, query_args): + """A helper that returns the URL to redirect to if it finds one. + This is used for default redirecting only. + + :internal: + """ + assert self.map.redirect_defaults + for r in self.map._rules_by_endpoint[rule.endpoint]: + # every rule that comes after this one, including ourself + # has a lower priority for the defaults. We order the ones + # with the highest priority up for building. + if r is rule: + break + if r.provides_defaults_for(rule) and r.suitable_for(values, method): + values.update(r.defaults) + domain_part, path = r.build(values) + return self.make_redirect_url(path, query_args, domain_part=domain_part) + + def encode_query_args(self, query_args): + if not isinstance(query_args, string_types): + query_args = url_encode(query_args, self.map.charset) + return query_args + + def make_redirect_url(self, path_info, query_args=None, domain_part=None): + """Creates a redirect URL. + + :internal: + """ + suffix = "" + if query_args: + suffix = "?" + self.encode_query_args(query_args) + return str( + "%s://%s/%s%s" + % ( + self.url_scheme or "http", + self.get_host(domain_part), + posixpath.join( + self.script_name[:-1].lstrip("/"), path_info.lstrip("/") + ), + suffix, + ) + ) + + def make_alias_redirect_url(self, path, endpoint, values, method, query_args): + """Internally called to make an alias redirect URL.""" + url = self.build( + endpoint, values, method, append_unknown=False, force_external=True + ) + if query_args: + url += "?" + self.encode_query_args(query_args) + assert url != path, "detected invalid alias setting. No canonical URL found" + return url + + def _partial_build(self, endpoint, values, method, append_unknown): + """Helper for :meth:`build`. Returns subdomain and path for the + rule that accepts this endpoint, values and method. + + :internal: + """ + # in case the method is none, try with the default method first + if method is None: + rv = self._partial_build( + endpoint, values, self.default_method, append_unknown + ) + if rv is not None: + return rv + + # default method did not match or a specific method is passed, + # check all and go with first result. + for rule in self.map._rules_by_endpoint.get(endpoint, ()): + if rule.suitable_for(values, method): + rv = rule.build(values, append_unknown) + if rv is not None: + return rv + + def build( + self, + endpoint, + values=None, + method=None, + force_external=False, + append_unknown=True, + ): + """Building URLs works pretty much the other way round. Instead of + `match` you call `build` and pass it the endpoint and a dict of + arguments for the placeholders. + + The `build` function also accepts an argument called `force_external` + which, if you set it to `True` will force external URLs. Per default + external URLs (include the server name) will only be used if the + target URL is on a different subdomain. + + >>> m = Map([ + ... Rule('/', endpoint='index'), + ... Rule('/downloads/', endpoint='downloads/index'), + ... Rule('/downloads/<int:id>', endpoint='downloads/show') + ... ]) + >>> urls = m.bind("example.com", "/") + >>> urls.build("index", {}) + '/' + >>> urls.build("downloads/show", {'id': 42}) + '/downloads/42' + >>> urls.build("downloads/show", {'id': 42}, force_external=True) + 'http://example.com/downloads/42' + + Because URLs cannot contain non ASCII data you will always get + bytestrings back. Non ASCII characters are urlencoded with the + charset defined on the map instance. + + Additional values are converted to unicode and appended to the URL as + URL querystring parameters: + + >>> urls.build("index", {'q': 'My Searchstring'}) + '/?q=My+Searchstring' + + When processing those additional values, lists are furthermore + interpreted as multiple values (as per + :py:class:`werkzeug.datastructures.MultiDict`): + + >>> urls.build("index", {'q': ['a', 'b', 'c']}) + '/?q=a&q=b&q=c' + + Passing a ``MultiDict`` will also add multiple values: + + >>> urls.build("index", MultiDict((('p', 'z'), ('q', 'a'), ('q', 'b')))) + '/?p=z&q=a&q=b' + + If a rule does not exist when building a `BuildError` exception is + raised. + + The build method accepts an argument called `method` which allows you + to specify the method you want to have an URL built for if you have + different methods for the same endpoint specified. + + .. versionadded:: 0.6 + the `append_unknown` parameter was added. + + :param endpoint: the endpoint of the URL to build. + :param values: the values for the URL to build. Unhandled values are + appended to the URL as query parameters. + :param method: the HTTP method for the rule if there are different + URLs for different methods on the same endpoint. + :param force_external: enforce full canonical external URLs. If the URL + scheme is not provided, this will generate + a protocol-relative URL. + :param append_unknown: unknown parameters are appended to the generated + URL as query string argument. Disable this + if you want the builder to ignore those. + """ + self.map.update() + + if values: + if isinstance(values, MultiDict): + temp_values = {} + # iteritems(dict, values) is like `values.lists()` + # without the call or `list()` coercion overhead. + for key, value in iteritems(dict, values): + if not value: + continue + if len(value) == 1: # flatten single item lists + value = value[0] + if value is None: # drop None + continue + temp_values[key] = value + values = temp_values + else: + # drop None + values = dict(i for i in iteritems(values) if i[1] is not None) + else: + values = {} + + rv = self._partial_build(endpoint, values, method, append_unknown) + if rv is None: + raise BuildError(endpoint, values, method, self) + domain_part, path = rv + + host = self.get_host(domain_part) + + # shortcut this. + if not force_external and ( + (self.map.host_matching and host == self.server_name) + or (not self.map.host_matching and domain_part == self.subdomain) + ): + return "%s/%s" % (self.script_name.rstrip("/"), path.lstrip("/")) + return str( + "%s//%s%s/%s" + % ( + self.url_scheme + ":" if self.url_scheme else "", + host, + self.script_name[:-1], + path.lstrip("/"), + ) + ) diff --git a/python/werkzeug/security.py b/python/werkzeug/security.py new file mode 100644 index 0000000..1842afd --- /dev/null +++ b/python/werkzeug/security.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.security + ~~~~~~~~~~~~~~~~~ + + Security related helpers such as secure password hashing tools. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import hashlib +import hmac +import os +import posixpath +from random import SystemRandom +from struct import Struct + +from ._compat import izip +from ._compat import PY2 +from ._compat import range_type +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_native + +SALT_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" +DEFAULT_PBKDF2_ITERATIONS = 150000 + +_pack_int = Struct(">I").pack +_builtin_safe_str_cmp = getattr(hmac, "compare_digest", None) +_sys_rng = SystemRandom() +_os_alt_seps = list( + sep for sep in [os.path.sep, os.path.altsep] if sep not in (None, "/") +) + + +def pbkdf2_hex( + data, salt, iterations=DEFAULT_PBKDF2_ITERATIONS, keylen=None, hashfunc=None +): + """Like :func:`pbkdf2_bin`, but returns a hex-encoded string. + + .. versionadded:: 0.9 + + :param data: the data to derive. + :param salt: the salt for the derivation. + :param iterations: the number of iterations. + :param keylen: the length of the resulting key. If not provided, + the digest size will be used. + :param hashfunc: the hash function to use. This can either be the + string name of a known hash function, or a function + from the hashlib module. Defaults to sha256. + """ + rv = pbkdf2_bin(data, salt, iterations, keylen, hashfunc) + return to_native(codecs.encode(rv, "hex_codec")) + + +def pbkdf2_bin( + data, salt, iterations=DEFAULT_PBKDF2_ITERATIONS, keylen=None, hashfunc=None +): + """Returns a binary digest for the PBKDF2 hash algorithm of `data` + with the given `salt`. It iterates `iterations` times and produces a + key of `keylen` bytes. By default, SHA-256 is used as hash function; + a different hashlib `hashfunc` can be provided. + + .. versionadded:: 0.9 + + :param data: the data to derive. + :param salt: the salt for the derivation. + :param iterations: the number of iterations. + :param keylen: the length of the resulting key. If not provided + the digest size will be used. + :param hashfunc: the hash function to use. This can either be the + string name of a known hash function or a function + from the hashlib module. Defaults to sha256. + """ + if not hashfunc: + hashfunc = "sha256" + + data = to_bytes(data) + salt = to_bytes(salt) + + if callable(hashfunc): + _test_hash = hashfunc() + hash_name = getattr(_test_hash, "name", None) + else: + hash_name = hashfunc + return hashlib.pbkdf2_hmac(hash_name, data, salt, iterations, keylen) + + +def safe_str_cmp(a, b): + """This function compares strings in somewhat constant time. This + requires that the length of at least one string is known in advance. + + Returns `True` if the two strings are equal, or `False` if they are not. + + .. versionadded:: 0.7 + """ + if isinstance(a, text_type): + a = a.encode("utf-8") + if isinstance(b, text_type): + b = b.encode("utf-8") + + if _builtin_safe_str_cmp is not None: + return _builtin_safe_str_cmp(a, b) + + if len(a) != len(b): + return False + + rv = 0 + if PY2: + for x, y in izip(a, b): + rv |= ord(x) ^ ord(y) + else: + for x, y in izip(a, b): + rv |= x ^ y + + return rv == 0 + + +def gen_salt(length): + """Generate a random string of SALT_CHARS with specified ``length``.""" + if length <= 0: + raise ValueError("Salt length must be positive") + return "".join(_sys_rng.choice(SALT_CHARS) for _ in range_type(length)) + + +def _hash_internal(method, salt, password): + """Internal password hash helper. Supports plaintext without salt, + unsalted and salted passwords. In case salted passwords are used + hmac is used. + """ + if method == "plain": + return password, method + + if isinstance(password, text_type): + password = password.encode("utf-8") + + if method.startswith("pbkdf2:"): + args = method[7:].split(":") + if len(args) not in (1, 2): + raise ValueError("Invalid number of arguments for PBKDF2") + method = args.pop(0) + iterations = args and int(args[0] or 0) or DEFAULT_PBKDF2_ITERATIONS + is_pbkdf2 = True + actual_method = "pbkdf2:%s:%d" % (method, iterations) + else: + is_pbkdf2 = False + actual_method = method + + if is_pbkdf2: + if not salt: + raise ValueError("Salt is required for PBKDF2") + rv = pbkdf2_hex(password, salt, iterations, hashfunc=method) + elif salt: + if isinstance(salt, text_type): + salt = salt.encode("utf-8") + mac = _create_mac(salt, password, method) + rv = mac.hexdigest() + else: + rv = hashlib.new(method, password).hexdigest() + return rv, actual_method + + +def _create_mac(key, msg, method): + if callable(method): + return hmac.HMAC(key, msg, method) + + def hashfunc(d=b""): + return hashlib.new(method, d) + + # Python 2.7 used ``hasattr(digestmod, '__call__')`` + # to detect if hashfunc is callable + hashfunc.__call__ = hashfunc + return hmac.HMAC(key, msg, hashfunc) + + +def generate_password_hash(password, method="pbkdf2:sha256", salt_length=8): + """Hash a password with the given method and salt with a string of + the given length. The format of the string returned includes the method + that was used so that :func:`check_password_hash` can check the hash. + + The format for the hashed string looks like this:: + + method$salt$hash + + This method can **not** generate unsalted passwords but it is possible + to set param method='plain' in order to enforce plaintext passwords. + If a salt is used, hmac is used internally to salt the password. + + If PBKDF2 is wanted it can be enabled by setting the method to + ``pbkdf2:method:iterations`` where iterations is optional:: + + pbkdf2:sha256:80000$salt$hash + pbkdf2:sha256$salt$hash + + :param password: the password to hash. + :param method: the hash method to use (one that hashlib supports). Can + optionally be in the format ``pbkdf2:<method>[:iterations]`` + to enable PBKDF2. + :param salt_length: the length of the salt in letters. + """ + salt = gen_salt(salt_length) if method != "plain" else "" + h, actual_method = _hash_internal(method, salt, password) + return "%s$%s$%s" % (actual_method, salt, h) + + +def check_password_hash(pwhash, password): + """check a password against a given salted and hashed password value. + In order to support unsalted legacy passwords this method supports + plain text passwords, md5 and sha1 hashes (both salted and unsalted). + + Returns `True` if the password matched, `False` otherwise. + + :param pwhash: a hashed string like returned by + :func:`generate_password_hash`. + :param password: the plaintext password to compare against the hash. + """ + if pwhash.count("$") < 2: + return False + method, salt, hashval = pwhash.split("$", 2) + return safe_str_cmp(_hash_internal(method, salt, password)[0], hashval) + + +def safe_join(directory, *pathnames): + """Safely join `directory` and one or more untrusted `pathnames`. If this + cannot be done, this function returns ``None``. + + :param directory: the base directory. + :param pathnames: the untrusted pathnames relative to that directory. + """ + parts = [directory] + for filename in pathnames: + if filename != "": + filename = posixpath.normpath(filename) + for sep in _os_alt_seps: + if sep in filename: + return None + if os.path.isabs(filename) or filename == ".." or filename.startswith("../"): + return None + parts.append(filename) + return posixpath.join(*parts) diff --git a/python/werkzeug/serving.py b/python/werkzeug/serving.py new file mode 100644 index 0000000..ff9f880 --- /dev/null +++ b/python/werkzeug/serving.py @@ -0,0 +1,1074 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.serving + ~~~~~~~~~~~~~~~~ + + There are many ways to serve a WSGI application. While you're developing + it you usually don't want a full blown webserver like Apache but a simple + standalone one. From Python 2.5 onwards there is the `wsgiref`_ server in + the standard library. If you're using older versions of Python you can + download the package from the cheeseshop. + + However there are some caveats. Sourcecode won't reload itself when + changed and each time you kill the server using ``^C`` you get an + `KeyboardInterrupt` error. While the latter is easy to solve the first + one can be a pain in the ass in some situations. + + The easiest way is creating a small ``start-myproject.py`` that runs the + application:: + + #!/usr/bin/env python + # -*- coding: utf-8 -*- + from myproject import make_app + from werkzeug.serving import run_simple + + app = make_app(...) + run_simple('localhost', 8080, app, use_reloader=True) + + You can also pass it a `extra_files` keyword argument with a list of + additional files (like configuration files) you want to observe. + + For bigger applications you should consider using `click` + (http://click.pocoo.org) instead of a simple start file. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import io +import os +import signal +import socket +import sys + +import werkzeug +from ._compat import PY2 +from ._compat import reraise +from ._compat import WIN +from ._compat import wsgi_encoding_dance +from ._internal import _log +from .exceptions import InternalServerError +from .urls import uri_to_iri +from .urls import url_parse +from .urls import url_unquote + +try: + import socketserver + from http.server import BaseHTTPRequestHandler + from http.server import HTTPServer +except ImportError: + import SocketServer as socketserver + from BaseHTTPServer import HTTPServer + from BaseHTTPServer import BaseHTTPRequestHandler + +try: + import ssl +except ImportError: + + class _SslDummy(object): + def __getattr__(self, name): + raise RuntimeError("SSL support unavailable") + + ssl = _SslDummy() + +try: + import termcolor +except ImportError: + termcolor = None + + +def _get_openssl_crypto_module(): + try: + from OpenSSL import crypto + except ImportError: + raise TypeError("Using ad-hoc certificates requires the pyOpenSSL library.") + else: + return crypto + + +ThreadingMixIn = socketserver.ThreadingMixIn +can_fork = hasattr(os, "fork") + +if can_fork: + ForkingMixIn = socketserver.ForkingMixIn +else: + + class ForkingMixIn(object): + pass + + +try: + af_unix = socket.AF_UNIX +except AttributeError: + af_unix = None + + +LISTEN_QUEUE = 128 +can_open_by_fd = not WIN and hasattr(socket, "fromfd") + +# On Python 3, ConnectionError represents the same errnos as +# socket.error from Python 2, while socket.error is an alias for the +# more generic OSError. +if PY2: + _ConnectionError = socket.error +else: + _ConnectionError = ConnectionError + + +class DechunkedInput(io.RawIOBase): + """An input stream that handles Transfer-Encoding 'chunked'""" + + def __init__(self, rfile): + self._rfile = rfile + self._done = False + self._len = 0 + + def readable(self): + return True + + def read_chunk_len(self): + try: + line = self._rfile.readline().decode("latin1") + _len = int(line.strip(), 16) + except ValueError: + raise IOError("Invalid chunk header") + if _len < 0: + raise IOError("Negative chunk length not allowed") + return _len + + def readinto(self, buf): + read = 0 + while not self._done and read < len(buf): + if self._len == 0: + # This is the first chunk or we fully consumed the previous + # one. Read the next length of the next chunk + self._len = self.read_chunk_len() + + if self._len == 0: + # Found the final chunk of size 0. The stream is now exhausted, + # but there is still a final newline that should be consumed + self._done = True + + if self._len > 0: + # There is data (left) in this chunk, so append it to the + # buffer. If this operation fully consumes the chunk, this will + # reset self._len to 0. + n = min(len(buf), self._len) + buf[read : read + n] = self._rfile.read(n) + self._len -= n + read += n + + if self._len == 0: + # Skip the terminating newline of a chunk that has been fully + # consumed. This also applies to the 0-sized final chunk + terminator = self._rfile.readline() + if terminator not in (b"\n", b"\r\n", b"\r"): + raise IOError("Missing chunk terminating newline") + + return read + + +class WSGIRequestHandler(BaseHTTPRequestHandler, object): + + """A request handler that implements WSGI dispatching.""" + + @property + def server_version(self): + return "Werkzeug/" + werkzeug.__version__ + + def make_environ(self): + request_url = url_parse(self.path) + + def shutdown_server(): + self.server.shutdown_signal = True + + url_scheme = "http" if self.server.ssl_context is None else "https" + if not self.client_address: + self.client_address = "<local>" + if isinstance(self.client_address, str): + self.client_address = (self.client_address, 0) + else: + pass + path_info = url_unquote(request_url.path) + + environ = { + "wsgi.version": (1, 0), + "wsgi.url_scheme": url_scheme, + "wsgi.input": self.rfile, + "wsgi.errors": sys.stderr, + "wsgi.multithread": self.server.multithread, + "wsgi.multiprocess": self.server.multiprocess, + "wsgi.run_once": False, + "werkzeug.server.shutdown": shutdown_server, + "SERVER_SOFTWARE": self.server_version, + "REQUEST_METHOD": self.command, + "SCRIPT_NAME": "", + "PATH_INFO": wsgi_encoding_dance(path_info), + "QUERY_STRING": wsgi_encoding_dance(request_url.query), + # Non-standard, added by mod_wsgi, uWSGI + "REQUEST_URI": wsgi_encoding_dance(self.path), + # Non-standard, added by gunicorn + "RAW_URI": wsgi_encoding_dance(self.path), + "REMOTE_ADDR": self.address_string(), + "REMOTE_PORT": self.port_integer(), + "SERVER_NAME": self.server.server_address[0], + "SERVER_PORT": str(self.server.server_address[1]), + "SERVER_PROTOCOL": self.request_version, + } + + for key, value in self.get_header_items(): + key = key.upper().replace("-", "_") + value = value.replace("\r\n", "") + if key not in ("CONTENT_TYPE", "CONTENT_LENGTH"): + key = "HTTP_" + key + if key in environ: + value = "{},{}".format(environ[key], value) + environ[key] = value + + if environ.get("HTTP_TRANSFER_ENCODING", "").strip().lower() == "chunked": + environ["wsgi.input_terminated"] = True + environ["wsgi.input"] = DechunkedInput(environ["wsgi.input"]) + + if request_url.scheme and request_url.netloc: + environ["HTTP_HOST"] = request_url.netloc + + return environ + + def run_wsgi(self): + if self.headers.get("Expect", "").lower().strip() == "100-continue": + self.wfile.write(b"HTTP/1.1 100 Continue\r\n\r\n") + + self.environ = environ = self.make_environ() + headers_set = [] + headers_sent = [] + + def write(data): + assert headers_set, "write() before start_response" + if not headers_sent: + status, response_headers = headers_sent[:] = headers_set + try: + code, msg = status.split(None, 1) + except ValueError: + code, msg = status, "" + code = int(code) + self.send_response(code, msg) + header_keys = set() + for key, value in response_headers: + self.send_header(key, value) + key = key.lower() + header_keys.add(key) + if not ( + "content-length" in header_keys + or environ["REQUEST_METHOD"] == "HEAD" + or code < 200 + or code in (204, 304) + ): + self.close_connection = True + self.send_header("Connection", "close") + if "server" not in header_keys: + self.send_header("Server", self.version_string()) + if "date" not in header_keys: + self.send_header("Date", self.date_time_string()) + self.end_headers() + + assert isinstance(data, bytes), "applications must write bytes" + self.wfile.write(data) + self.wfile.flush() + + def start_response(status, response_headers, exc_info=None): + if exc_info: + try: + if headers_sent: + reraise(*exc_info) + finally: + exc_info = None + elif headers_set: + raise AssertionError("Headers already set") + headers_set[:] = [status, response_headers] + return write + + def execute(app): + application_iter = app(environ, start_response) + try: + for data in application_iter: + write(data) + if not headers_sent: + write(b"") + finally: + if hasattr(application_iter, "close"): + application_iter.close() + application_iter = None + + try: + execute(self.server.app) + except (_ConnectionError, socket.timeout) as e: + self.connection_dropped(e, environ) + except Exception: + if self.server.passthrough_errors: + raise + from .debug.tbtools import get_current_traceback + + traceback = get_current_traceback(ignore_system_exceptions=True) + try: + # if we haven't yet sent the headers but they are set + # we roll back to be able to set them again. + if not headers_sent: + del headers_set[:] + execute(InternalServerError()) + except Exception: + pass + self.server.log("error", "Error on request:\n%s", traceback.plaintext) + + def handle(self): + """Handles a request ignoring dropped connections.""" + rv = None + try: + rv = BaseHTTPRequestHandler.handle(self) + except (_ConnectionError, socket.timeout) as e: + self.connection_dropped(e) + except Exception as e: + if self.server.ssl_context is None or not is_ssl_error(e): + raise + if self.server.shutdown_signal: + self.initiate_shutdown() + return rv + + def initiate_shutdown(self): + """A horrible, horrible way to kill the server for Python 2.6 and + later. It's the best we can do. + """ + # Windows does not provide SIGKILL, go with SIGTERM then. + sig = getattr(signal, "SIGKILL", signal.SIGTERM) + # reloader active + if is_running_from_reloader(): + os.kill(os.getpid(), sig) + # python 2.7 + self.server._BaseServer__shutdown_request = True + # python 2.6 + self.server._BaseServer__serving = False + + def connection_dropped(self, error, environ=None): + """Called if the connection was closed by the client. By default + nothing happens. + """ + + def handle_one_request(self): + """Handle a single HTTP request.""" + self.raw_requestline = self.rfile.readline() + if not self.raw_requestline: + self.close_connection = 1 + elif self.parse_request(): + return self.run_wsgi() + + def send_response(self, code, message=None): + """Send the response header and log the response code.""" + self.log_request(code) + if message is None: + message = code in self.responses and self.responses[code][0] or "" + if self.request_version != "HTTP/0.9": + hdr = "%s %d %s\r\n" % (self.protocol_version, code, message) + self.wfile.write(hdr.encode("ascii")) + + def version_string(self): + return BaseHTTPRequestHandler.version_string(self).strip() + + def address_string(self): + if getattr(self, "environ", None): + return self.environ["REMOTE_ADDR"] + elif not self.client_address: + return "<local>" + elif isinstance(self.client_address, str): + return self.client_address + else: + return self.client_address[0] + + def port_integer(self): + return self.client_address[1] + + def log_request(self, code="-", size="-"): + try: + path = uri_to_iri(self.path) + msg = "%s %s %s" % (self.command, path, self.request_version) + except AttributeError: + # path isn't set if the requestline was bad + msg = self.requestline + + code = str(code) + + if termcolor: + color = termcolor.colored + + if code[0] == "1": # 1xx - Informational + msg = color(msg, attrs=["bold"]) + elif code[0] == "2": # 2xx - Success + msg = color(msg, color="white") + elif code == "304": # 304 - Resource Not Modified + msg = color(msg, color="cyan") + elif code[0] == "3": # 3xx - Redirection + msg = color(msg, color="green") + elif code == "404": # 404 - Resource Not Found + msg = color(msg, color="yellow") + elif code[0] == "4": # 4xx - Client Error + msg = color(msg, color="red", attrs=["bold"]) + else: # 5xx, or any other response + msg = color(msg, color="magenta", attrs=["bold"]) + + self.log("info", '"%s" %s %s', msg, code, size) + + def log_error(self, *args): + self.log("error", *args) + + def log_message(self, format, *args): + self.log("info", format, *args) + + def log(self, type, message, *args): + _log( + type, + "%s - - [%s] %s\n" + % (self.address_string(), self.log_date_time_string(), message % args), + ) + + def get_header_items(self): + """ + Get an iterable list of key/value pairs representing headers. + + This function provides Python 2/3 compatibility as related to the + parsing of request headers. Python 2.7 is not compliant with + RFC 3875 Section 4.1.18 which requires multiple values for headers + to be provided or RFC 2616 which allows for folding of multi-line + headers. This function will return a matching list regardless + of Python version. It can be removed once Python 2.7 support + is dropped. + + :return: List of tuples containing header hey/value pairs + """ + if PY2: + # For Python 2, process the headers manually according to + # W3C RFC 2616 Section 4.2. + items = [] + for header in self.headers.headers: + # Remove "\r\n" from the header and split on ":" to get + # the field name and value. + try: + key, value = header[0:-2].split(":", 1) + except ValueError: + # If header could not be slit with : but starts with white + # space and it follows an existing header, it's a folded + # header. + if header[0] in ("\t", " ") and items: + # Pop off the last header + key, value = items.pop() + # Append the current header to the value of the last + # header which will be placed back on the end of the + # list + value = value + header + # Otherwise it's just a bad header and should error + else: + # Re-raise the value error + raise + + # Add the key and the value once stripped of leading + # white space. The specification allows for stripping + # trailing white space but the Python 3 code does not + # strip trailing white space. Therefore, trailing space + # will be left as is to match the Python 3 behavior. + items.append((key, value.lstrip())) + else: + items = self.headers.items() + + return items + + +#: backwards compatible name if someone is subclassing it +BaseRequestHandler = WSGIRequestHandler + + +def generate_adhoc_ssl_pair(cn=None): + from random import random + + crypto = _get_openssl_crypto_module() + + # pretty damn sure that this is not actually accepted by anyone + if cn is None: + cn = "*" + + cert = crypto.X509() + cert.set_serial_number(int(random() * sys.maxsize)) + cert.gmtime_adj_notBefore(0) + cert.gmtime_adj_notAfter(60 * 60 * 24 * 365) + + subject = cert.get_subject() + subject.CN = cn + subject.O = "Dummy Certificate" # noqa: E741 + + issuer = cert.get_issuer() + issuer.CN = subject.CN + issuer.O = subject.O # noqa: E741 + + pkey = crypto.PKey() + pkey.generate_key(crypto.TYPE_RSA, 2048) + cert.set_pubkey(pkey) + cert.sign(pkey, "sha256") + + return cert, pkey + + +def make_ssl_devcert(base_path, host=None, cn=None): + """Creates an SSL key for development. This should be used instead of + the ``'adhoc'`` key which generates a new cert on each server start. + It accepts a path for where it should store the key and cert and + either a host or CN. If a host is given it will use the CN + ``*.host/CN=host``. + + For more information see :func:`run_simple`. + + .. versionadded:: 0.9 + + :param base_path: the path to the certificate and key. The extension + ``.crt`` is added for the certificate, ``.key`` is + added for the key. + :param host: the name of the host. This can be used as an alternative + for the `cn`. + :param cn: the `CN` to use. + """ + from OpenSSL import crypto + + if host is not None: + cn = "*.%s/CN=%s" % (host, host) + cert, pkey = generate_adhoc_ssl_pair(cn=cn) + + cert_file = base_path + ".crt" + pkey_file = base_path + ".key" + + with open(cert_file, "wb") as f: + f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) + with open(pkey_file, "wb") as f: + f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, pkey)) + + return cert_file, pkey_file + + +def generate_adhoc_ssl_context(): + """Generates an adhoc SSL context for the development server.""" + crypto = _get_openssl_crypto_module() + import tempfile + import atexit + + cert, pkey = generate_adhoc_ssl_pair() + cert_handle, cert_file = tempfile.mkstemp() + pkey_handle, pkey_file = tempfile.mkstemp() + atexit.register(os.remove, pkey_file) + atexit.register(os.remove, cert_file) + + os.write(cert_handle, crypto.dump_certificate(crypto.FILETYPE_PEM, cert)) + os.write(pkey_handle, crypto.dump_privatekey(crypto.FILETYPE_PEM, pkey)) + os.close(cert_handle) + os.close(pkey_handle) + ctx = load_ssl_context(cert_file, pkey_file) + return ctx + + +def load_ssl_context(cert_file, pkey_file=None, protocol=None): + """Loads SSL context from cert/private key files and optional protocol. + Many parameters are directly taken from the API of + :py:class:`ssl.SSLContext`. + + :param cert_file: Path of the certificate to use. + :param pkey_file: Path of the private key to use. If not given, the key + will be obtained from the certificate file. + :param protocol: One of the ``PROTOCOL_*`` constants in the stdlib ``ssl`` + module. Defaults to ``PROTOCOL_SSLv23``. + """ + if protocol is None: + protocol = ssl.PROTOCOL_SSLv23 + ctx = _SSLContext(protocol) + ctx.load_cert_chain(cert_file, pkey_file) + return ctx + + +class _SSLContext(object): + + """A dummy class with a small subset of Python3's ``ssl.SSLContext``, only + intended to be used with and by Werkzeug.""" + + def __init__(self, protocol): + self._protocol = protocol + self._certfile = None + self._keyfile = None + self._password = None + + def load_cert_chain(self, certfile, keyfile=None, password=None): + self._certfile = certfile + self._keyfile = keyfile or certfile + self._password = password + + def wrap_socket(self, sock, **kwargs): + return ssl.wrap_socket( + sock, + keyfile=self._keyfile, + certfile=self._certfile, + ssl_version=self._protocol, + **kwargs + ) + + +def is_ssl_error(error=None): + """Checks if the given error (or the current one) is an SSL error.""" + exc_types = (ssl.SSLError,) + try: + from OpenSSL.SSL import Error + + exc_types += (Error,) + except ImportError: + pass + + if error is None: + error = sys.exc_info()[1] + return isinstance(error, exc_types) + + +def select_address_family(host, port): + """Return ``AF_INET4``, ``AF_INET6``, or ``AF_UNIX`` depending on + the host and port.""" + # disabled due to problems with current ipv6 implementations + # and various operating systems. Probably this code also is + # not supposed to work, but I can't come up with any other + # ways to implement this. + # try: + # info = socket.getaddrinfo(host, port, socket.AF_UNSPEC, + # socket.SOCK_STREAM, 0, + # socket.AI_PASSIVE) + # if info: + # return info[0][0] + # except socket.gaierror: + # pass + if host.startswith("unix://"): + return socket.AF_UNIX + elif ":" in host and hasattr(socket, "AF_INET6"): + return socket.AF_INET6 + return socket.AF_INET + + +def get_sockaddr(host, port, family): + """Return a fully qualified socket address that can be passed to + :func:`socket.bind`.""" + if family == af_unix: + return host.split("://", 1)[1] + try: + res = socket.getaddrinfo( + host, port, family, socket.SOCK_STREAM, socket.IPPROTO_TCP + ) + except socket.gaierror: + return host, port + return res[0][4] + + +class BaseWSGIServer(HTTPServer, object): + + """Simple single-threaded, single-process WSGI server.""" + + multithread = False + multiprocess = False + request_queue_size = LISTEN_QUEUE + + def __init__( + self, + host, + port, + app, + handler=None, + passthrough_errors=False, + ssl_context=None, + fd=None, + ): + if handler is None: + handler = WSGIRequestHandler + + self.address_family = select_address_family(host, port) + + if fd is not None: + real_sock = socket.fromfd(fd, self.address_family, socket.SOCK_STREAM) + port = 0 + + server_address = get_sockaddr(host, int(port), self.address_family) + + # remove socket file if it already exists + if self.address_family == af_unix and os.path.exists(server_address): + os.unlink(server_address) + HTTPServer.__init__(self, server_address, handler) + + self.app = app + self.passthrough_errors = passthrough_errors + self.shutdown_signal = False + self.host = host + self.port = self.socket.getsockname()[1] + + # Patch in the original socket. + if fd is not None: + self.socket.close() + self.socket = real_sock + self.server_address = self.socket.getsockname() + + if ssl_context is not None: + if isinstance(ssl_context, tuple): + ssl_context = load_ssl_context(*ssl_context) + if ssl_context == "adhoc": + ssl_context = generate_adhoc_ssl_context() + # If we are on Python 2 the return value from socket.fromfd + # is an internal socket object but what we need for ssl wrap + # is the wrapper around it :( + sock = self.socket + if PY2 and not isinstance(sock, socket.socket): + sock = socket.socket(sock.family, sock.type, sock.proto, sock) + self.socket = ssl_context.wrap_socket(sock, server_side=True) + self.ssl_context = ssl_context + else: + self.ssl_context = None + + def log(self, type, message, *args): + _log(type, message, *args) + + def serve_forever(self): + self.shutdown_signal = False + try: + HTTPServer.serve_forever(self) + except KeyboardInterrupt: + pass + finally: + self.server_close() + + def handle_error(self, request, client_address): + if self.passthrough_errors: + raise + # Python 2 still causes a socket.error after the earlier + # handling, so silence it here. + if isinstance(sys.exc_info()[1], _ConnectionError): + return + return HTTPServer.handle_error(self, request, client_address) + + def get_request(self): + con, info = self.socket.accept() + return con, info + + +class ThreadedWSGIServer(ThreadingMixIn, BaseWSGIServer): + + """A WSGI server that does threading.""" + + multithread = True + daemon_threads = True + + +class ForkingWSGIServer(ForkingMixIn, BaseWSGIServer): + + """A WSGI server that does forking.""" + + multiprocess = True + + def __init__( + self, + host, + port, + app, + processes=40, + handler=None, + passthrough_errors=False, + ssl_context=None, + fd=None, + ): + if not can_fork: + raise ValueError("Your platform does not support forking.") + BaseWSGIServer.__init__( + self, host, port, app, handler, passthrough_errors, ssl_context, fd + ) + self.max_children = processes + + +def make_server( + host=None, + port=None, + app=None, + threaded=False, + processes=1, + request_handler=None, + passthrough_errors=False, + ssl_context=None, + fd=None, +): + """Create a new server instance that is either threaded, or forks + or just processes one request after another. + """ + if threaded and processes > 1: + raise ValueError("cannot have a multithreaded and multi process server.") + elif threaded: + return ThreadedWSGIServer( + host, port, app, request_handler, passthrough_errors, ssl_context, fd=fd + ) + elif processes > 1: + return ForkingWSGIServer( + host, + port, + app, + processes, + request_handler, + passthrough_errors, + ssl_context, + fd=fd, + ) + else: + return BaseWSGIServer( + host, port, app, request_handler, passthrough_errors, ssl_context, fd=fd + ) + + +def is_running_from_reloader(): + """Checks if the application is running from within the Werkzeug + reloader subprocess. + + .. versionadded:: 0.10 + """ + return os.environ.get("WERKZEUG_RUN_MAIN") == "true" + + +def run_simple( + hostname, + port, + application, + use_reloader=False, + use_debugger=False, + use_evalex=True, + extra_files=None, + reloader_interval=1, + reloader_type="auto", + threaded=False, + processes=1, + request_handler=None, + static_files=None, + passthrough_errors=False, + ssl_context=None, +): + """Start a WSGI application. Optional features include a reloader, + multithreading and fork support. + + This function has a command-line interface too:: + + python -m werkzeug.serving --help + + .. versionadded:: 0.5 + `static_files` was added to simplify serving of static files as well + as `passthrough_errors`. + + .. versionadded:: 0.6 + support for SSL was added. + + .. versionadded:: 0.8 + Added support for automatically loading a SSL context from certificate + file and private key. + + .. versionadded:: 0.9 + Added command-line interface. + + .. versionadded:: 0.10 + Improved the reloader and added support for changing the backend + through the `reloader_type` parameter. See :ref:`reloader` + for more information. + + .. versionchanged:: 0.15 + Bind to a Unix socket by passing a path that starts with + ``unix://`` as the ``hostname``. + + :param hostname: The host to bind to, for example ``'localhost'``. + If the value is a path that starts with ``unix://`` it will bind + to a Unix socket instead of a TCP socket.. + :param port: The port for the server. eg: ``8080`` + :param application: the WSGI application to execute + :param use_reloader: should the server automatically restart the python + process if modules were changed? + :param use_debugger: should the werkzeug debugging system be used? + :param use_evalex: should the exception evaluation feature be enabled? + :param extra_files: a list of files the reloader should watch + additionally to the modules. For example configuration + files. + :param reloader_interval: the interval for the reloader in seconds. + :param reloader_type: the type of reloader to use. The default is + auto detection. Valid values are ``'stat'`` and + ``'watchdog'``. See :ref:`reloader` for more + information. + :param threaded: should the process handle each request in a separate + thread? + :param processes: if greater than 1 then handle each request in a new process + up to this maximum number of concurrent processes. + :param request_handler: optional parameter that can be used to replace + the default one. You can use this to replace it + with a different + :class:`~BaseHTTPServer.BaseHTTPRequestHandler` + subclass. + :param static_files: a list or dict of paths for static files. This works + exactly like :class:`SharedDataMiddleware`, it's actually + just wrapping the application in that middleware before + serving. + :param passthrough_errors: set this to `True` to disable the error catching. + This means that the server will die on errors but + it can be useful to hook debuggers in (pdb etc.) + :param ssl_context: an SSL context for the connection. Either an + :class:`ssl.SSLContext`, a tuple in the form + ``(cert_file, pkey_file)``, the string ``'adhoc'`` if + the server should automatically create one, or ``None`` + to disable SSL (which is the default). + """ + if not isinstance(port, int): + raise TypeError("port must be an integer") + if use_debugger: + from .debug import DebuggedApplication + + application = DebuggedApplication(application, use_evalex) + if static_files: + from .middleware.shared_data import SharedDataMiddleware + + application = SharedDataMiddleware(application, static_files) + + def log_startup(sock): + display_hostname = hostname if hostname not in ("", "*") else "localhost" + quit_msg = "(Press CTRL+C to quit)" + if sock.family == af_unix: + _log("info", " * Running on %s %s", display_hostname, quit_msg) + else: + if ":" in display_hostname: + display_hostname = "[%s]" % display_hostname + port = sock.getsockname()[1] + _log( + "info", + " * Running on %s://%s:%d/ %s", + "http" if ssl_context is None else "https", + display_hostname, + port, + quit_msg, + ) + + def inner(): + try: + fd = int(os.environ["WERKZEUG_SERVER_FD"]) + except (LookupError, ValueError): + fd = None + srv = make_server( + hostname, + port, + application, + threaded, + processes, + request_handler, + passthrough_errors, + ssl_context, + fd=fd, + ) + if fd is None: + log_startup(srv.socket) + srv.serve_forever() + + if use_reloader: + # If we're not running already in the subprocess that is the + # reloader we want to open up a socket early to make sure the + # port is actually available. + if not is_running_from_reloader(): + if port == 0 and not can_open_by_fd: + raise ValueError( + "Cannot bind to a random port with enabled " + "reloader if the Python interpreter does " + "not support socket opening by fd." + ) + + # Create and destroy a socket so that any exceptions are + # raised before we spawn a separate Python interpreter and + # lose this ability. + address_family = select_address_family(hostname, port) + server_address = get_sockaddr(hostname, port, address_family) + s = socket.socket(address_family, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.bind(server_address) + if hasattr(s, "set_inheritable"): + s.set_inheritable(True) + + # If we can open the socket by file descriptor, then we can just + # reuse this one and our socket will survive the restarts. + if can_open_by_fd: + os.environ["WERKZEUG_SERVER_FD"] = str(s.fileno()) + s.listen(LISTEN_QUEUE) + log_startup(s) + else: + s.close() + if address_family == af_unix: + _log("info", "Unlinking %s" % server_address) + os.unlink(server_address) + + # Do not use relative imports, otherwise "python -m werkzeug.serving" + # breaks. + from ._reloader import run_with_reloader + + run_with_reloader(inner, extra_files, reloader_interval, reloader_type) + else: + inner() + + +def run_with_reloader(*args, **kwargs): + # People keep using undocumented APIs. Do not use this function + # please, we do not guarantee that it continues working. + from ._reloader import run_with_reloader + + return run_with_reloader(*args, **kwargs) + + +def main(): + """A simple command-line interface for :py:func:`run_simple`.""" + + # in contrast to argparse, this works at least under Python < 2.7 + import optparse + from .utils import import_string + + parser = optparse.OptionParser(usage="Usage: %prog [options] app_module:app_object") + parser.add_option( + "-b", + "--bind", + dest="address", + help="The hostname:port the app should listen on.", + ) + parser.add_option( + "-d", + "--debug", + dest="use_debugger", + action="store_true", + default=False, + help="Use Werkzeug's debugger.", + ) + parser.add_option( + "-r", + "--reload", + dest="use_reloader", + action="store_true", + default=False, + help="Reload Python process if modules change.", + ) + options, args = parser.parse_args() + + hostname, port = None, None + if options.address: + address = options.address.split(":") + hostname = address[0] + if len(address) > 1: + port = address[1] + + if len(args) != 1: + sys.stdout.write("No application supplied, or too much. See --help\n") + sys.exit(1) + app = import_string(args[0]) + + run_simple( + hostname=(hostname or "127.0.0.1"), + port=int(port or 5000), + application=app, + use_reloader=options.use_reloader, + use_debugger=options.use_debugger, + ) + + +if __name__ == "__main__": + main() diff --git a/python/werkzeug/test.py b/python/werkzeug/test.py new file mode 100644 index 0000000..6148665 --- /dev/null +++ b/python/werkzeug/test.py @@ -0,0 +1,1146 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.test + ~~~~~~~~~~~~~ + + This module implements a client to WSGI applications for testing. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import mimetypes +import sys +from io import BytesIO +from itertools import chain +from random import random +from tempfile import TemporaryFile +from time import time + +from ._compat import iteritems +from ._compat import iterlists +from ._compat import itervalues +from ._compat import make_literal_wrapper +from ._compat import reraise +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import wsgi_encoding_dance +from ._internal import _get_environ +from .datastructures import CallbackDict +from .datastructures import CombinedMultiDict +from .datastructures import EnvironHeaders +from .datastructures import FileMultiDict +from .datastructures import FileStorage +from .datastructures import Headers +from .datastructures import MultiDict +from .http import dump_cookie +from .http import dump_options_header +from .http import parse_options_header +from .urls import iri_to_uri +from .urls import url_encode +from .urls import url_fix +from .urls import url_parse +from .urls import url_unparse +from .urls import url_unquote +from .utils import get_content_type +from .wrappers import BaseRequest +from .wsgi import ClosingIterator +from .wsgi import get_current_url + +try: + from urllib.request import Request as U2Request +except ImportError: + from urllib2 import Request as U2Request + +try: + from http.cookiejar import CookieJar +except ImportError: + from cookielib import CookieJar + + +def stream_encode_multipart( + values, use_tempfile=True, threshold=1024 * 500, boundary=None, charset="utf-8" +): + """Encode a dict of values (either strings or file descriptors or + :class:`FileStorage` objects.) into a multipart encoded string stored + in a file descriptor. + """ + if boundary is None: + boundary = "---------------WerkzeugFormPart_%s%s" % (time(), random()) + _closure = [BytesIO(), 0, False] + + if use_tempfile: + + def write_binary(string): + stream, total_length, on_disk = _closure + if on_disk: + stream.write(string) + else: + length = len(string) + if length + _closure[1] <= threshold: + stream.write(string) + else: + new_stream = TemporaryFile("wb+") + new_stream.write(stream.getvalue()) + new_stream.write(string) + _closure[0] = new_stream + _closure[2] = True + _closure[1] = total_length + length + + else: + write_binary = _closure[0].write + + def write(string): + write_binary(string.encode(charset)) + + if not isinstance(values, MultiDict): + values = MultiDict(values) + + for key, values in iterlists(values): + for value in values: + write('--%s\r\nContent-Disposition: form-data; name="%s"' % (boundary, key)) + reader = getattr(value, "read", None) + if reader is not None: + filename = getattr(value, "filename", getattr(value, "name", None)) + content_type = getattr(value, "content_type", None) + if content_type is None: + content_type = ( + filename + and mimetypes.guess_type(filename)[0] + or "application/octet-stream" + ) + if filename is not None: + write('; filename="%s"\r\n' % filename) + else: + write("\r\n") + write("Content-Type: %s\r\n\r\n" % content_type) + while 1: + chunk = reader(16384) + if not chunk: + break + write_binary(chunk) + else: + if not isinstance(value, string_types): + value = str(value) + + value = to_bytes(value, charset) + write("\r\n\r\n") + write_binary(value) + write("\r\n") + write("--%s--\r\n" % boundary) + + length = int(_closure[0].tell()) + _closure[0].seek(0) + return _closure[0], length, boundary + + +def encode_multipart(values, boundary=None, charset="utf-8"): + """Like `stream_encode_multipart` but returns a tuple in the form + (``boundary``, ``data``) where data is a bytestring. + """ + stream, length, boundary = stream_encode_multipart( + values, use_tempfile=False, boundary=boundary, charset=charset + ) + return boundary, stream.read() + + +def File(fd, filename=None, mimetype=None): + """Backwards compat. + + .. deprecated:: 0.5 + """ + from warnings import warn + + warn( + "'werkzeug.test.File' is deprecated as of version 0.5 and will" + " be removed in version 1.0. Use 'EnvironBuilder' or" + " 'FileStorage' instead.", + DeprecationWarning, + stacklevel=2, + ) + return FileStorage(fd, filename=filename, content_type=mimetype) + + +class _TestCookieHeaders(object): + + """A headers adapter for cookielib + """ + + def __init__(self, headers): + self.headers = headers + + def getheaders(self, name): + headers = [] + name = name.lower() + for k, v in self.headers: + if k.lower() == name: + headers.append(v) + return headers + + def get_all(self, name, default=None): + rv = [] + for k, v in self.headers: + if k.lower() == name.lower(): + rv.append(v) + return rv or default or [] + + +class _TestCookieResponse(object): + + """Something that looks like a httplib.HTTPResponse, but is actually just an + adapter for our test responses to make them available for cookielib. + """ + + def __init__(self, headers): + self.headers = _TestCookieHeaders(headers) + + def info(self): + return self.headers + + +class _TestCookieJar(CookieJar): + + """A cookielib.CookieJar modified to inject and read cookie headers from + and to wsgi environments, and wsgi application responses. + """ + + def inject_wsgi(self, environ): + """Inject the cookies as client headers into the server's wsgi + environment. + """ + cvals = ["%s=%s" % (c.name, c.value) for c in self] + + if cvals: + environ["HTTP_COOKIE"] = "; ".join(cvals) + else: + environ.pop("HTTP_COOKIE", None) + + def extract_wsgi(self, environ, headers): + """Extract the server's set-cookie headers as cookies into the + cookie jar. + """ + self.extract_cookies( + _TestCookieResponse(headers), U2Request(get_current_url(environ)) + ) + + +def _iter_data(data): + """Iterates over a `dict` or :class:`MultiDict` yielding all keys and + values. + This is used to iterate over the data passed to the + :class:`EnvironBuilder`. + """ + if isinstance(data, MultiDict): + for key, values in iterlists(data): + for value in values: + yield key, value + else: + for key, values in iteritems(data): + if isinstance(values, list): + for value in values: + yield key, value + else: + yield key, values + + +class EnvironBuilder(object): + """This class can be used to conveniently create a WSGI environment + for testing purposes. It can be used to quickly create WSGI environments + or request objects from arbitrary data. + + The signature of this class is also used in some other places as of + Werkzeug 0.5 (:func:`create_environ`, :meth:`BaseResponse.from_values`, + :meth:`Client.open`). Because of this most of the functionality is + available through the constructor alone. + + Files and regular form data can be manipulated independently of each + other with the :attr:`form` and :attr:`files` attributes, but are + passed with the same argument to the constructor: `data`. + + `data` can be any of these values: + + - a `str` or `bytes` object: The object is converted into an + :attr:`input_stream`, the :attr:`content_length` is set and you have to + provide a :attr:`content_type`. + - a `dict` or :class:`MultiDict`: The keys have to be strings. The values + have to be either any of the following objects, or a list of any of the + following objects: + + - a :class:`file`-like object: These are converted into + :class:`FileStorage` objects automatically. + - a `tuple`: The :meth:`~FileMultiDict.add_file` method is called + with the key and the unpacked `tuple` items as positional + arguments. + - a `str`: The string is set as form data for the associated key. + - a file-like object: The object content is loaded in memory and then + handled like a regular `str` or a `bytes`. + + :param path: the path of the request. In the WSGI environment this will + end up as `PATH_INFO`. If the `query_string` is not defined + and there is a question mark in the `path` everything after + it is used as query string. + :param base_url: the base URL is a URL that is used to extract the WSGI + URL scheme, host (server name + server port) and the + script root (`SCRIPT_NAME`). + :param query_string: an optional string or dict with URL parameters. + :param method: the HTTP method to use, defaults to `GET`. + :param input_stream: an optional input stream. Do not specify this and + `data`. As soon as an input stream is set you can't + modify :attr:`args` and :attr:`files` unless you + set the :attr:`input_stream` to `None` again. + :param content_type: The content type for the request. As of 0.5 you + don't have to provide this when specifying files + and form data via `data`. + :param content_length: The content length for the request. You don't + have to specify this when providing data via + `data`. + :param errors_stream: an optional error stream that is used for + `wsgi.errors`. Defaults to :data:`stderr`. + :param multithread: controls `wsgi.multithread`. Defaults to `False`. + :param multiprocess: controls `wsgi.multiprocess`. Defaults to `False`. + :param run_once: controls `wsgi.run_once`. Defaults to `False`. + :param headers: an optional list or :class:`Headers` object of headers. + :param data: a string or dict of form data or a file-object. + See explanation above. + :param json: An object to be serialized and assigned to ``data``. + Defaults the content type to ``"application/json"``. + Serialized with the function assigned to :attr:`json_dumps`. + :param environ_base: an optional dict of environment defaults. + :param environ_overrides: an optional dict of environment overrides. + :param charset: the charset used to encode unicode data. + + .. versionadded:: 0.15 + The ``json`` param and :meth:`json_dumps` method. + + .. versionadded:: 0.15 + The environ has keys ``REQUEST_URI`` and ``RAW_URI`` containing + the path before perecent-decoding. This is not part of the WSGI + PEP, but many WSGI servers include it. + + .. versionchanged:: 0.6 + ``path`` and ``base_url`` can now be unicode strings that are + encoded with :func:`iri_to_uri`. + """ + + #: the server protocol to use. defaults to HTTP/1.1 + server_protocol = "HTTP/1.1" + + #: the wsgi version to use. defaults to (1, 0) + wsgi_version = (1, 0) + + #: the default request class for :meth:`get_request` + request_class = BaseRequest + + import json + + #: The serialization function used when ``json`` is passed. + json_dumps = staticmethod(json.dumps) + del json + + def __init__( + self, + path="/", + base_url=None, + query_string=None, + method="GET", + input_stream=None, + content_type=None, + content_length=None, + errors_stream=None, + multithread=False, + multiprocess=False, + run_once=False, + headers=None, + data=None, + environ_base=None, + environ_overrides=None, + charset="utf-8", + mimetype=None, + json=None, + ): + path_s = make_literal_wrapper(path) + if query_string is not None and path_s("?") in path: + raise ValueError("Query string is defined in the path and as an argument") + if query_string is None and path_s("?") in path: + path, query_string = path.split(path_s("?"), 1) + self.charset = charset + self.path = iri_to_uri(path) + if base_url is not None: + base_url = url_fix(iri_to_uri(base_url, charset), charset) + self.base_url = base_url + if isinstance(query_string, (bytes, text_type)): + self.query_string = query_string + else: + if query_string is None: + query_string = MultiDict() + elif not isinstance(query_string, MultiDict): + query_string = MultiDict(query_string) + self.args = query_string + self.method = method + if headers is None: + headers = Headers() + elif not isinstance(headers, Headers): + headers = Headers(headers) + self.headers = headers + if content_type is not None: + self.content_type = content_type + if errors_stream is None: + errors_stream = sys.stderr + self.errors_stream = errors_stream + self.multithread = multithread + self.multiprocess = multiprocess + self.run_once = run_once + self.environ_base = environ_base + self.environ_overrides = environ_overrides + self.input_stream = input_stream + self.content_length = content_length + self.closed = False + + if json is not None: + if data is not None: + raise TypeError("can't provide both json and data") + + data = self.json_dumps(json) + + if self.content_type is None: + self.content_type = "application/json" + + if data: + if input_stream is not None: + raise TypeError("can't provide input stream and data") + if hasattr(data, "read"): + data = data.read() + if isinstance(data, text_type): + data = data.encode(self.charset) + if isinstance(data, bytes): + self.input_stream = BytesIO(data) + if self.content_length is None: + self.content_length = len(data) + else: + for key, value in _iter_data(data): + if isinstance(value, (tuple, dict)) or hasattr(value, "read"): + self._add_file_from_data(key, value) + else: + self.form.setlistdefault(key).append(value) + + if mimetype is not None: + self.mimetype = mimetype + + @classmethod + def from_environ(cls, environ, **kwargs): + """Turn an environ dict back into a builder. Any extra kwargs + override the args extracted from the environ. + + .. versionadded:: 0.15 + """ + headers = Headers(EnvironHeaders(environ)) + out = { + "path": environ["PATH_INFO"], + "base_url": cls._make_base_url( + environ["wsgi.url_scheme"], headers.pop("Host"), environ["SCRIPT_NAME"] + ), + "query_string": environ["QUERY_STRING"], + "method": environ["REQUEST_METHOD"], + "input_stream": environ["wsgi.input"], + "content_type": headers.pop("Content-Type", None), + "content_length": headers.pop("Content-Length", None), + "errors_stream": environ["wsgi.errors"], + "multithread": environ["wsgi.multithread"], + "multiprocess": environ["wsgi.multiprocess"], + "run_once": environ["wsgi.run_once"], + "headers": headers, + } + out.update(kwargs) + return cls(**out) + + def _add_file_from_data(self, key, value): + """Called in the EnvironBuilder to add files from the data dict.""" + if isinstance(value, tuple): + self.files.add_file(key, *value) + elif isinstance(value, dict): + from warnings import warn + + warn( + "Passing a dict as file data is deprecated as of" + " version 0.5 and will be removed in version 1.0. Use" + " a tuple or 'FileStorage' object instead.", + DeprecationWarning, + stacklevel=2, + ) + value = dict(value) + mimetype = value.pop("mimetype", None) + if mimetype is not None: + value["content_type"] = mimetype + self.files.add_file(key, **value) + else: + self.files.add_file(key, value) + + @staticmethod + def _make_base_url(scheme, host, script_root): + return url_unparse((scheme, host, script_root, "", "")).rstrip("/") + "/" + + @property + def base_url(self): + """The base URL is used to extract the URL scheme, host name, + port, and root path. + """ + return self._make_base_url(self.url_scheme, self.host, self.script_root) + + @base_url.setter + def base_url(self, value): + if value is None: + scheme = "http" + netloc = "localhost" + script_root = "" + else: + scheme, netloc, script_root, qs, anchor = url_parse(value) + if qs or anchor: + raise ValueError("base url must not contain a query string or fragment") + self.script_root = script_root.rstrip("/") + self.host = netloc + self.url_scheme = scheme + + def _get_content_type(self): + ct = self.headers.get("Content-Type") + if ct is None and not self._input_stream: + if self._files: + return "multipart/form-data" + elif self._form: + return "application/x-www-form-urlencoded" + return None + return ct + + def _set_content_type(self, value): + if value is None: + self.headers.pop("Content-Type", None) + else: + self.headers["Content-Type"] = value + + content_type = property( + _get_content_type, + _set_content_type, + doc="""The content type for the request. Reflected from and to + the :attr:`headers`. Do not set if you set :attr:`files` or + :attr:`form` for auto detection.""", + ) + del _get_content_type, _set_content_type + + def _get_content_length(self): + return self.headers.get("Content-Length", type=int) + + def _get_mimetype(self): + ct = self.content_type + if ct: + return ct.split(";")[0].strip() + + def _set_mimetype(self, value): + self.content_type = get_content_type(value, self.charset) + + def _get_mimetype_params(self): + def on_update(d): + self.headers["Content-Type"] = dump_options_header(self.mimetype, d) + + d = parse_options_header(self.headers.get("content-type", ""))[1] + return CallbackDict(d, on_update) + + mimetype = property( + _get_mimetype, + _set_mimetype, + doc="""The mimetype (content type without charset etc.) + + .. versionadded:: 0.14 + """, + ) + mimetype_params = property( + _get_mimetype_params, + doc=""" The mimetype parameters as dict. For example if the + content type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.14 + """, + ) + del _get_mimetype, _set_mimetype, _get_mimetype_params + + def _set_content_length(self, value): + if value is None: + self.headers.pop("Content-Length", None) + else: + self.headers["Content-Length"] = str(value) + + content_length = property( + _get_content_length, + _set_content_length, + doc="""The content length as integer. Reflected from and to the + :attr:`headers`. Do not set if you set :attr:`files` or + :attr:`form` for auto detection.""", + ) + del _get_content_length, _set_content_length + + def form_property(name, storage, doc): # noqa: B902 + key = "_" + name + + def getter(self): + if self._input_stream is not None: + raise AttributeError("an input stream is defined") + rv = getattr(self, key) + if rv is None: + rv = storage() + setattr(self, key, rv) + + return rv + + def setter(self, value): + self._input_stream = None + setattr(self, key, value) + + return property(getter, setter, doc=doc) + + form = form_property("form", MultiDict, doc="A :class:`MultiDict` of form values.") + files = form_property( + "files", + FileMultiDict, + doc="""A :class:`FileMultiDict` of uploaded files. You can use + the :meth:`~FileMultiDict.add_file` method to add new files to + the dict.""", + ) + del form_property + + def _get_input_stream(self): + return self._input_stream + + def _set_input_stream(self, value): + self._input_stream = value + self._form = self._files = None + + input_stream = property( + _get_input_stream, + _set_input_stream, + doc="""An optional input stream. If you set this it will clear + :attr:`form` and :attr:`files`.""", + ) + del _get_input_stream, _set_input_stream + + def _get_query_string(self): + if self._query_string is None: + if self._args is not None: + return url_encode(self._args, charset=self.charset) + return "" + return self._query_string + + def _set_query_string(self, value): + self._query_string = value + self._args = None + + query_string = property( + _get_query_string, + _set_query_string, + doc="""The query string. If you set this to a string + :attr:`args` will no longer be available.""", + ) + del _get_query_string, _set_query_string + + def _get_args(self): + if self._query_string is not None: + raise AttributeError("a query string is defined") + if self._args is None: + self._args = MultiDict() + return self._args + + def _set_args(self, value): + self._query_string = None + self._args = value + + args = property( + _get_args, _set_args, doc="The URL arguments as :class:`MultiDict`." + ) + del _get_args, _set_args + + @property + def server_name(self): + """The server name (read-only, use :attr:`host` to set)""" + return self.host.split(":", 1)[0] + + @property + def server_port(self): + """The server port as integer (read-only, use :attr:`host` to set)""" + pieces = self.host.split(":", 1) + if len(pieces) == 2 and pieces[1].isdigit(): + return int(pieces[1]) + elif self.url_scheme == "https": + return 443 + return 80 + + def __del__(self): + try: + self.close() + except Exception: + pass + + def close(self): + """Closes all files. If you put real :class:`file` objects into the + :attr:`files` dict you can call this method to automatically close + them all in one go. + """ + if self.closed: + return + try: + files = itervalues(self.files) + except AttributeError: + files = () + for f in files: + try: + f.close() + except Exception: + pass + self.closed = True + + def get_environ(self): + """Return the built environ. + + .. versionchanged:: 0.15 + The content type and length headers are set based on + input stream detection. Previously this only set the WSGI + keys. + """ + input_stream = self.input_stream + content_length = self.content_length + + mimetype = self.mimetype + content_type = self.content_type + + if input_stream is not None: + start_pos = input_stream.tell() + input_stream.seek(0, 2) + end_pos = input_stream.tell() + input_stream.seek(start_pos) + content_length = end_pos - start_pos + elif mimetype == "multipart/form-data": + values = CombinedMultiDict([self.form, self.files]) + input_stream, content_length, boundary = stream_encode_multipart( + values, charset=self.charset + ) + content_type = mimetype + '; boundary="%s"' % boundary + elif mimetype == "application/x-www-form-urlencoded": + # XXX: py2v3 review + values = url_encode(self.form, charset=self.charset) + values = values.encode("ascii") + content_length = len(values) + input_stream = BytesIO(values) + else: + input_stream = BytesIO() + + result = {} + if self.environ_base: + result.update(self.environ_base) + + def _path_encode(x): + return wsgi_encoding_dance(url_unquote(x, self.charset), self.charset) + + qs = wsgi_encoding_dance(self.query_string) + + result.update( + { + "REQUEST_METHOD": self.method, + "SCRIPT_NAME": _path_encode(self.script_root), + "PATH_INFO": _path_encode(self.path), + "QUERY_STRING": qs, + # Non-standard, added by mod_wsgi, uWSGI + "REQUEST_URI": wsgi_encoding_dance(self.path), + # Non-standard, added by gunicorn + "RAW_URI": wsgi_encoding_dance(self.path), + "SERVER_NAME": self.server_name, + "SERVER_PORT": str(self.server_port), + "HTTP_HOST": self.host, + "SERVER_PROTOCOL": self.server_protocol, + "wsgi.version": self.wsgi_version, + "wsgi.url_scheme": self.url_scheme, + "wsgi.input": input_stream, + "wsgi.errors": self.errors_stream, + "wsgi.multithread": self.multithread, + "wsgi.multiprocess": self.multiprocess, + "wsgi.run_once": self.run_once, + } + ) + + headers = self.headers.copy() + + if content_type is not None: + result["CONTENT_TYPE"] = content_type + headers.set("Content-Type", content_type) + + if content_length is not None: + result["CONTENT_LENGTH"] = str(content_length) + headers.set("Content-Length", content_length) + + for key, value in headers.to_wsgi_list(): + result["HTTP_%s" % key.upper().replace("-", "_")] = value + + if self.environ_overrides: + result.update(self.environ_overrides) + + return result + + def get_request(self, cls=None): + """Returns a request with the data. If the request class is not + specified :attr:`request_class` is used. + + :param cls: The request wrapper to use. + """ + if cls is None: + cls = self.request_class + return cls(self.get_environ()) + + +class ClientRedirectError(Exception): + """If a redirect loop is detected when using follow_redirects=True with + the :cls:`Client`, then this exception is raised. + """ + + +class Client(object): + """This class allows you to send requests to a wrapped application. + + The response wrapper can be a class or factory function that takes + three arguments: app_iter, status and headers. The default response + wrapper just returns a tuple. + + Example:: + + class ClientResponse(BaseResponse): + ... + + client = Client(MyApplication(), response_wrapper=ClientResponse) + + The use_cookies parameter indicates whether cookies should be stored and + sent for subsequent requests. This is True by default, but passing False + will disable this behaviour. + + If you want to request some subdomain of your application you may set + `allow_subdomain_redirects` to `True` as if not no external redirects + are allowed. + + .. versionadded:: 0.5 + `use_cookies` is new in this version. Older versions did not provide + builtin cookie support. + + .. versionadded:: 0.14 + The `mimetype` parameter was added. + + .. versionadded:: 0.15 + The ``json`` parameter. + """ + + def __init__( + self, + application, + response_wrapper=None, + use_cookies=True, + allow_subdomain_redirects=False, + ): + self.application = application + self.response_wrapper = response_wrapper + if use_cookies: + self.cookie_jar = _TestCookieJar() + else: + self.cookie_jar = None + self.allow_subdomain_redirects = allow_subdomain_redirects + + def set_cookie( + self, + server_name, + key, + value="", + max_age=None, + expires=None, + path="/", + domain=None, + secure=None, + httponly=False, + charset="utf-8", + ): + """Sets a cookie in the client's cookie jar. The server name + is required and has to match the one that is also passed to + the open call. + """ + assert self.cookie_jar is not None, "cookies disabled" + header = dump_cookie( + key, value, max_age, expires, path, domain, secure, httponly, charset + ) + environ = create_environ(path, base_url="http://" + server_name) + headers = [("Set-Cookie", header)] + self.cookie_jar.extract_wsgi(environ, headers) + + def delete_cookie(self, server_name, key, path="/", domain=None): + """Deletes a cookie in the test client.""" + self.set_cookie( + server_name, key, expires=0, max_age=0, path=path, domain=domain + ) + + def run_wsgi_app(self, environ, buffered=False): + """Runs the wrapped WSGI app with the given environment.""" + if self.cookie_jar is not None: + self.cookie_jar.inject_wsgi(environ) + rv = run_wsgi_app(self.application, environ, buffered=buffered) + if self.cookie_jar is not None: + self.cookie_jar.extract_wsgi(environ, rv[2]) + return rv + + def resolve_redirect(self, response, new_location, environ, buffered=False): + """Perform a new request to the location given by the redirect + response to the previous request. + """ + scheme, netloc, path, qs, anchor = url_parse(new_location) + builder = EnvironBuilder.from_environ(environ, query_string=qs) + + to_name_parts = netloc.split(":", 1)[0].split(".") + from_name_parts = builder.server_name.split(".") + + if to_name_parts != [""]: + # The new location has a host, use it for the base URL. + builder.url_scheme = scheme + builder.host = netloc + else: + # A local redirect with autocorrect_location_header=False + # doesn't have a host, so use the request's host. + to_name_parts = from_name_parts + + # Explain why a redirect to a different server name won't be followed. + if to_name_parts != from_name_parts: + if to_name_parts[-len(from_name_parts) :] == from_name_parts: + if not self.allow_subdomain_redirects: + raise RuntimeError("Following subdomain redirects is not enabled.") + else: + raise RuntimeError("Following external redirects is not supported.") + + path_parts = path.split("/") + root_parts = builder.script_root.split("/") + + if path_parts[: len(root_parts)] == root_parts: + # Strip the script root from the path. + builder.path = path[len(builder.script_root) :] + else: + # The new location is not under the script root, so use the + # whole path and clear the previous root. + builder.path = path + builder.script_root = "" + + status_code = int(response[1].split(None, 1)[0]) + + # Only 307 and 308 preserve all of the original request. + if status_code not in {307, 308}: + # HEAD is preserved, everything else becomes GET. + if builder.method != "HEAD": + builder.method = "GET" + + # Clear the body and the headers that describe it. + builder.input_stream = None + builder.content_type = None + builder.content_length = None + builder.headers.pop("Transfer-Encoding", None) + + # Disable the response wrapper while handling redirects. Not + # thread safe, but the client should not be shared anyway. + old_response_wrapper = self.response_wrapper + self.response_wrapper = None + + try: + return self.open(builder, as_tuple=True, buffered=buffered) + finally: + self.response_wrapper = old_response_wrapper + + def open(self, *args, **kwargs): + """Takes the same arguments as the :class:`EnvironBuilder` class with + some additions: You can provide a :class:`EnvironBuilder` or a WSGI + environment as only argument instead of the :class:`EnvironBuilder` + arguments and two optional keyword arguments (`as_tuple`, `buffered`) + that change the type of the return value or the way the application is + executed. + + .. versionchanged:: 0.5 + If a dict is provided as file in the dict for the `data` parameter + the content type has to be called `content_type` now instead of + `mimetype`. This change was made for consistency with + :class:`werkzeug.FileWrapper`. + + The `follow_redirects` parameter was added to :func:`open`. + + Additional parameters: + + :param as_tuple: Returns a tuple in the form ``(environ, result)`` + :param buffered: Set this to True to buffer the application run. + This will automatically close the application for + you as well. + :param follow_redirects: Set this to True if the `Client` should + follow HTTP redirects. + """ + as_tuple = kwargs.pop("as_tuple", False) + buffered = kwargs.pop("buffered", False) + follow_redirects = kwargs.pop("follow_redirects", False) + environ = None + if not kwargs and len(args) == 1: + if isinstance(args[0], EnvironBuilder): + environ = args[0].get_environ() + elif isinstance(args[0], dict): + environ = args[0] + if environ is None: + builder = EnvironBuilder(*args, **kwargs) + try: + environ = builder.get_environ() + finally: + builder.close() + + response = self.run_wsgi_app(environ.copy(), buffered=buffered) + + # handle redirects + redirect_chain = [] + while 1: + status_code = int(response[1].split(None, 1)[0]) + if ( + status_code not in {301, 302, 303, 305, 307, 308} + or not follow_redirects + ): + break + + # Exhaust intermediate response bodies to ensure middleware + # that returns an iterator runs any cleanup code. + if not buffered: + for _ in response[0]: + pass + + new_location = response[2]["location"] + new_redirect_entry = (new_location, status_code) + if new_redirect_entry in redirect_chain: + raise ClientRedirectError("loop detected") + redirect_chain.append(new_redirect_entry) + environ, response = self.resolve_redirect( + response, new_location, environ, buffered=buffered + ) + + if self.response_wrapper is not None: + response = self.response_wrapper(*response) + if as_tuple: + return environ, response + return response + + def get(self, *args, **kw): + """Like open but method is enforced to GET.""" + kw["method"] = "GET" + return self.open(*args, **kw) + + def patch(self, *args, **kw): + """Like open but method is enforced to PATCH.""" + kw["method"] = "PATCH" + return self.open(*args, **kw) + + def post(self, *args, **kw): + """Like open but method is enforced to POST.""" + kw["method"] = "POST" + return self.open(*args, **kw) + + def head(self, *args, **kw): + """Like open but method is enforced to HEAD.""" + kw["method"] = "HEAD" + return self.open(*args, **kw) + + def put(self, *args, **kw): + """Like open but method is enforced to PUT.""" + kw["method"] = "PUT" + return self.open(*args, **kw) + + def delete(self, *args, **kw): + """Like open but method is enforced to DELETE.""" + kw["method"] = "DELETE" + return self.open(*args, **kw) + + def options(self, *args, **kw): + """Like open but method is enforced to OPTIONS.""" + kw["method"] = "OPTIONS" + return self.open(*args, **kw) + + def trace(self, *args, **kw): + """Like open but method is enforced to TRACE.""" + kw["method"] = "TRACE" + return self.open(*args, **kw) + + def __repr__(self): + return "<%s %r>" % (self.__class__.__name__, self.application) + + +def create_environ(*args, **kwargs): + """Create a new WSGI environ dict based on the values passed. The first + parameter should be the path of the request which defaults to '/'. The + second one can either be an absolute path (in that case the host is + localhost:80) or a full path to the request with scheme, netloc port and + the path to the script. + + This accepts the same arguments as the :class:`EnvironBuilder` + constructor. + + .. versionchanged:: 0.5 + This function is now a thin wrapper over :class:`EnvironBuilder` which + was added in 0.5. The `headers`, `environ_base`, `environ_overrides` + and `charset` parameters were added. + """ + builder = EnvironBuilder(*args, **kwargs) + try: + return builder.get_environ() + finally: + builder.close() + + +def run_wsgi_app(app, environ, buffered=False): + """Return a tuple in the form (app_iter, status, headers) of the + application output. This works best if you pass it an application that + returns an iterator all the time. + + Sometimes applications may use the `write()` callable returned + by the `start_response` function. This tries to resolve such edge + cases automatically. But if you don't get the expected output you + should set `buffered` to `True` which enforces buffering. + + If passed an invalid WSGI application the behavior of this function is + undefined. Never pass non-conforming WSGI applications to this function. + + :param app: the application to execute. + :param buffered: set to `True` to enforce buffering. + :return: tuple in the form ``(app_iter, status, headers)`` + """ + environ = _get_environ(environ) + response = [] + buffer = [] + + def start_response(status, headers, exc_info=None): + if exc_info is not None: + reraise(*exc_info) + response[:] = [status, headers] + return buffer.append + + app_rv = app(environ, start_response) + close_func = getattr(app_rv, "close", None) + app_iter = iter(app_rv) + + # when buffering we emit the close call early and convert the + # application iterator into a regular list + if buffered: + try: + app_iter = list(app_iter) + finally: + if close_func is not None: + close_func() + + # otherwise we iterate the application iter until we have a response, chain + # the already received data with the already collected data and wrap it in + # a new `ClosingIterator` if we need to restore a `close` callable from the + # original return value. + else: + for item in app_iter: + buffer.append(item) + if response: + break + if buffer: + app_iter = chain(buffer, app_iter) + if close_func is not None and app_iter is not app_rv: + app_iter = ClosingIterator(app_iter, close_func) + + return app_iter, response[0], Headers(response[1]) diff --git a/python/werkzeug/testapp.py b/python/werkzeug/testapp.py new file mode 100644 index 0000000..8ea23be --- /dev/null +++ b/python/werkzeug/testapp.py @@ -0,0 +1,241 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.testapp + ~~~~~~~~~~~~~~~~ + + Provide a small test application that can be used to test a WSGI server + and check it for WSGI compliance. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import base64 +import os +import sys +from textwrap import wrap + +import werkzeug +from .utils import escape +from .wrappers import BaseRequest as Request +from .wrappers import BaseResponse as Response + +logo = Response( + base64.b64decode( + """ +R0lGODlhoACgAOMIAAEDACwpAEpCAGdgAJaKAM28AOnVAP3rAP///////// +//////////////////////yH5BAEKAAgALAAAAACgAKAAAAT+EMlJq704680R+F0ojmRpnuj0rWnrv +nB8rbRs33gu0bzu/0AObxgsGn3D5HHJbCUFyqZ0ukkSDlAidctNFg7gbI9LZlrBaHGtzAae0eloe25 +7w9EDOX2fst/xenyCIn5/gFqDiVVDV4aGeYiKkhSFjnCQY5OTlZaXgZp8nJ2ekaB0SQOjqphrpnOiq +ncEn65UsLGytLVmQ6m4sQazpbtLqL/HwpnER8bHyLrLOc3Oz8PRONPU1crXN9na263dMt/g4SzjMeX +m5yDpLqgG7OzJ4u8lT/P69ej3JPn69kHzN2OIAHkB9RUYSFCFQYQJFTIkCDBiwoXWGnowaLEjRm7+G +p9A7Hhx4rUkAUaSLJlxHMqVMD/aSycSZkyTplCqtGnRAM5NQ1Ly5OmzZc6gO4d6DGAUKA+hSocWYAo +SlM6oUWX2O/o0KdaVU5vuSQLAa0ADwQgMEMB2AIECZhVSnTno6spgbtXmHcBUrQACcc2FrTrWS8wAf +78cMFBgwIBgbN+qvTt3ayikRBk7BoyGAGABAdYyfdzRQGV3l4coxrqQ84GpUBmrdR3xNIDUPAKDBSA +ADIGDhhqTZIWaDcrVX8EsbNzbkvCOxG8bN5w8ly9H8jyTJHC6DFndQydbguh2e/ctZJFXRxMAqqPVA +tQH5E64SPr1f0zz7sQYjAHg0In+JQ11+N2B0XXBeeYZgBZFx4tqBToiTCPv0YBgQv8JqA6BEf6RhXx +w1ENhRBnWV8ctEX4Ul2zc3aVGcQNC2KElyTDYyYUWvShdjDyMOGMuFjqnII45aogPhz/CodUHFwaDx +lTgsaOjNyhGWJQd+lFoAGk8ObghI0kawg+EV5blH3dr+digkYuAGSaQZFHFz2P/cTaLmhF52QeSb45 +Jwxd+uSVGHlqOZpOeJpCFZ5J+rkAkFjQ0N1tah7JJSZUFNsrkeJUJMIBi8jyaEKIhKPomnC91Uo+NB +yyaJ5umnnpInIFh4t6ZSpGaAVmizqjpByDegYl8tPE0phCYrhcMWSv+uAqHfgH88ak5UXZmlKLVJhd +dj78s1Fxnzo6yUCrV6rrDOkluG+QzCAUTbCwf9SrmMLzK6p+OPHx7DF+bsfMRq7Ec61Av9i6GLw23r +idnZ+/OO0a99pbIrJkproCQMA17OPG6suq3cca5ruDfXCCDoS7BEdvmJn5otdqscn+uogRHHXs8cbh +EIfYaDY1AkrC0cqwcZpnM6ludx72x0p7Fo/hZAcpJDjax0UdHavMKAbiKltMWCF3xxh9k25N/Viud8 +ba78iCvUkt+V6BpwMlErmcgc502x+u1nSxJSJP9Mi52awD1V4yB/QHONsnU3L+A/zR4VL/indx/y64 +gqcj+qgTeweM86f0Qy1QVbvmWH1D9h+alqg254QD8HJXHvjQaGOqEqC22M54PcftZVKVSQG9jhkv7C +JyTyDoAJfPdu8v7DRZAxsP/ky9MJ3OL36DJfCFPASC3/aXlfLOOON9vGZZHydGf8LnxYJuuVIbl83y +Az5n/RPz07E+9+zw2A2ahz4HxHo9Kt79HTMx1Q7ma7zAzHgHqYH0SoZWyTuOLMiHwSfZDAQTn0ajk9 +YQqodnUYjByQZhZak9Wu4gYQsMyEpIOAOQKze8CmEF45KuAHTvIDOfHJNipwoHMuGHBnJElUoDmAyX +c2Qm/R8Ah/iILCCJOEokGowdhDYc/yoL+vpRGwyVSCWFYZNljkhEirGXsalWcAgOdeAdoXcktF2udb +qbUhjWyMQxYO01o6KYKOr6iK3fE4MaS+DsvBsGOBaMb0Y6IxADaJhFICaOLmiWTlDAnY1KzDG4ambL +cWBA8mUzjJsN2KjSaSXGqMCVXYpYkj33mcIApyhQf6YqgeNAmNvuC0t4CsDbSshZJkCS1eNisKqlyG +cF8G2JeiDX6tO6Mv0SmjCa3MFb0bJaGPMU0X7c8XcpvMaOQmCajwSeY9G0WqbBmKv34DsMIEztU6Y2 +KiDlFdt6jnCSqx7Dmt6XnqSKaFFHNO5+FmODxMCWBEaco77lNDGXBM0ECYB/+s7nKFdwSF5hgXumQe +EZ7amRg39RHy3zIjyRCykQh8Zo2iviRKyTDn/zx6EefptJj2Cw+Ep2FSc01U5ry4KLPYsTyWnVGnvb +UpyGlhjBUljyjHhWpf8OFaXwhp9O4T1gU9UeyPPa8A2l0p1kNqPXEVRm1AOs1oAGZU596t6SOR2mcB +Oco1srWtkaVrMUzIErrKri85keKqRQYX9VX0/eAUK1hrSu6HMEX3Qh2sCh0q0D2CtnUqS4hj62sE/z +aDs2Sg7MBS6xnQeooc2R2tC9YrKpEi9pLXfYXp20tDCpSP8rKlrD4axprb9u1Df5hSbz9QU0cRpfgn +kiIzwKucd0wsEHlLpe5yHXuc6FrNelOl7pY2+11kTWx7VpRu97dXA3DO1vbkhcb4zyvERYajQgAADs +=""" + ), + mimetype="image/png", +) + + +TEMPLATE = u"""\ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" + "http://www.w3.org/TR/html4/loose.dtd"> +<title>WSGI Information</title> +<style type="text/css"> + @import url(https://fonts.googleapis.com/css?family=Ubuntu); + + body { font-family: 'Lucida Grande', 'Lucida Sans Unicode', 'Geneva', + 'Verdana', sans-serif; background-color: white; color: #000; + font-size: 15px; text-align: center; } + #logo { float: right; padding: 0 0 10px 10px; } + div.box { text-align: left; width: 45em; margin: auto; padding: 50px 0; + background-color: white; } + h1, h2 { font-family: 'Ubuntu', 'Lucida Grande', 'Lucida Sans Unicode', + 'Geneva', 'Verdana', sans-serif; font-weight: normal; } + h1 { margin: 0 0 30px 0; } + h2 { font-size: 1.4em; margin: 1em 0 0.5em 0; } + table { width: 100%%; border-collapse: collapse; border: 1px solid #AFC5C9 } + table th { background-color: #AFC1C4; color: white; font-size: 0.72em; + font-weight: normal; width: 18em; vertical-align: top; + padding: 0.5em 0 0.1em 0.5em; } + table td { border: 1px solid #AFC5C9; padding: 0.1em 0 0.1em 0.5em; } + code { font-family: 'Consolas', 'Monaco', 'Bitstream Vera Sans Mono', + monospace; font-size: 0.7em; } + ul li { line-height: 1.5em; } + ul.path { font-size: 0.7em; margin: 0 -30px; padding: 8px 30px; + list-style: none; background: #E8EFF0; } + ul.path li { line-height: 1.6em; } + li.virtual { color: #999; text-decoration: underline; } + li.exp { background: white; } +</style> +<div class="box"> + <img src="?resource=logo" id="logo" alt="[The Werkzeug Logo]" /> + <h1>WSGI Information</h1> + <p> + This page displays all available information about the WSGI server and + the underlying Python interpreter. + <h2 id="python-interpreter">Python Interpreter</h2> + <table> + <tr> + <th>Python Version + <td>%(python_version)s + <tr> + <th>Platform + <td>%(platform)s [%(os)s] + <tr> + <th>API Version + <td>%(api_version)s + <tr> + <th>Byteorder + <td>%(byteorder)s + <tr> + <th>Werkzeug Version + <td>%(werkzeug_version)s + </table> + <h2 id="wsgi-environment">WSGI Environment</h2> + <table>%(wsgi_env)s</table> + <h2 id="installed-eggs">Installed Eggs</h2> + <p> + The following python packages were installed on the system as + Python eggs: + <ul>%(python_eggs)s</ul> + <h2 id="sys-path">System Path</h2> + <p> + The following paths are the current contents of the load path. The + following entries are looked up for Python packages. Note that not + all items in this path are folders. Gray and underlined items are + entries pointing to invalid resources or used by custom import hooks + such as the zip importer. + <p> + Items with a bright background were expanded for display from a relative + path. If you encounter such paths in the output you might want to check + your setup as relative paths are usually problematic in multithreaded + environments. + <ul class="path">%(sys_path)s</ul> +</div> +""" + + +def iter_sys_path(): + if os.name == "posix": + + def strip(x): + prefix = os.path.expanduser("~") + if x.startswith(prefix): + x = "~" + x[len(prefix) :] + return x + + else: + + def strip(x): + return x + + cwd = os.path.abspath(os.getcwd()) + for item in sys.path: + path = os.path.join(cwd, item or os.path.curdir) + yield strip(os.path.normpath(path)), not os.path.isdir(path), path != item + + +def render_testapp(req): + try: + import pkg_resources + except ImportError: + eggs = () + else: + eggs = sorted(pkg_resources.working_set, key=lambda x: x.project_name.lower()) + python_eggs = [] + for egg in eggs: + try: + version = egg.version + except (ValueError, AttributeError): + version = "unknown" + python_eggs.append( + "<li>%s <small>[%s]</small>" % (escape(egg.project_name), escape(version)) + ) + + wsgi_env = [] + sorted_environ = sorted(req.environ.items(), key=lambda x: repr(x[0]).lower()) + for key, value in sorted_environ: + wsgi_env.append( + "<tr><th>%s<td><code>%s</code>" + % (escape(str(key)), " ".join(wrap(escape(repr(value))))) + ) + + sys_path = [] + for item, virtual, expanded in iter_sys_path(): + class_ = [] + if virtual: + class_.append("virtual") + if expanded: + class_.append("exp") + sys_path.append( + "<li%s>%s" + % (' class="%s"' % " ".join(class_) if class_ else "", escape(item)) + ) + + return ( + TEMPLATE + % { + "python_version": "<br>".join(escape(sys.version).splitlines()), + "platform": escape(sys.platform), + "os": escape(os.name), + "api_version": sys.api_version, + "byteorder": sys.byteorder, + "werkzeug_version": werkzeug.__version__, + "python_eggs": "\n".join(python_eggs), + "wsgi_env": "\n".join(wsgi_env), + "sys_path": "\n".join(sys_path), + } + ).encode("utf-8") + + +def test_app(environ, start_response): + """Simple test application that dumps the environment. You can use + it to check if Werkzeug is working properly: + + .. sourcecode:: pycon + + >>> from werkzeug.serving import run_simple + >>> from werkzeug.testapp import test_app + >>> run_simple('localhost', 3000, test_app) + * Running on http://localhost:3000/ + + The application displays important information from the WSGI environment, + the Python interpreter and the installed libraries. + """ + req = Request(environ, populate_request=False) + if req.args.get("resource") == "logo": + response = logo + else: + response = Response(render_testapp(req), mimetype="text/html") + return response(environ, start_response) + + +if __name__ == "__main__": + from .serving import run_simple + + run_simple("localhost", 5000, test_app, use_reloader=True) diff --git a/python/werkzeug/urls.py b/python/werkzeug/urls.py new file mode 100644 index 0000000..38e9e5a --- /dev/null +++ b/python/werkzeug/urls.py @@ -0,0 +1,1134 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.urls + ~~~~~~~~~~~~~ + + ``werkzeug.urls`` used to provide several wrapper functions for Python 2 + urlparse, whose main purpose were to work around the behavior of the Py2 + stdlib and its lack of unicode support. While this was already a somewhat + inconvenient situation, it got even more complicated because Python 3's + ``urllib.parse`` actually does handle unicode properly. In other words, + this module would wrap two libraries with completely different behavior. So + now this module contains a 2-and-3-compatible backport of Python 3's + ``urllib.parse``, which is mostly API-compatible. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import os +import re +from collections import namedtuple + +from ._compat import fix_tuple_repr +from ._compat import implements_to_string +from ._compat import make_literal_wrapper +from ._compat import normalize_string_tuple +from ._compat import PY2 +from ._compat import text_type +from ._compat import to_native +from ._compat import to_unicode +from ._compat import try_coerce_native +from ._internal import _decode_idna +from ._internal import _encode_idna +from .datastructures import iter_multi_items +from .datastructures import MultiDict + +# A regular expression for what a valid schema looks like +_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$") + +# Characters that are safe in any part of an URL. +_always_safe = frozenset( + bytearray( + b"abcdefghijklmnopqrstuvwxyz" + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"0123456789" + b"-._~" + ) +) + +_hexdigits = "0123456789ABCDEFabcdef" +_hextobyte = dict( + ((a + b).encode(), int(a + b, 16)) for a in _hexdigits for b in _hexdigits +) +_bytetohex = [("%%%02X" % char).encode("ascii") for char in range(256)] + + +_URLTuple = fix_tuple_repr( + namedtuple("_URLTuple", ["scheme", "netloc", "path", "query", "fragment"]) +) + + +class BaseURL(_URLTuple): + """Superclass of :py:class:`URL` and :py:class:`BytesURL`.""" + + __slots__ = () + + def replace(self, **kwargs): + """Return an URL with the same values, except for those parameters + given new values by whichever keyword arguments are specified.""" + return self._replace(**kwargs) + + @property + def host(self): + """The host part of the URL if available, otherwise `None`. The + host is either the hostname or the IP address mentioned in the + URL. It will not contain the port. + """ + return self._split_host()[0] + + @property + def ascii_host(self): + """Works exactly like :attr:`host` but will return a result that + is restricted to ASCII. If it finds a netloc that is not ASCII + it will attempt to idna decode it. This is useful for socket + operations when the URL might include internationalized characters. + """ + rv = self.host + if rv is not None and isinstance(rv, text_type): + try: + rv = _encode_idna(rv) + except UnicodeError: + rv = rv.encode("ascii", "ignore") + return to_native(rv, "ascii", "ignore") + + @property + def port(self): + """The port in the URL as an integer if it was present, `None` + otherwise. This does not fill in default ports. + """ + try: + rv = int(to_native(self._split_host()[1])) + if 0 <= rv <= 65535: + return rv + except (ValueError, TypeError): + pass + + @property + def auth(self): + """The authentication part in the URL if available, `None` + otherwise. + """ + return self._split_netloc()[0] + + @property + def username(self): + """The username if it was part of the URL, `None` otherwise. + This undergoes URL decoding and will always be a unicode string. + """ + rv = self._split_auth()[0] + if rv is not None: + return _url_unquote_legacy(rv) + + @property + def raw_username(self): + """The username if it was part of the URL, `None` otherwise. + Unlike :attr:`username` this one is not being decoded. + """ + return self._split_auth()[0] + + @property + def password(self): + """The password if it was part of the URL, `None` otherwise. + This undergoes URL decoding and will always be a unicode string. + """ + rv = self._split_auth()[1] + if rv is not None: + return _url_unquote_legacy(rv) + + @property + def raw_password(self): + """The password if it was part of the URL, `None` otherwise. + Unlike :attr:`password` this one is not being decoded. + """ + return self._split_auth()[1] + + def decode_query(self, *args, **kwargs): + """Decodes the query part of the URL. Ths is a shortcut for + calling :func:`url_decode` on the query argument. The arguments and + keyword arguments are forwarded to :func:`url_decode` unchanged. + """ + return url_decode(self.query, *args, **kwargs) + + def join(self, *args, **kwargs): + """Joins this URL with another one. This is just a convenience + function for calling into :meth:`url_join` and then parsing the + return value again. + """ + return url_parse(url_join(self, *args, **kwargs)) + + def to_url(self): + """Returns a URL string or bytes depending on the type of the + information stored. This is just a convenience function + for calling :meth:`url_unparse` for this URL. + """ + return url_unparse(self) + + def decode_netloc(self): + """Decodes the netloc part into a string.""" + rv = _decode_idna(self.host or "") + + if ":" in rv: + rv = "[%s]" % rv + port = self.port + if port is not None: + rv = "%s:%d" % (rv, port) + auth = ":".join( + filter( + None, + [ + _url_unquote_legacy(self.raw_username or "", "/:%@"), + _url_unquote_legacy(self.raw_password or "", "/:%@"), + ], + ) + ) + if auth: + rv = "%s@%s" % (auth, rv) + return rv + + def to_uri_tuple(self): + """Returns a :class:`BytesURL` tuple that holds a URI. This will + encode all the information in the URL properly to ASCII using the + rules a web browser would follow. + + It's usually more interesting to directly call :meth:`iri_to_uri` which + will return a string. + """ + return url_parse(iri_to_uri(self).encode("ascii")) + + def to_iri_tuple(self): + """Returns a :class:`URL` tuple that holds a IRI. This will try + to decode as much information as possible in the URL without + losing information similar to how a web browser does it for the + URL bar. + + It's usually more interesting to directly call :meth:`uri_to_iri` which + will return a string. + """ + return url_parse(uri_to_iri(self)) + + def get_file_location(self, pathformat=None): + """Returns a tuple with the location of the file in the form + ``(server, location)``. If the netloc is empty in the URL or + points to localhost, it's represented as ``None``. + + The `pathformat` by default is autodetection but needs to be set + when working with URLs of a specific system. The supported values + are ``'windows'`` when working with Windows or DOS paths and + ``'posix'`` when working with posix paths. + + If the URL does not point to a local file, the server and location + are both represented as ``None``. + + :param pathformat: The expected format of the path component. + Currently ``'windows'`` and ``'posix'`` are + supported. Defaults to ``None`` which is + autodetect. + """ + if self.scheme != "file": + return None, None + + path = url_unquote(self.path) + host = self.netloc or None + + if pathformat is None: + if os.name == "nt": + pathformat = "windows" + else: + pathformat = "posix" + + if pathformat == "windows": + if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:": + path = path[1:2] + ":" + path[3:] + windows_share = path[:3] in ("\\" * 3, "/" * 3) + import ntpath + + path = ntpath.normpath(path) + # Windows shared drives are represented as ``\\host\\directory``. + # That results in a URL like ``file://///host/directory``, and a + # path like ``///host/directory``. We need to special-case this + # because the path contains the hostname. + if windows_share and host is None: + parts = path.lstrip("\\").split("\\", 1) + if len(parts) == 2: + host, path = parts + else: + host = parts[0] + path = "" + elif pathformat == "posix": + import posixpath + + path = posixpath.normpath(path) + else: + raise TypeError("Invalid path format %s" % repr(pathformat)) + + if host in ("127.0.0.1", "::1", "localhost"): + host = None + + return host, path + + def _split_netloc(self): + if self._at in self.netloc: + return self.netloc.split(self._at, 1) + return None, self.netloc + + def _split_auth(self): + auth = self._split_netloc()[0] + if not auth: + return None, None + if self._colon not in auth: + return auth, None + return auth.split(self._colon, 1) + + def _split_host(self): + rv = self._split_netloc()[1] + if not rv: + return None, None + + if not rv.startswith(self._lbracket): + if self._colon in rv: + return rv.split(self._colon, 1) + return rv, None + + idx = rv.find(self._rbracket) + if idx < 0: + return rv, None + + host = rv[1:idx] + rest = rv[idx + 1 :] + if rest.startswith(self._colon): + return host, rest[1:] + return host, None + + +@implements_to_string +class URL(BaseURL): + """Represents a parsed URL. This behaves like a regular tuple but + also has some extra attributes that give further insight into the + URL. + """ + + __slots__ = () + _at = "@" + _colon = ":" + _lbracket = "[" + _rbracket = "]" + + def __str__(self): + return self.to_url() + + def encode_netloc(self): + """Encodes the netloc part to an ASCII safe URL as bytes.""" + rv = self.ascii_host or "" + if ":" in rv: + rv = "[%s]" % rv + port = self.port + if port is not None: + rv = "%s:%d" % (rv, port) + auth = ":".join( + filter( + None, + [ + url_quote(self.raw_username or "", "utf-8", "strict", "/:%"), + url_quote(self.raw_password or "", "utf-8", "strict", "/:%"), + ], + ) + ) + if auth: + rv = "%s@%s" % (auth, rv) + return to_native(rv) + + def encode(self, charset="utf-8", errors="replace"): + """Encodes the URL to a tuple made out of bytes. The charset is + only being used for the path, query and fragment. + """ + return BytesURL( + self.scheme.encode("ascii"), + self.encode_netloc(), + self.path.encode(charset, errors), + self.query.encode(charset, errors), + self.fragment.encode(charset, errors), + ) + + +class BytesURL(BaseURL): + """Represents a parsed URL in bytes.""" + + __slots__ = () + _at = b"@" + _colon = b":" + _lbracket = b"[" + _rbracket = b"]" + + def __str__(self): + return self.to_url().decode("utf-8", "replace") + + def encode_netloc(self): + """Returns the netloc unchanged as bytes.""" + return self.netloc + + def decode(self, charset="utf-8", errors="replace"): + """Decodes the URL to a tuple made out of strings. The charset is + only being used for the path, query and fragment. + """ + return URL( + self.scheme.decode("ascii"), + self.decode_netloc(), + self.path.decode(charset, errors), + self.query.decode(charset, errors), + self.fragment.decode(charset, errors), + ) + + +_unquote_maps = {frozenset(): _hextobyte} + + +def _unquote_to_bytes(string, unsafe=""): + if isinstance(string, text_type): + string = string.encode("utf-8") + + if isinstance(unsafe, text_type): + unsafe = unsafe.encode("utf-8") + + unsafe = frozenset(bytearray(unsafe)) + groups = iter(string.split(b"%")) + result = bytearray(next(groups, b"")) + + try: + hex_to_byte = _unquote_maps[unsafe] + except KeyError: + hex_to_byte = _unquote_maps[unsafe] = { + h: b for h, b in _hextobyte.items() if b not in unsafe + } + + for group in groups: + code = group[:2] + + if code in hex_to_byte: + result.append(hex_to_byte[code]) + result.extend(group[2:]) + else: + result.append(37) # % + result.extend(group) + + return bytes(result) + + +def _url_encode_impl(obj, charset, encode_keys, sort, key): + iterable = iter_multi_items(obj) + if sort: + iterable = sorted(iterable, key=key) + for key, value in iterable: + if value is None: + continue + if not isinstance(key, bytes): + key = text_type(key).encode(charset) + if not isinstance(value, bytes): + value = text_type(value).encode(charset) + yield _fast_url_quote_plus(key) + "=" + _fast_url_quote_plus(value) + + +def _url_unquote_legacy(value, unsafe=""): + try: + return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe) + except UnicodeError: + return url_unquote(value, charset="latin1", unsafe=unsafe) + + +def url_parse(url, scheme=None, allow_fragments=True): + """Parses a URL from a string into a :class:`URL` tuple. If the URL + is lacking a scheme it can be provided as second argument. Otherwise, + it is ignored. Optionally fragments can be stripped from the URL + by setting `allow_fragments` to `False`. + + The inverse of this function is :func:`url_unparse`. + + :param url: the URL to parse. + :param scheme: the default schema to use if the URL is schemaless. + :param allow_fragments: if set to `False` a fragment will be removed + from the URL. + """ + s = make_literal_wrapper(url) + is_text_based = isinstance(url, text_type) + + if scheme is None: + scheme = s("") + netloc = query = fragment = s("") + i = url.find(s(":")) + if i > 0 and _scheme_re.match(to_native(url[:i], errors="replace")): + # make sure "iri" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i + 1 :] + if not rest or any(c not in s("0123456789") for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == s("//"): + delim = len(url) + for c in s("/?#"): + wdelim = url.find(c, 2) + if wdelim >= 0: + delim = min(delim, wdelim) + netloc, url = url[2:delim], url[delim:] + if (s("[") in netloc and s("]") not in netloc) or ( + s("]") in netloc and s("[") not in netloc + ): + raise ValueError("Invalid IPv6 URL") + + if allow_fragments and s("#") in url: + url, fragment = url.split(s("#"), 1) + if s("?") in url: + url, query = url.split(s("?"), 1) + + result_type = URL if is_text_based else BytesURL + return result_type(scheme, netloc, url, query, fragment) + + +def _make_fast_url_quote(charset="utf-8", errors="strict", safe="/:", unsafe=""): + """Precompile the translation table for a URL encoding function. + + Unlike :func:`url_quote`, the generated function only takes the + string to quote. + + :param charset: The charset to encode the result with. + :param errors: How to handle encoding errors. + :param safe: An optional sequence of safe characters to never encode. + :param unsafe: An optional sequence of unsafe characters to always encode. + """ + if isinstance(safe, text_type): + safe = safe.encode(charset, errors) + + if isinstance(unsafe, text_type): + unsafe = unsafe.encode(charset, errors) + + safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe)) + table = [chr(c) if c in safe else "%%%02X" % c for c in range(256)] + + if not PY2: + + def quote(string): + return "".join([table[c] for c in string]) + + else: + + def quote(string): + return "".join([table[c] for c in bytearray(string)]) + + return quote + + +_fast_url_quote = _make_fast_url_quote() +_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+") + + +def _fast_url_quote_plus(string): + return _fast_quote_plus(string).replace(" ", "+") + + +def url_quote(string, charset="utf-8", errors="strict", safe="/:", unsafe=""): + """URL encode a single string with a given encoding. + + :param s: the string to quote. + :param charset: the charset to be used. + :param safe: an optional sequence of safe characters. + :param unsafe: an optional sequence of unsafe characters. + + .. versionadded:: 0.9.2 + The `unsafe` parameter was added. + """ + if not isinstance(string, (text_type, bytes, bytearray)): + string = text_type(string) + if isinstance(string, text_type): + string = string.encode(charset, errors) + if isinstance(safe, text_type): + safe = safe.encode(charset, errors) + if isinstance(unsafe, text_type): + unsafe = unsafe.encode(charset, errors) + safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe)) + rv = bytearray() + for char in bytearray(string): + if char in safe: + rv.append(char) + else: + rv.extend(_bytetohex[char]) + return to_native(bytes(rv)) + + +def url_quote_plus(string, charset="utf-8", errors="strict", safe=""): + """URL encode a single string with the given encoding and convert + whitespace to "+". + + :param s: The string to quote. + :param charset: The charset to be used. + :param safe: An optional sequence of safe characters. + """ + return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+") + + +def url_unparse(components): + """The reverse operation to :meth:`url_parse`. This accepts arbitrary + as well as :class:`URL` tuples and returns a URL as a string. + + :param components: the parsed URL as tuple which should be converted + into a URL string. + """ + scheme, netloc, path, query, fragment = normalize_string_tuple(components) + s = make_literal_wrapper(scheme) + url = s("") + + # We generally treat file:///x and file:/x the same which is also + # what browsers seem to do. This also allows us to ignore a schema + # register for netloc utilization or having to differenciate between + # empty and missing netloc. + if netloc or (scheme and path.startswith(s("/"))): + if path and path[:1] != s("/"): + path = s("/") + path + url = s("//") + (netloc or s("")) + path + elif path: + url += path + if scheme: + url = scheme + s(":") + url + if query: + url = url + s("?") + query + if fragment: + url = url + s("#") + fragment + return url + + +def url_unquote(string, charset="utf-8", errors="replace", unsafe=""): + """URL decode a single string with a given encoding. If the charset + is set to `None` no unicode decoding is performed and raw bytes + are returned. + + :param s: the string to unquote. + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param errors: the error handling for the charset decoding. + """ + rv = _unquote_to_bytes(string, unsafe) + if charset is not None: + rv = rv.decode(charset, errors) + return rv + + +def url_unquote_plus(s, charset="utf-8", errors="replace"): + """URL decode a single string with the given `charset` and decode "+" to + whitespace. + + Per default encoding errors are ignored. If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a + :exc:`HTTPUnicodeError` is raised. + + :param s: The string to unquote. + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param errors: The error handling for the `charset` decoding. + """ + if isinstance(s, text_type): + s = s.replace(u"+", u" ") + else: + s = s.replace(b"+", b" ") + return url_unquote(s, charset, errors) + + +def url_fix(s, charset="utf-8"): + r"""Sometimes you get an URL by a user that just isn't a real URL because + it contains unsafe characters like ' ' and so on. This function can fix + some of the problems in a similar way browsers handle data entered by the + user: + + >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)') + 'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)' + + :param s: the string with the URL to fix. + :param charset: The target charset for the URL if the url was given as + unicode string. + """ + # First step is to switch to unicode processing and to convert + # backslashes (which are invalid in URLs anyways) to slashes. This is + # consistent with what Chrome does. + s = to_unicode(s, charset, "replace").replace("\\", "/") + + # For the specific case that we look like a malformed windows URL + # we want to fix this up manually: + if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"): + s = "file:///" + s[7:] + + url = url_parse(s) + path = url_quote(url.path, charset, safe="/%+$!*'(),") + qs = url_quote_plus(url.query, charset, safe=":&%=+$!*'(),") + anchor = url_quote_plus(url.fragment, charset, safe=":&%=+$!*'(),") + return to_native(url_unparse((url.scheme, url.encode_netloc(), path, qs, anchor))) + + +# not-unreserved characters remain quoted when unquoting to IRI +_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe]) + + +def _codec_error_url_quote(e): + """Used in :func:`uri_to_iri` after unquoting to re-quote any + invalid bytes. + """ + out = _fast_url_quote(e.object[e.start : e.end]) + + if PY2: + out = out.decode("utf-8") + + return out, e.end + + +codecs.register_error("werkzeug.url_quote", _codec_error_url_quote) + + +def uri_to_iri(uri, charset="utf-8", errors="werkzeug.url_quote"): + """Convert a URI to an IRI. All valid UTF-8 characters are unquoted, + leaving all reserved and invalid characters quoted. If the URL has + a domain, it is decoded from Punycode. + + >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF") + 'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF' + + :param uri: The URI to convert. + :param charset: The encoding to encode unquoted bytes with. + :param errors: Error handler to use during ``bytes.encode``. By + default, invalid bytes are left quoted. + + .. versionchanged:: 0.15 + All reserved and invalid characters remain quoted. Previously, + only some reserved characters were preserved, and invalid bytes + were replaced instead of left quoted. + + .. versionadded:: 0.6 + """ + if isinstance(uri, tuple): + uri = url_unparse(uri) + + uri = url_parse(to_unicode(uri, charset)) + path = url_unquote(uri.path, charset, errors, _to_iri_unsafe) + query = url_unquote(uri.query, charset, errors, _to_iri_unsafe) + fragment = url_unquote(uri.fragment, charset, errors, _to_iri_unsafe) + return url_unparse((uri.scheme, uri.decode_netloc(), path, query, fragment)) + + +# reserved characters remain unquoted when quoting to URI +_to_uri_safe = ":/?#[]@!$&'()*+,;=%" + + +def iri_to_uri(iri, charset="utf-8", errors="strict", safe_conversion=False): + """Convert an IRI to a URI. All non-ASCII and unsafe characters are + quoted. If the URL has a domain, it is encoded to Punycode. + + >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF') + 'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF' + + :param iri: The IRI to convert. + :param charset: The encoding of the IRI. + :param errors: Error handler to use during ``bytes.encode``. + :param safe_conversion: Return the URL unchanged if it only contains + ASCII characters and no whitespace. See the explanation below. + + There is a general problem with IRI conversion with some protocols + that are in violation of the URI specification. Consider the + following two IRIs:: + + magnet:?xt=uri:whatever + itms-services://?action=download-manifest + + After parsing, we don't know if the scheme requires the ``//``, + which is dropped if empty, but conveys different meanings in the + final URL if it's present or not. In this case, you can use + ``safe_conversion``, which will return the URL unchanged if it only + contains ASCII characters and no whitespace. This can result in a + URI with unquoted characters if it was not already quoted correctly, + but preserves the URL's semantics. Werkzeug uses this for the + ``Location`` header for redirects. + + .. versionchanged:: 0.15 + All reserved characters remain unquoted. Previously, only some + reserved characters were left unquoted. + + .. versionchanged:: 0.9.6 + The ``safe_conversion`` parameter was added. + + .. versionadded:: 0.6 + """ + if isinstance(iri, tuple): + iri = url_unparse(iri) + + if safe_conversion: + # If we're not sure if it's safe to convert the URL, and it only + # contains ASCII characters, return it unconverted. + try: + native_iri = to_native(iri) + ascii_iri = native_iri.encode("ascii") + + # Only return if it doesn't have whitespace. (Why?) + if len(ascii_iri.split()) == 1: + return native_iri + except UnicodeError: + pass + + iri = url_parse(to_unicode(iri, charset, errors)) + path = url_quote(iri.path, charset, errors, _to_uri_safe) + query = url_quote(iri.query, charset, errors, _to_uri_safe) + fragment = url_quote(iri.fragment, charset, errors, _to_uri_safe) + return to_native( + url_unparse((iri.scheme, iri.encode_netloc(), path, query, fragment)) + ) + + +def url_decode( + s, + charset="utf-8", + decode_keys=False, + include_empty=True, + errors="replace", + separator="&", + cls=None, +): + """ + Parse a querystring and return it as :class:`MultiDict`. There is a + difference in key decoding on different Python versions. On Python 3 + keys will always be fully decoded whereas on Python 2, keys will + remain bytestrings if they fit into ASCII. On 2.x keys can be forced + to be unicode by setting `decode_keys` to `True`. + + If the charset is set to `None` no unicode decoding will happen and + raw bytes will be returned. + + Per default a missing value for a key will default to an empty key. If + you don't want that behavior you can set `include_empty` to `False`. + + Per default encoding errors are ignored. If you want a different behavior + you can set `errors` to ``'replace'`` or ``'strict'``. In strict mode a + `HTTPUnicodeError` is raised. + + .. versionchanged:: 0.5 + In previous versions ";" and "&" could be used for url decoding. + This changed in 0.5 where only "&" is supported. If you want to + use ";" instead a different `separator` can be provided. + + The `cls` parameter was added. + + :param s: a string with the query string to decode. + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param decode_keys: Used on Python 2.x to control whether keys should + be forced to be unicode objects. If set to `True` + then keys will be unicode in all cases. Otherwise, + they remain `str` if they fit into ASCII. + :param include_empty: Set to `False` if you don't want empty values to + appear in the dict. + :param errors: the decoding error behavior. + :param separator: the pair separator to be used, defaults to ``&`` + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + """ + if cls is None: + cls = MultiDict + if isinstance(s, text_type) and not isinstance(separator, text_type): + separator = separator.decode(charset or "ascii") + elif isinstance(s, bytes) and not isinstance(separator, bytes): + separator = separator.encode(charset or "ascii") + return cls( + _url_decode_impl( + s.split(separator), charset, decode_keys, include_empty, errors + ) + ) + + +def url_decode_stream( + stream, + charset="utf-8", + decode_keys=False, + include_empty=True, + errors="replace", + separator="&", + cls=None, + limit=None, + return_iterator=False, +): + """Works like :func:`url_decode` but decodes a stream. The behavior + of stream and limit follows functions like + :func:`~werkzeug.wsgi.make_line_iter`. The generator of pairs is + directly fed to the `cls` so you can consume the data while it's + parsed. + + .. versionadded:: 0.8 + + :param stream: a stream with the encoded querystring + :param charset: the charset of the query string. If set to `None` + no unicode decoding will take place. + :param decode_keys: Used on Python 2.x to control whether keys should + be forced to be unicode objects. If set to `True`, + keys will be unicode in all cases. Otherwise, they + remain `str` if they fit into ASCII. + :param include_empty: Set to `False` if you don't want empty values to + appear in the dict. + :param errors: the decoding error behavior. + :param separator: the pair separator to be used, defaults to ``&`` + :param cls: an optional dict class to use. If this is not specified + or `None` the default :class:`MultiDict` is used. + :param limit: the content length of the URL data. Not necessary if + a limited stream is provided. + :param return_iterator: if set to `True` the `cls` argument is ignored + and an iterator over all decoded pairs is + returned + """ + from .wsgi import make_chunk_iter + + pair_iter = make_chunk_iter(stream, separator, limit) + decoder = _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors) + + if return_iterator: + return decoder + + if cls is None: + cls = MultiDict + + return cls(decoder) + + +def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors): + for pair in pair_iter: + if not pair: + continue + s = make_literal_wrapper(pair) + equal = s("=") + if equal in pair: + key, value = pair.split(equal, 1) + else: + if not include_empty: + continue + key = pair + value = s("") + key = url_unquote_plus(key, charset, errors) + if charset is not None and PY2 and not decode_keys: + key = try_coerce_native(key) + yield key, url_unquote_plus(value, charset, errors) + + +def url_encode( + obj, charset="utf-8", encode_keys=False, sort=False, key=None, separator=b"&" +): + """URL encode a dict/`MultiDict`. If a value is `None` it will not appear + in the result string. Per default only values are encoded into the target + charset strings. If `encode_keys` is set to ``True`` unicode keys are + supported too. + + If `sort` is set to `True` the items are sorted by `key` or the default + sorting algorithm. + + .. versionadded:: 0.5 + `sort`, `key`, and `separator` were added. + + :param obj: the object to encode into a query string. + :param charset: the charset of the query string. + :param encode_keys: set to `True` if you have unicode keys. (Ignored on + Python 3.x) + :param sort: set to `True` if you want parameters to be sorted by `key`. + :param separator: the separator to be used for the pairs. + :param key: an optional function to be used for sorting. For more details + check out the :func:`sorted` documentation. + """ + separator = to_native(separator, "ascii") + return separator.join(_url_encode_impl(obj, charset, encode_keys, sort, key)) + + +def url_encode_stream( + obj, + stream=None, + charset="utf-8", + encode_keys=False, + sort=False, + key=None, + separator=b"&", +): + """Like :meth:`url_encode` but writes the results to a stream + object. If the stream is `None` a generator over all encoded + pairs is returned. + + .. versionadded:: 0.8 + + :param obj: the object to encode into a query string. + :param stream: a stream to write the encoded object into or `None` if + an iterator over the encoded pairs should be returned. In + that case the separator argument is ignored. + :param charset: the charset of the query string. + :param encode_keys: set to `True` if you have unicode keys. (Ignored on + Python 3.x) + :param sort: set to `True` if you want parameters to be sorted by `key`. + :param separator: the separator to be used for the pairs. + :param key: an optional function to be used for sorting. For more details + check out the :func:`sorted` documentation. + """ + separator = to_native(separator, "ascii") + gen = _url_encode_impl(obj, charset, encode_keys, sort, key) + if stream is None: + return gen + for idx, chunk in enumerate(gen): + if idx: + stream.write(separator) + stream.write(chunk) + + +def url_join(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter. + + :param base: the base URL for the join operation. + :param url: the URL to join. + :param allow_fragments: indicates whether fragments should be allowed. + """ + if isinstance(base, tuple): + base = url_unparse(base) + if isinstance(url, tuple): + url = url_unparse(url) + + base, url = normalize_string_tuple((base, url)) + s = make_literal_wrapper(base) + + if not base: + return url + if not url: + return base + + bscheme, bnetloc, bpath, bquery, bfragment = url_parse( + base, allow_fragments=allow_fragments + ) + scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments) + if scheme != bscheme: + return url + if netloc: + return url_unparse((scheme, netloc, path, query, fragment)) + netloc = bnetloc + + if path[:1] == s("/"): + segments = path.split(s("/")) + elif not path: + segments = bpath.split(s("/")) + if not query: + query = bquery + else: + segments = bpath.split(s("/"))[:-1] + path.split(s("/")) + + # If the rightmost part is "./" we want to keep the slash but + # remove the dot. + if segments[-1] == s("."): + segments[-1] = s("") + + # Resolve ".." and "." + segments = [segment for segment in segments if segment != s(".")] + while 1: + i = 1 + n = len(segments) - 1 + while i < n: + if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")): + del segments[i - 1 : i + 1] + break + i += 1 + else: + break + + # Remove trailing ".." if the URL is absolute + unwanted_marker = [s(""), s("..")] + while segments[:2] == unwanted_marker: + del segments[1] + + path = s("/").join(segments) + return url_unparse((scheme, netloc, path, query, fragment)) + + +class Href(object): + """Implements a callable that constructs URLs with the given base. The + function can be called with any number of positional and keyword + arguments which than are used to assemble the URL. Works with URLs + and posix paths. + + Positional arguments are appended as individual segments to + the path of the URL: + + >>> href = Href('/foo') + >>> href('bar', 23) + '/foo/bar/23' + >>> href('foo', bar=23) + '/foo/foo?bar=23' + + If any of the arguments (positional or keyword) evaluates to `None` it + will be skipped. If no keyword arguments are given the last argument + can be a :class:`dict` or :class:`MultiDict` (or any other dict subclass), + otherwise the keyword arguments are used for the query parameters, cutting + off the first trailing underscore of the parameter name: + + >>> href(is_=42) + '/foo?is=42' + >>> href({'foo': 'bar'}) + '/foo?foo=bar' + + Combining of both methods is not allowed: + + >>> href({'foo': 'bar'}, bar=42) + Traceback (most recent call last): + ... + TypeError: keyword arguments and query-dicts can't be combined + + Accessing attributes on the href object creates a new href object with + the attribute name as prefix: + + >>> bar_href = href.bar + >>> bar_href("blub") + '/foo/bar/blub' + + If `sort` is set to `True` the items are sorted by `key` or the default + sorting algorithm: + + >>> href = Href("/", sort=True) + >>> href(a=1, b=2, c=3) + '/?a=1&b=2&c=3' + + .. versionadded:: 0.5 + `sort` and `key` were added. + """ + + def __init__(self, base="./", charset="utf-8", sort=False, key=None): + if not base: + base = "./" + self.base = base + self.charset = charset + self.sort = sort + self.key = key + + def __getattr__(self, name): + if name[:2] == "__": + raise AttributeError(name) + base = self.base + if base[-1:] != "/": + base += "/" + return Href(url_join(base, name), self.charset, self.sort, self.key) + + def __call__(self, *path, **query): + if path and isinstance(path[-1], dict): + if query: + raise TypeError("keyword arguments and query-dicts can't be combined") + query, path = path[-1], path[:-1] + elif query: + query = dict( + [(k.endswith("_") and k[:-1] or k, v) for k, v in query.items()] + ) + path = "/".join( + [ + to_unicode(url_quote(x, self.charset), "ascii") + for x in path + if x is not None + ] + ).lstrip("/") + rv = self.base + if path: + if not rv.endswith("/"): + rv += "/" + rv = url_join(rv, "./" + path) + if query: + rv += "?" + to_unicode( + url_encode(query, self.charset, sort=self.sort, key=self.key), "ascii" + ) + return to_native(rv) diff --git a/python/werkzeug/useragents.py b/python/werkzeug/useragents.py new file mode 100644 index 0000000..e265e09 --- /dev/null +++ b/python/werkzeug/useragents.py @@ -0,0 +1,220 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.useragents + ~~~~~~~~~~~~~~~~~~~ + + This module provides a helper to inspect user agent strings. This module + is far from complete but should work for most of the currently available + browsers. + + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import re +import warnings + + +class UserAgentParser(object): + """A simple user agent parser. Used by the `UserAgent`.""" + + platforms = ( + ("cros", "chromeos"), + ("iphone|ios", "iphone"), + ("ipad", "ipad"), + (r"darwin|mac|os\s*x", "macos"), + ("win", "windows"), + (r"android", "android"), + ("netbsd", "netbsd"), + ("openbsd", "openbsd"), + ("freebsd", "freebsd"), + ("dragonfly", "dragonflybsd"), + ("(sun|i86)os", "solaris"), + (r"x11|lin(\b|ux)?", "linux"), + (r"nintendo\s+wii", "wii"), + ("irix", "irix"), + ("hp-?ux", "hpux"), + ("aix", "aix"), + ("sco|unix_sv", "sco"), + ("bsd", "bsd"), + ("amiga", "amiga"), + ("blackberry|playbook", "blackberry"), + ("symbian", "symbian"), + ) + browsers = ( + ("googlebot", "google"), + ("msnbot", "msn"), + ("yahoo", "yahoo"), + ("ask jeeves", "ask"), + (r"aol|america\s+online\s+browser", "aol"), + ("opera", "opera"), + ("edge", "edge"), + ("chrome|crios", "chrome"), + ("seamonkey", "seamonkey"), + ("firefox|firebird|phoenix|iceweasel", "firefox"), + ("galeon", "galeon"), + ("safari|version", "safari"), + ("webkit", "webkit"), + ("camino", "camino"), + ("konqueror", "konqueror"), + ("k-meleon", "kmeleon"), + ("netscape", "netscape"), + (r"msie|microsoft\s+internet\s+explorer|trident/.+? rv:", "msie"), + ("lynx", "lynx"), + ("links", "links"), + ("Baiduspider", "baidu"), + ("bingbot", "bing"), + ("mozilla", "mozilla"), + ) + + _browser_version_re = r"(?:%s)[/\sa-z(]*(\d+[.\da-z]+)?" + _language_re = re.compile( + r"(?:;\s*|\s+)(\b\w{2}\b(?:-\b\w{2}\b)?)\s*;|" + r"(?:\(|\[|;)\s*(\b\w{2}\b(?:-\b\w{2}\b)?)\s*(?:\]|\)|;)" + ) + + def __init__(self): + self.platforms = [(b, re.compile(a, re.I)) for a, b in self.platforms] + self.browsers = [ + (b, re.compile(self._browser_version_re % a, re.I)) + for a, b in self.browsers + ] + + def __call__(self, user_agent): + for platform, regex in self.platforms: # noqa: B007 + match = regex.search(user_agent) + if match is not None: + break + else: + platform = None + for browser, regex in self.browsers: # noqa: B007 + match = regex.search(user_agent) + if match is not None: + version = match.group(1) + break + else: + browser = version = None + match = self._language_re.search(user_agent) + if match is not None: + language = match.group(1) or match.group(2) + else: + language = None + return platform, browser, version, language + + +class UserAgent(object): + """Represents a user agent. Pass it a WSGI environment or a user agent + string and you can inspect some of the details from the user agent + string via the attributes. The following attributes exist: + + .. attribute:: string + + the raw user agent string + + .. attribute:: platform + + the browser platform. The following platforms are currently + recognized: + + - `aix` + - `amiga` + - `android` + - `blackberry` + - `bsd` + - `chromeos` + - `dragonflybsd` + - `freebsd` + - `hpux` + - `ipad` + - `iphone` + - `irix` + - `linux` + - `macos` + - `netbsd` + - `openbsd` + - `sco` + - `solaris` + - `symbian` + - `wii` + - `windows` + + .. attribute:: browser + + the name of the browser. The following browsers are currently + recognized: + + - `aol` * + - `ask` * + - `baidu` * + - `bing` * + - `camino` + - `chrome` + - `edge` + - `firefox` + - `galeon` + - `google` * + - `kmeleon` + - `konqueror` + - `links` + - `lynx` + - `mozilla` + - `msie` + - `msn` + - `netscape` + - `opera` + - `safari` + - `seamonkey` + - `webkit` + - `yahoo` * + + (Browsers marked with a star (``*``) are crawlers.) + + .. attribute:: version + + the version of the browser + + .. attribute:: language + + the language of the browser + """ + + _parser = UserAgentParser() + + def __init__(self, environ_or_string): + if isinstance(environ_or_string, dict): + environ_or_string = environ_or_string.get("HTTP_USER_AGENT", "") + self.string = environ_or_string + self.platform, self.browser, self.version, self.language = self._parser( + environ_or_string + ) + + def to_header(self): + return self.string + + def __str__(self): + return self.string + + def __nonzero__(self): + return bool(self.browser) + + __bool__ = __nonzero__ + + def __repr__(self): + return "<%s %r/%s>" % (self.__class__.__name__, self.browser, self.version) + + +# DEPRECATED +from .wrappers import UserAgentMixin as _UserAgentMixin + + +class UserAgentMixin(_UserAgentMixin): + @property + def user_agent(self, *args, **kwargs): + warnings.warn( + "'werkzeug.useragents.UserAgentMixin' should be imported" + " from 'werkzeug.wrappers.UserAgentMixin'. This old import" + " will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return super(_UserAgentMixin, self).user_agent diff --git a/python/werkzeug/utils.py b/python/werkzeug/utils.py new file mode 100644 index 0000000..2062057 --- /dev/null +++ b/python/werkzeug/utils.py @@ -0,0 +1,836 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.utils + ~~~~~~~~~~~~~~ + + This module implements various utilities for WSGI applications. Most of + them are used by the request and response wrappers but especially for + middleware development it makes sense to use them without the wrappers. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import codecs +import os +import pkgutil +import re +import sys +import warnings + +from ._compat import iteritems +from ._compat import PY2 +from ._compat import reraise +from ._compat import string_types +from ._compat import text_type +from ._compat import unichr +from ._internal import _DictAccessorProperty +from ._internal import _missing +from ._internal import _parse_signature + +try: + from html.entities import name2codepoint +except ImportError: + from htmlentitydefs import name2codepoint + + +_format_re = re.compile(r"\$(?:(%s)|\{(%s)\})" % (("[a-zA-Z_][a-zA-Z0-9_]*",) * 2)) +_entity_re = re.compile(r"&([^;]+);") +_filename_ascii_strip_re = re.compile(r"[^A-Za-z0-9_.-]") +_windows_device_files = ( + "CON", + "AUX", + "COM1", + "COM2", + "COM3", + "COM4", + "LPT1", + "LPT2", + "LPT3", + "PRN", + "NUL", +) + + +class cached_property(property): + """A decorator that converts a function into a lazy property. The + function wrapped is called the first time to retrieve the result + and then that calculated result is used the next time you access + the value:: + + class Foo(object): + + @cached_property + def foo(self): + # calculate something important here + return 42 + + The class has to have a `__dict__` in order for this property to + work. + """ + + # implementation detail: A subclass of python's builtin property + # decorator, we override __get__ to check for a cached value. If one + # chooses to invoke __get__ by hand the property will still work as + # expected because the lookup logic is replicated in __get__ for + # manual invocation. + + def __init__(self, func, name=None, doc=None): + self.__name__ = name or func.__name__ + self.__module__ = func.__module__ + self.__doc__ = doc or func.__doc__ + self.func = func + + def __set__(self, obj, value): + obj.__dict__[self.__name__] = value + + def __get__(self, obj, type=None): + if obj is None: + return self + value = obj.__dict__.get(self.__name__, _missing) + if value is _missing: + value = self.func(obj) + obj.__dict__[self.__name__] = value + return value + + +class environ_property(_DictAccessorProperty): + """Maps request attributes to environment variables. This works not only + for the Werzeug request object, but also any other class with an + environ attribute: + + >>> class Test(object): + ... environ = {'key': 'value'} + ... test = environ_property('key') + >>> var = Test() + >>> var.test + 'value' + + If you pass it a second value it's used as default if the key does not + exist, the third one can be a converter that takes a value and converts + it. If it raises :exc:`ValueError` or :exc:`TypeError` the default value + is used. If no default value is provided `None` is used. + + Per default the property is read only. You have to explicitly enable it + by passing ``read_only=False`` to the constructor. + """ + + read_only = True + + def lookup(self, obj): + return obj.environ + + +class header_property(_DictAccessorProperty): + """Like `environ_property` but for headers.""" + + def lookup(self, obj): + return obj.headers + + +class HTMLBuilder(object): + """Helper object for HTML generation. + + Per default there are two instances of that class. The `html` one, and + the `xhtml` one for those two dialects. The class uses keyword parameters + and positional parameters to generate small snippets of HTML. + + Keyword parameters are converted to XML/SGML attributes, positional + arguments are used as children. Because Python accepts positional + arguments before keyword arguments it's a good idea to use a list with the + star-syntax for some children: + + >>> html.p(class_='foo', *[html.a('foo', href='foo.html'), ' ', + ... html.a('bar', href='bar.html')]) + u'<p class="foo"><a href="foo.html">foo</a> <a href="bar.html">bar</a></p>' + + This class works around some browser limitations and can not be used for + arbitrary SGML/XML generation. For that purpose lxml and similar + libraries exist. + + Calling the builder escapes the string passed: + + >>> html.p(html("<foo>")) + u'<p><foo></p>' + """ + + _entity_re = re.compile(r"&([^;]+);") + _entities = name2codepoint.copy() + _entities["apos"] = 39 + _empty_elements = { + "area", + "base", + "basefont", + "br", + "col", + "command", + "embed", + "frame", + "hr", + "img", + "input", + "keygen", + "isindex", + "link", + "meta", + "param", + "source", + "wbr", + } + _boolean_attributes = { + "selected", + "checked", + "compact", + "declare", + "defer", + "disabled", + "ismap", + "multiple", + "nohref", + "noresize", + "noshade", + "nowrap", + } + _plaintext_elements = {"textarea"} + _c_like_cdata = {"script", "style"} + + def __init__(self, dialect): + self._dialect = dialect + + def __call__(self, s): + return escape(s) + + def __getattr__(self, tag): + if tag[:2] == "__": + raise AttributeError(tag) + + def proxy(*children, **arguments): + buffer = "<" + tag + for key, value in iteritems(arguments): + if value is None: + continue + if key[-1] == "_": + key = key[:-1] + if key in self._boolean_attributes: + if not value: + continue + if self._dialect == "xhtml": + value = '="' + key + '"' + else: + value = "" + else: + value = '="' + escape(value) + '"' + buffer += " " + key + value + if not children and tag in self._empty_elements: + if self._dialect == "xhtml": + buffer += " />" + else: + buffer += ">" + return buffer + buffer += ">" + + children_as_string = "".join( + [text_type(x) for x in children if x is not None] + ) + + if children_as_string: + if tag in self._plaintext_elements: + children_as_string = escape(children_as_string) + elif tag in self._c_like_cdata and self._dialect == "xhtml": + children_as_string = ( + "/*<![CDATA[*/" + children_as_string + "/*]]>*/" + ) + buffer += children_as_string + "</" + tag + ">" + return buffer + + return proxy + + def __repr__(self): + return "<%s for %r>" % (self.__class__.__name__, self._dialect) + + +html = HTMLBuilder("html") +xhtml = HTMLBuilder("xhtml") + +# https://cgit.freedesktop.org/xdg/shared-mime-info/tree/freedesktop.org.xml.in +# https://www.iana.org/assignments/media-types/media-types.xhtml +# Types listed in the XDG mime info that have a charset in the IANA registration. +_charset_mimetypes = { + "application/ecmascript", + "application/javascript", + "application/sql", + "application/xml", + "application/xml-dtd", + "application/xml-external-parsed-entity", +} + + +def get_content_type(mimetype, charset): + """Returns the full content type string with charset for a mimetype. + + If the mimetype represents text, the charset parameter will be + appended, otherwise the mimetype is returned unchanged. + + :param mimetype: The mimetype to be used as content type. + :param charset: The charset to be appended for text mimetypes. + :return: The content type. + + .. verionchanged:: 0.15 + Any type that ends with ``+xml`` gets a charset, not just those + that start with ``application/``. Known text types such as + ``application/javascript`` are also given charsets. + """ + if ( + mimetype.startswith("text/") + or mimetype in _charset_mimetypes + or mimetype.endswith("+xml") + ): + mimetype += "; charset=" + charset + + return mimetype + + +def detect_utf_encoding(data): + """Detect which UTF encoding was used to encode the given bytes. + + The latest JSON standard (:rfc:`8259`) suggests that only UTF-8 is + accepted. Older documents allowed 8, 16, or 32. 16 and 32 can be big + or little endian. Some editors or libraries may prepend a BOM. + + :internal: + + :param data: Bytes in unknown UTF encoding. + :return: UTF encoding name + + .. versionadded:: 0.15 + """ + head = data[:4] + + if head[:3] == codecs.BOM_UTF8: + return "utf-8-sig" + + if b"\x00" not in head: + return "utf-8" + + if head in (codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE): + return "utf-32" + + if head[:2] in (codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE): + return "utf-16" + + if len(head) == 4: + if head[:3] == b"\x00\x00\x00": + return "utf-32-be" + + if head[::2] == b"\x00\x00": + return "utf-16-be" + + if head[1:] == b"\x00\x00\x00": + return "utf-32-le" + + if head[1::2] == b"\x00\x00": + return "utf-16-le" + + if len(head) == 2: + return "utf-16-be" if head.startswith(b"\x00") else "utf-16-le" + + return "utf-8" + + +def format_string(string, context): + """String-template format a string: + + >>> format_string('$foo and ${foo}s', dict(foo=42)) + '42 and 42s' + + This does not do any attribute lookup etc. For more advanced string + formattings have a look at the `werkzeug.template` module. + + :param string: the format string. + :param context: a dict with the variables to insert. + """ + + def lookup_arg(match): + x = context[match.group(1) or match.group(2)] + if not isinstance(x, string_types): + x = type(string)(x) + return x + + return _format_re.sub(lookup_arg, string) + + +def secure_filename(filename): + r"""Pass it a filename and it will return a secure version of it. This + filename can then safely be stored on a regular file system and passed + to :func:`os.path.join`. The filename returned is an ASCII only string + for maximum portability. + + On windows systems the function also makes sure that the file is not + named after one of the special device files. + + >>> secure_filename("My cool movie.mov") + 'My_cool_movie.mov' + >>> secure_filename("../../../etc/passwd") + 'etc_passwd' + >>> secure_filename(u'i contain cool \xfcml\xe4uts.txt') + 'i_contain_cool_umlauts.txt' + + The function might return an empty filename. It's your responsibility + to ensure that the filename is unique and that you abort or + generate a random filename if the function returned an empty one. + + .. versionadded:: 0.5 + + :param filename: the filename to secure + """ + if isinstance(filename, text_type): + from unicodedata import normalize + + filename = normalize("NFKD", filename).encode("ascii", "ignore") + if not PY2: + filename = filename.decode("ascii") + for sep in os.path.sep, os.path.altsep: + if sep: + filename = filename.replace(sep, " ") + filename = str(_filename_ascii_strip_re.sub("", "_".join(filename.split()))).strip( + "._" + ) + + # on nt a couple of special files are present in each folder. We + # have to ensure that the target file is not such a filename. In + # this case we prepend an underline + if ( + os.name == "nt" + and filename + and filename.split(".")[0].upper() in _windows_device_files + ): + filename = "_" + filename + + return filename + + +def escape(s, quote=None): + """Replace special characters "&", "<", ">" and (") to HTML-safe sequences. + + There is a special handling for `None` which escapes to an empty string. + + .. versionchanged:: 0.9 + `quote` is now implicitly on. + + :param s: the string to escape. + :param quote: ignored. + """ + if s is None: + return "" + elif hasattr(s, "__html__"): + return text_type(s.__html__()) + elif not isinstance(s, string_types): + s = text_type(s) + if quote is not None: + from warnings import warn + + warn( + "The 'quote' parameter is no longer used as of version 0.9" + " and will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + s = ( + s.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + ) + return s + + +def unescape(s): + """The reverse function of `escape`. This unescapes all the HTML + entities, not only the XML entities inserted by `escape`. + + :param s: the string to unescape. + """ + + def handle_match(m): + name = m.group(1) + if name in HTMLBuilder._entities: + return unichr(HTMLBuilder._entities[name]) + try: + if name[:2] in ("#x", "#X"): + return unichr(int(name[2:], 16)) + elif name.startswith("#"): + return unichr(int(name[1:])) + except ValueError: + pass + return u"" + + return _entity_re.sub(handle_match, s) + + +def redirect(location, code=302, Response=None): + """Returns a response object (a WSGI application) that, if called, + redirects the client to the target location. Supported codes are + 301, 302, 303, 305, 307, and 308. 300 is not supported because + it's not a real redirect and 304 because it's the answer for a + request with a request with defined If-Modified-Since headers. + + .. versionadded:: 0.6 + The location can now be a unicode string that is encoded using + the :func:`iri_to_uri` function. + + .. versionadded:: 0.10 + The class used for the Response object can now be passed in. + + :param location: the location the response should redirect to. + :param code: the redirect status code. defaults to 302. + :param class Response: a Response class to use when instantiating a + response. The default is :class:`werkzeug.wrappers.Response` if + unspecified. + """ + if Response is None: + from .wrappers import Response + + display_location = escape(location) + if isinstance(location, text_type): + # Safe conversion is necessary here as we might redirect + # to a broken URI scheme (for instance itms-services). + from .urls import iri_to_uri + + location = iri_to_uri(location, safe_conversion=True) + response = Response( + '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">\n' + "<title>Redirecting...</title>\n" + "<h1>Redirecting...</h1>\n" + "<p>You should be redirected automatically to target URL: " + '<a href="%s">%s</a>. If not click the link.' + % (escape(location), display_location), + code, + mimetype="text/html", + ) + response.headers["Location"] = location + return response + + +def append_slash_redirect(environ, code=301): + """Redirects to the same URL but with a slash appended. The behavior + of this function is undefined if the path ends with a slash already. + + :param environ: the WSGI environment for the request that triggers + the redirect. + :param code: the status code for the redirect. + """ + new_path = environ["PATH_INFO"].strip("/") + "/" + query_string = environ.get("QUERY_STRING") + if query_string: + new_path += "?" + query_string + return redirect(new_path, code) + + +def import_string(import_name, silent=False): + """Imports an object based on a string. This is useful if you want to + use import paths as endpoints or something similar. An import path can + be specified either in dotted notation (``xml.sax.saxutils.escape``) + or with a colon as object delimiter (``xml.sax.saxutils:escape``). + + If `silent` is True the return value will be `None` if the import fails. + + :param import_name: the dotted name for the object to import. + :param silent: if set to `True` import errors are ignored and + `None` is returned instead. + :return: imported object + """ + # force the import name to automatically convert to strings + # __import__ is not able to handle unicode strings in the fromlist + # if the module is a package + import_name = str(import_name).replace(":", ".") + try: + try: + __import__(import_name) + except ImportError: + if "." not in import_name: + raise + else: + return sys.modules[import_name] + + module_name, obj_name = import_name.rsplit(".", 1) + module = __import__(module_name, globals(), locals(), [obj_name]) + try: + return getattr(module, obj_name) + except AttributeError as e: + raise ImportError(e) + + except ImportError as e: + if not silent: + reraise( + ImportStringError, ImportStringError(import_name, e), sys.exc_info()[2] + ) + + +def find_modules(import_path, include_packages=False, recursive=False): + """Finds all the modules below a package. This can be useful to + automatically import all views / controllers so that their metaclasses / + function decorators have a chance to register themselves on the + application. + + Packages are not returned unless `include_packages` is `True`. This can + also recursively list modules but in that case it will import all the + packages to get the correct load path of that module. + + :param import_path: the dotted name for the package to find child modules. + :param include_packages: set to `True` if packages should be returned, too. + :param recursive: set to `True` if recursion should happen. + :return: generator + """ + module = import_string(import_path) + path = getattr(module, "__path__", None) + if path is None: + raise ValueError("%r is not a package" % import_path) + basename = module.__name__ + "." + for _importer, modname, ispkg in pkgutil.iter_modules(path): + modname = basename + modname + if ispkg: + if include_packages: + yield modname + if recursive: + for item in find_modules(modname, include_packages, True): + yield item + else: + yield modname + + +def validate_arguments(func, args, kwargs, drop_extra=True): + """Checks if the function accepts the arguments and keyword arguments. + Returns a new ``(args, kwargs)`` tuple that can safely be passed to + the function without causing a `TypeError` because the function signature + is incompatible. If `drop_extra` is set to `True` (which is the default) + any extra positional or keyword arguments are dropped automatically. + + The exception raised provides three attributes: + + `missing` + A set of argument names that the function expected but where + missing. + + `extra` + A dict of keyword arguments that the function can not handle but + where provided. + + `extra_positional` + A list of values that where given by positional argument but the + function cannot accept. + + This can be useful for decorators that forward user submitted data to + a view function:: + + from werkzeug.utils import ArgumentValidationError, validate_arguments + + def sanitize(f): + def proxy(request): + data = request.values.to_dict() + try: + args, kwargs = validate_arguments(f, (request,), data) + except ArgumentValidationError: + raise BadRequest('The browser failed to transmit all ' + 'the data expected.') + return f(*args, **kwargs) + return proxy + + :param func: the function the validation is performed against. + :param args: a tuple of positional arguments. + :param kwargs: a dict of keyword arguments. + :param drop_extra: set to `False` if you don't want extra arguments + to be silently dropped. + :return: tuple in the form ``(args, kwargs)``. + """ + parser = _parse_signature(func) + args, kwargs, missing, extra, extra_positional = parser(args, kwargs)[:5] + if missing: + raise ArgumentValidationError(tuple(missing)) + elif (extra or extra_positional) and not drop_extra: + raise ArgumentValidationError(None, extra, extra_positional) + return tuple(args), kwargs + + +def bind_arguments(func, args, kwargs): + """Bind the arguments provided into a dict. When passed a function, + a tuple of arguments and a dict of keyword arguments `bind_arguments` + returns a dict of names as the function would see it. This can be useful + to implement a cache decorator that uses the function arguments to build + the cache key based on the values of the arguments. + + :param func: the function the arguments should be bound for. + :param args: tuple of positional arguments. + :param kwargs: a dict of keyword arguments. + :return: a :class:`dict` of bound keyword arguments. + """ + ( + args, + kwargs, + missing, + extra, + extra_positional, + arg_spec, + vararg_var, + kwarg_var, + ) = _parse_signature(func)(args, kwargs) + values = {} + for (name, _has_default, _default), value in zip(arg_spec, args): + values[name] = value + if vararg_var is not None: + values[vararg_var] = tuple(extra_positional) + elif extra_positional: + raise TypeError("too many positional arguments") + if kwarg_var is not None: + multikw = set(extra) & set([x[0] for x in arg_spec]) + if multikw: + raise TypeError( + "got multiple values for keyword argument " + repr(next(iter(multikw))) + ) + values[kwarg_var] = extra + elif extra: + raise TypeError("got unexpected keyword argument " + repr(next(iter(extra)))) + return values + + +class ArgumentValidationError(ValueError): + + """Raised if :func:`validate_arguments` fails to validate""" + + def __init__(self, missing=None, extra=None, extra_positional=None): + self.missing = set(missing or ()) + self.extra = extra or {} + self.extra_positional = extra_positional or [] + ValueError.__init__( + self, + "function arguments invalid. (%d missing, %d additional)" + % (len(self.missing), len(self.extra) + len(self.extra_positional)), + ) + + +class ImportStringError(ImportError): + """Provides information about a failed :func:`import_string` attempt.""" + + #: String in dotted notation that failed to be imported. + import_name = None + #: Wrapped exception. + exception = None + + def __init__(self, import_name, exception): + self.import_name = import_name + self.exception = exception + + msg = ( + "import_string() failed for %r. Possible reasons are:\n\n" + "- missing __init__.py in a package;\n" + "- package or module path not included in sys.path;\n" + "- duplicated package or module name taking precedence in " + "sys.path;\n" + "- missing module, class, function or variable;\n\n" + "Debugged import:\n\n%s\n\n" + "Original exception:\n\n%s: %s" + ) + + name = "" + tracked = [] + for part in import_name.replace(":", ".").split("."): + name += (name and ".") + part + imported = import_string(name, silent=True) + if imported: + tracked.append((name, getattr(imported, "__file__", None))) + else: + track = ["- %r found in %r." % (n, i) for n, i in tracked] + track.append("- %r not found." % name) + msg = msg % ( + import_name, + "\n".join(track), + exception.__class__.__name__, + str(exception), + ) + break + + ImportError.__init__(self, msg) + + def __repr__(self): + return "<%s(%r, %r)>" % ( + self.__class__.__name__, + self.import_name, + self.exception, + ) + + +# DEPRECATED +from .datastructures import CombinedMultiDict as _CombinedMultiDict +from .datastructures import EnvironHeaders as _EnvironHeaders +from .datastructures import Headers as _Headers +from .datastructures import MultiDict as _MultiDict +from .http import dump_cookie as _dump_cookie +from .http import parse_cookie as _parse_cookie + + +class MultiDict(_MultiDict): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.utils.MultiDict' has moved to 'werkzeug" + ".datastructures.MultiDict' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(MultiDict, self).__init__(*args, **kwargs) + + +class CombinedMultiDict(_CombinedMultiDict): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.utils.CombinedMultiDict' has moved to 'werkzeug" + ".datastructures.CombinedMultiDict' as of version 0.5. This" + " old import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(CombinedMultiDict, self).__init__(*args, **kwargs) + + +class Headers(_Headers): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.utils.Headers' has moved to 'werkzeug" + ".datastructures.Headers' as of version 0.5. This old" + " import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(Headers, self).__init__(*args, **kwargs) + + +class EnvironHeaders(_EnvironHeaders): + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.utils.EnvironHeaders' has moved to 'werkzeug" + ".datastructures.EnvironHeaders' as of version 0.5. This" + " old import will be removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(EnvironHeaders, self).__init__(*args, **kwargs) + + +def parse_cookie(*args, **kwargs): + warnings.warn( + "'werkzeug.utils.parse_cookie' as moved to 'werkzeug.http" + ".parse_cookie' as of version 0.5. This old import will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return _parse_cookie(*args, **kwargs) + + +def dump_cookie(*args, **kwargs): + warnings.warn( + "'werkzeug.utils.dump_cookie' as moved to 'werkzeug.http" + ".dump_cookie' as of version 0.5. This old import will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + return _dump_cookie(*args, **kwargs) diff --git a/python/werkzeug/wrappers/__init__.py b/python/werkzeug/wrappers/__init__.py new file mode 100644 index 0000000..56c764a --- /dev/null +++ b/python/werkzeug/wrappers/__init__.py @@ -0,0 +1,36 @@ +""" +werkzeug.wrappers +~~~~~~~~~~~~~~~~~ + +The wrappers are simple request and response objects which you can +subclass to do whatever you want them to do. The request object contains +the information transmitted by the client (webbrowser) and the response +object contains all the information sent back to the browser. + +An important detail is that the request object is created with the WSGI +environ and will act as high-level proxy whereas the response object is an +actual WSGI application. + +Like everything else in Werkzeug these objects will work correctly with +unicode data. Incoming form data parsed by the response object will be +decoded into an unicode object if possible and if it makes sense. + +:copyright: 2007 Pallets +:license: BSD-3-Clause +""" +from .accept import AcceptMixin +from .auth import AuthorizationMixin +from .auth import WWWAuthenticateMixin +from .base_request import BaseRequest +from .base_response import BaseResponse +from .common_descriptors import CommonRequestDescriptorsMixin +from .common_descriptors import CommonResponseDescriptorsMixin +from .etag import ETagRequestMixin +from .etag import ETagResponseMixin +from .request import PlainRequest +from .request import Request +from .request import StreamOnlyMixin +from .response import Response +from .response import ResponseStream +from .response import ResponseStreamMixin +from .user_agent import UserAgentMixin diff --git a/python/werkzeug/wrappers/accept.py b/python/werkzeug/wrappers/accept.py new file mode 100644 index 0000000..d0620a0 --- /dev/null +++ b/python/werkzeug/wrappers/accept.py @@ -0,0 +1,50 @@ +from ..datastructures import CharsetAccept +from ..datastructures import LanguageAccept +from ..datastructures import MIMEAccept +from ..http import parse_accept_header +from ..utils import cached_property + + +class AcceptMixin(object): + """A mixin for classes with an :attr:`~BaseResponse.environ` attribute + to get all the HTTP accept headers as + :class:`~werkzeug.datastructures.Accept` objects (or subclasses + thereof). + """ + + @cached_property + def accept_mimetypes(self): + """List of mimetypes this client supports as + :class:`~werkzeug.datastructures.MIMEAccept` object. + """ + return parse_accept_header(self.environ.get("HTTP_ACCEPT"), MIMEAccept) + + @cached_property + def accept_charsets(self): + """List of charsets this client supports as + :class:`~werkzeug.datastructures.CharsetAccept` object. + """ + return parse_accept_header( + self.environ.get("HTTP_ACCEPT_CHARSET"), CharsetAccept + ) + + @cached_property + def accept_encodings(self): + """List of encodings this client accepts. Encodings in a HTTP term + are compression encodings such as gzip. For charsets have a look at + :attr:`accept_charset`. + """ + return parse_accept_header(self.environ.get("HTTP_ACCEPT_ENCODING")) + + @cached_property + def accept_languages(self): + """List of languages this client accepts as + :class:`~werkzeug.datastructures.LanguageAccept` object. + + .. versionchanged 0.5 + In previous versions this was a regular + :class:`~werkzeug.datastructures.Accept` object. + """ + return parse_accept_header( + self.environ.get("HTTP_ACCEPT_LANGUAGE"), LanguageAccept + ) diff --git a/python/werkzeug/wrappers/auth.py b/python/werkzeug/wrappers/auth.py new file mode 100644 index 0000000..714f755 --- /dev/null +++ b/python/werkzeug/wrappers/auth.py @@ -0,0 +1,33 @@ +from ..http import parse_authorization_header +from ..http import parse_www_authenticate_header +from ..utils import cached_property + + +class AuthorizationMixin(object): + """Adds an :attr:`authorization` property that represents the parsed + value of the `Authorization` header as + :class:`~werkzeug.datastructures.Authorization` object. + """ + + @cached_property + def authorization(self): + """The `Authorization` object in parsed form.""" + header = self.environ.get("HTTP_AUTHORIZATION") + return parse_authorization_header(header) + + +class WWWAuthenticateMixin(object): + """Adds a :attr:`www_authenticate` property to a response object.""" + + @property + def www_authenticate(self): + """The `WWW-Authenticate` header in a parsed form.""" + + def on_update(www_auth): + if not www_auth and "www-authenticate" in self.headers: + del self.headers["www-authenticate"] + elif www_auth: + self.headers["WWW-Authenticate"] = www_auth.to_header() + + header = self.headers.get("www-authenticate") + return parse_www_authenticate_header(header, on_update) diff --git a/python/werkzeug/wrappers/base_request.py b/python/werkzeug/wrappers/base_request.py new file mode 100644 index 0000000..41e8aad --- /dev/null +++ b/python/werkzeug/wrappers/base_request.py @@ -0,0 +1,693 @@ +import warnings +from functools import update_wrapper +from io import BytesIO + +from .._compat import to_native +from .._compat import to_unicode +from .._compat import wsgi_decoding_dance +from .._compat import wsgi_get_bytes +from ..datastructures import CombinedMultiDict +from ..datastructures import EnvironHeaders +from ..datastructures import ImmutableList +from ..datastructures import ImmutableMultiDict +from ..datastructures import ImmutableTypeConversionDict +from ..datastructures import iter_multi_items +from ..datastructures import MultiDict +from ..formparser import default_stream_factory +from ..formparser import FormDataParser +from ..http import parse_cookie +from ..http import parse_options_header +from ..urls import url_decode +from ..utils import cached_property +from ..utils import environ_property +from ..wsgi import get_content_length +from ..wsgi import get_current_url +from ..wsgi import get_host +from ..wsgi import get_input_stream + + +class BaseRequest(object): + """Very basic request object. This does not implement advanced stuff like + entity tag parsing or cache controls. The request object is created with + the WSGI environment as first argument and will add itself to the WSGI + environment as ``'werkzeug.request'`` unless it's created with + `populate_request` set to False. + + There are a couple of mixins available that add additional functionality + to the request object, there is also a class called `Request` which + subclasses `BaseRequest` and all the important mixins. + + It's a good idea to create a custom subclass of the :class:`BaseRequest` + and add missing functionality either via mixins or direct implementation. + Here an example for such subclasses:: + + from werkzeug.wrappers import BaseRequest, ETagRequestMixin + + class Request(BaseRequest, ETagRequestMixin): + pass + + Request objects are **read only**. As of 0.5 modifications are not + allowed in any place. Unlike the lower level parsing functions the + request object will use immutable objects everywhere possible. + + Per default the request object will assume all the text data is `utf-8` + encoded. Please refer to :doc:`the unicode chapter </unicode>` for more + details about customizing the behavior. + + Per default the request object will be added to the WSGI + environment as `werkzeug.request` to support the debugging system. + If you don't want that, set `populate_request` to `False`. + + If `shallow` is `True` the environment is initialized as shallow + object around the environ. Every operation that would modify the + environ in any way (such as consuming form data) raises an exception + unless the `shallow` attribute is explicitly set to `False`. This + is useful for middlewares where you don't want to consume the form + data by accident. A shallow request is not populated to the WSGI + environment. + + .. versionchanged:: 0.5 + read-only mode was enforced by using immutables classes for all + data. + """ + + #: the charset for the request, defaults to utf-8 + charset = "utf-8" + + #: the error handling procedure for errors, defaults to 'replace' + encoding_errors = "replace" + + #: the maximum content length. This is forwarded to the form data + #: parsing function (:func:`parse_form_data`). When set and the + #: :attr:`form` or :attr:`files` attribute is accessed and the + #: parsing fails because more than the specified value is transmitted + #: a :exc:`~werkzeug.exceptions.RequestEntityTooLarge` exception is raised. + #: + #: Have a look at :ref:`dealing-with-request-data` for more details. + #: + #: .. versionadded:: 0.5 + max_content_length = None + + #: the maximum form field size. This is forwarded to the form data + #: parsing function (:func:`parse_form_data`). When set and the + #: :attr:`form` or :attr:`files` attribute is accessed and the + #: data in memory for post data is longer than the specified value a + #: :exc:`~werkzeug.exceptions.RequestEntityTooLarge` exception is raised. + #: + #: Have a look at :ref:`dealing-with-request-data` for more details. + #: + #: .. versionadded:: 0.5 + max_form_memory_size = None + + #: the class to use for `args` and `form`. The default is an + #: :class:`~werkzeug.datastructures.ImmutableMultiDict` which supports + #: multiple values per key. alternatively it makes sense to use an + #: :class:`~werkzeug.datastructures.ImmutableOrderedMultiDict` which + #: preserves order or a :class:`~werkzeug.datastructures.ImmutableDict` + #: which is the fastest but only remembers the last key. It is also + #: possible to use mutable structures, but this is not recommended. + #: + #: .. versionadded:: 0.6 + parameter_storage_class = ImmutableMultiDict + + #: the type to be used for list values from the incoming WSGI environment. + #: By default an :class:`~werkzeug.datastructures.ImmutableList` is used + #: (for example for :attr:`access_list`). + #: + #: .. versionadded:: 0.6 + list_storage_class = ImmutableList + + #: the type to be used for dict values from the incoming WSGI environment. + #: By default an + #: :class:`~werkzeug.datastructures.ImmutableTypeConversionDict` is used + #: (for example for :attr:`cookies`). + #: + #: .. versionadded:: 0.6 + dict_storage_class = ImmutableTypeConversionDict + + #: The form data parser that shoud be used. Can be replaced to customize + #: the form date parsing. + form_data_parser_class = FormDataParser + + #: Optionally a list of hosts that is trusted by this request. By default + #: all hosts are trusted which means that whatever the client sends the + #: host is will be accepted. + #: + #: Because `Host` and `X-Forwarded-Host` headers can be set to any value by + #: a malicious client, it is recommended to either set this property or + #: implement similar validation in the proxy (if application is being run + #: behind one). + #: + #: .. versionadded:: 0.9 + trusted_hosts = None + + #: Indicates whether the data descriptor should be allowed to read and + #: buffer up the input stream. By default it's enabled. + #: + #: .. versionadded:: 0.9 + disable_data_descriptor = False + + def __init__(self, environ, populate_request=True, shallow=False): + self.environ = environ + if populate_request and not shallow: + self.environ["werkzeug.request"] = self + self.shallow = shallow + + def __repr__(self): + # make sure the __repr__ even works if the request was created + # from an invalid WSGI environment. If we display the request + # in a debug session we don't want the repr to blow up. + args = [] + try: + args.append("'%s'" % to_native(self.url, self.url_charset)) + args.append("[%s]" % self.method) + except Exception: + args.append("(invalid WSGI environ)") + + return "<%s %s>" % (self.__class__.__name__, " ".join(args)) + + @property + def url_charset(self): + """The charset that is assumed for URLs. Defaults to the value + of :attr:`charset`. + + .. versionadded:: 0.6 + """ + return self.charset + + @classmethod + def from_values(cls, *args, **kwargs): + """Create a new request object based on the values provided. If + environ is given missing values are filled from there. This method is + useful for small scripts when you need to simulate a request from an URL. + Do not use this method for unittesting, there is a full featured client + object (:class:`Client`) that allows to create multipart requests, + support for cookies etc. + + This accepts the same options as the + :class:`~werkzeug.test.EnvironBuilder`. + + .. versionchanged:: 0.5 + This method now accepts the same arguments as + :class:`~werkzeug.test.EnvironBuilder`. Because of this the + `environ` parameter is now called `environ_overrides`. + + :return: request object + """ + from ..test import EnvironBuilder + + charset = kwargs.pop("charset", cls.charset) + kwargs["charset"] = charset + builder = EnvironBuilder(*args, **kwargs) + try: + return builder.get_request(cls) + finally: + builder.close() + + @classmethod + def application(cls, f): + """Decorate a function as responder that accepts the request as first + argument. This works like the :func:`responder` decorator but the + function is passed the request object as first argument and the + request object will be closed automatically:: + + @Request.application + def my_wsgi_app(request): + return Response('Hello World!') + + As of Werkzeug 0.14 HTTP exceptions are automatically caught and + converted to responses instead of failing. + + :param f: the WSGI callable to decorate + :return: a new WSGI callable + """ + #: return a callable that wraps the -2nd argument with the request + #: and calls the function with all the arguments up to that one and + #: the request. The return value is then called with the latest + #: two arguments. This makes it possible to use this decorator for + #: both methods and standalone WSGI functions. + from ..exceptions import HTTPException + + def application(*args): + request = cls(args[-2]) + with request: + try: + resp = f(*args[:-2] + (request,)) + except HTTPException as e: + resp = e.get_response(args[-2]) + return resp(*args[-2:]) + + return update_wrapper(application, f) + + def _get_file_stream( + self, total_content_length, content_type, filename=None, content_length=None + ): + """Called to get a stream for the file upload. + + This must provide a file-like class with `read()`, `readline()` + and `seek()` methods that is both writeable and readable. + + The default implementation returns a temporary file if the total + content length is higher than 500KB. Because many browsers do not + provide a content length for the files only the total content + length matters. + + :param total_content_length: the total content length of all the + data in the request combined. This value + is guaranteed to be there. + :param content_type: the mimetype of the uploaded file. + :param filename: the filename of the uploaded file. May be `None`. + :param content_length: the length of this file. This value is usually + not provided because webbrowsers do not provide + this value. + """ + return default_stream_factory( + total_content_length=total_content_length, + filename=filename, + content_type=content_type, + content_length=content_length, + ) + + @property + def want_form_data_parsed(self): + """Returns True if the request method carries content. As of + Werkzeug 0.9 this will be the case if a content type is transmitted. + + .. versionadded:: 0.8 + """ + return bool(self.environ.get("CONTENT_TYPE")) + + def make_form_data_parser(self): + """Creates the form data parser. Instantiates the + :attr:`form_data_parser_class` with some parameters. + + .. versionadded:: 0.8 + """ + return self.form_data_parser_class( + self._get_file_stream, + self.charset, + self.encoding_errors, + self.max_form_memory_size, + self.max_content_length, + self.parameter_storage_class, + ) + + def _load_form_data(self): + """Method used internally to retrieve submitted data. After calling + this sets `form` and `files` on the request object to multi dicts + filled with the incoming form data. As a matter of fact the input + stream will be empty afterwards. You can also call this method to + force the parsing of the form data. + + .. versionadded:: 0.8 + """ + # abort early if we have already consumed the stream + if "form" in self.__dict__: + return + + _assert_not_shallow(self) + + if self.want_form_data_parsed: + content_type = self.environ.get("CONTENT_TYPE", "") + content_length = get_content_length(self.environ) + mimetype, options = parse_options_header(content_type) + parser = self.make_form_data_parser() + data = parser.parse( + self._get_stream_for_parsing(), mimetype, content_length, options + ) + else: + data = ( + self.stream, + self.parameter_storage_class(), + self.parameter_storage_class(), + ) + + # inject the values into the instance dict so that we bypass + # our cached_property non-data descriptor. + d = self.__dict__ + d["stream"], d["form"], d["files"] = data + + def _get_stream_for_parsing(self): + """This is the same as accessing :attr:`stream` with the difference + that if it finds cached data from calling :meth:`get_data` first it + will create a new stream out of the cached data. + + .. versionadded:: 0.9.3 + """ + cached_data = getattr(self, "_cached_data", None) + if cached_data is not None: + return BytesIO(cached_data) + return self.stream + + def close(self): + """Closes associated resources of this request object. This + closes all file handles explicitly. You can also use the request + object in a with statement which will automatically close it. + + .. versionadded:: 0.9 + """ + files = self.__dict__.get("files") + for _key, value in iter_multi_items(files or ()): + value.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.close() + + @cached_property + def stream(self): + """ + If the incoming form data was not encoded with a known mimetype + the data is stored unmodified in this stream for consumption. Most + of the time it is a better idea to use :attr:`data` which will give + you that data as a string. The stream only returns the data once. + + Unlike :attr:`input_stream` this stream is properly guarded that you + can't accidentally read past the length of the input. Werkzeug will + internally always refer to this stream to read data which makes it + possible to wrap this object with a stream that does filtering. + + .. versionchanged:: 0.9 + This stream is now always available but might be consumed by the + form parser later on. Previously the stream was only set if no + parsing happened. + """ + _assert_not_shallow(self) + return get_input_stream(self.environ) + + input_stream = environ_property( + "wsgi.input", + """The WSGI input stream. + + In general it's a bad idea to use this one because you can + easily read past the boundary. Use the :attr:`stream` + instead.""", + ) + + @cached_property + def args(self): + """The parsed URL parameters (the part in the URL after the question + mark). + + By default an + :class:`~werkzeug.datastructures.ImmutableMultiDict` + is returned from this function. This can be changed by setting + :attr:`parameter_storage_class` to a different type. This might + be necessary if the order of the form data is important. + """ + return url_decode( + wsgi_get_bytes(self.environ.get("QUERY_STRING", "")), + self.url_charset, + errors=self.encoding_errors, + cls=self.parameter_storage_class, + ) + + @cached_property + def data(self): + """ + Contains the incoming request data as string in case it came with + a mimetype Werkzeug does not handle. + """ + + if self.disable_data_descriptor: + raise AttributeError("data descriptor is disabled") + # XXX: this should eventually be deprecated. + + # We trigger form data parsing first which means that the descriptor + # will not cache the data that would otherwise be .form or .files + # data. This restores the behavior that was there in Werkzeug + # before 0.9. New code should use :meth:`get_data` explicitly as + # this will make behavior explicit. + return self.get_data(parse_form_data=True) + + def get_data(self, cache=True, as_text=False, parse_form_data=False): + """This reads the buffered incoming data from the client into one + bytestring. By default this is cached but that behavior can be + changed by setting `cache` to `False`. + + Usually it's a bad idea to call this method without checking the + content length first as a client could send dozens of megabytes or more + to cause memory problems on the server. + + Note that if the form data was already parsed this method will not + return anything as form data parsing does not cache the data like + this method does. To implicitly invoke form data parsing function + set `parse_form_data` to `True`. When this is done the return value + of this method will be an empty string if the form parser handles + the data. This generally is not necessary as if the whole data is + cached (which is the default) the form parser will used the cached + data to parse the form data. Please be generally aware of checking + the content length first in any case before calling this method + to avoid exhausting server memory. + + If `as_text` is set to `True` the return value will be a decoded + unicode string. + + .. versionadded:: 0.9 + """ + rv = getattr(self, "_cached_data", None) + if rv is None: + if parse_form_data: + self._load_form_data() + rv = self.stream.read() + if cache: + self._cached_data = rv + if as_text: + rv = rv.decode(self.charset, self.encoding_errors) + return rv + + @cached_property + def form(self): + """The form parameters. By default an + :class:`~werkzeug.datastructures.ImmutableMultiDict` + is returned from this function. This can be changed by setting + :attr:`parameter_storage_class` to a different type. This might + be necessary if the order of the form data is important. + + Please keep in mind that file uploads will not end up here, but instead + in the :attr:`files` attribute. + + .. versionchanged:: 0.9 + + Previous to Werkzeug 0.9 this would only contain form data for POST + and PUT requests. + """ + self._load_form_data() + return self.form + + @cached_property + def values(self): + """A :class:`werkzeug.datastructures.CombinedMultiDict` that combines + :attr:`args` and :attr:`form`.""" + args = [] + for d in self.args, self.form: + if not isinstance(d, MultiDict): + d = MultiDict(d) + args.append(d) + return CombinedMultiDict(args) + + @cached_property + def files(self): + """:class:`~werkzeug.datastructures.MultiDict` object containing + all uploaded files. Each key in :attr:`files` is the name from the + ``<input type="file" name="">``. Each value in :attr:`files` is a + Werkzeug :class:`~werkzeug.datastructures.FileStorage` object. + + It basically behaves like a standard file object you know from Python, + with the difference that it also has a + :meth:`~werkzeug.datastructures.FileStorage.save` function that can + store the file on the filesystem. + + Note that :attr:`files` will only contain data if the request method was + POST, PUT or PATCH and the ``<form>`` that posted to the request had + ``enctype="multipart/form-data"``. It will be empty otherwise. + + See the :class:`~werkzeug.datastructures.MultiDict` / + :class:`~werkzeug.datastructures.FileStorage` documentation for + more details about the used data structure. + """ + self._load_form_data() + return self.files + + @cached_property + def cookies(self): + """A :class:`dict` with the contents of all cookies transmitted with + the request.""" + return parse_cookie( + self.environ, + self.charset, + self.encoding_errors, + cls=self.dict_storage_class, + ) + + @cached_property + def headers(self): + """The headers from the WSGI environ as immutable + :class:`~werkzeug.datastructures.EnvironHeaders`. + """ + return EnvironHeaders(self.environ) + + @cached_property + def path(self): + """Requested path as unicode. This works a bit like the regular path + info in the WSGI environment but will always include a leading slash, + even if the URL root is accessed. + """ + raw_path = wsgi_decoding_dance( + self.environ.get("PATH_INFO") or "", self.charset, self.encoding_errors + ) + return "/" + raw_path.lstrip("/") + + @cached_property + def full_path(self): + """Requested path as unicode, including the query string.""" + return self.path + u"?" + to_unicode(self.query_string, self.url_charset) + + @cached_property + def script_root(self): + """The root path of the script without the trailing slash.""" + raw_path = wsgi_decoding_dance( + self.environ.get("SCRIPT_NAME") or "", self.charset, self.encoding_errors + ) + return raw_path.rstrip("/") + + @cached_property + def url(self): + """The reconstructed current URL as IRI. + See also: :attr:`trusted_hosts`. + """ + return get_current_url(self.environ, trusted_hosts=self.trusted_hosts) + + @cached_property + def base_url(self): + """Like :attr:`url` but without the querystring + See also: :attr:`trusted_hosts`. + """ + return get_current_url( + self.environ, strip_querystring=True, trusted_hosts=self.trusted_hosts + ) + + @cached_property + def url_root(self): + """The full URL root (with hostname), this is the application + root as IRI. + See also: :attr:`trusted_hosts`. + """ + return get_current_url(self.environ, True, trusted_hosts=self.trusted_hosts) + + @cached_property + def host_url(self): + """Just the host with scheme as IRI. + See also: :attr:`trusted_hosts`. + """ + return get_current_url( + self.environ, host_only=True, trusted_hosts=self.trusted_hosts + ) + + @cached_property + def host(self): + """Just the host including the port if available. + See also: :attr:`trusted_hosts`. + """ + return get_host(self.environ, trusted_hosts=self.trusted_hosts) + + query_string = environ_property( + "QUERY_STRING", + "", + read_only=True, + load_func=wsgi_get_bytes, + doc="The URL parameters as raw bytestring.", + ) + method = environ_property( + "REQUEST_METHOD", + "GET", + read_only=True, + load_func=lambda x: x.upper(), + doc="The request method. (For example ``'GET'`` or ``'POST'``).", + ) + + @cached_property + def access_route(self): + """If a forwarded header exists this is a list of all ip addresses + from the client ip to the last proxy server. + """ + if "HTTP_X_FORWARDED_FOR" in self.environ: + addr = self.environ["HTTP_X_FORWARDED_FOR"].split(",") + return self.list_storage_class([x.strip() for x in addr]) + elif "REMOTE_ADDR" in self.environ: + return self.list_storage_class([self.environ["REMOTE_ADDR"]]) + return self.list_storage_class() + + @property + def remote_addr(self): + """The remote address of the client.""" + return self.environ.get("REMOTE_ADDR") + + remote_user = environ_property( + "REMOTE_USER", + doc="""If the server supports user authentication, and the + script is protected, this attribute contains the username the + user has authenticated as.""", + ) + + scheme = environ_property( + "wsgi.url_scheme", + doc=""" + URL scheme (http or https). + + .. versionadded:: 0.7""", + ) + + @property + def is_xhr(self): + """True if the request was triggered via a JavaScript XMLHttpRequest. + This only works with libraries that support the ``X-Requested-With`` + header and set it to "XMLHttpRequest". Libraries that do that are + prototype, jQuery and Mochikit and probably some more. + + .. deprecated:: 0.13 + ``X-Requested-With`` is not standard and is unreliable. You + may be able to use :attr:`AcceptMixin.accept_mimetypes` + instead. + """ + warnings.warn( + "'Request.is_xhr' is deprecated as of version 0.13 and will" + " be removed in version 1.0. The 'X-Requested-With' header" + " is not standard and is unreliable. You may be able to use" + " 'accept_mimetypes' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.environ.get("HTTP_X_REQUESTED_WITH", "").lower() == "xmlhttprequest" + + is_secure = property( + lambda self: self.environ["wsgi.url_scheme"] == "https", + doc="`True` if the request is secure.", + ) + is_multithread = environ_property( + "wsgi.multithread", + doc="""boolean that is `True` if the application is served by a + multithreaded WSGI server.""", + ) + is_multiprocess = environ_property( + "wsgi.multiprocess", + doc="""boolean that is `True` if the application is served by a + WSGI server that spawns multiple processes.""", + ) + is_run_once = environ_property( + "wsgi.run_once", + doc="""boolean that is `True` if the application will be + executed only once in a process lifetime. This is the case for + CGI for example, but it's not guaranteed that the execution only + happens one time.""", + ) + + +def _assert_not_shallow(request): + if request.shallow: + raise RuntimeError( + "A shallow request tried to consume form data. If you really" + " want to do that, set `shallow` to False." + ) diff --git a/python/werkzeug/wrappers/base_response.py b/python/werkzeug/wrappers/base_response.py new file mode 100644 index 0000000..d944a7d --- /dev/null +++ b/python/werkzeug/wrappers/base_response.py @@ -0,0 +1,702 @@ +import warnings + +from .._compat import integer_types +from .._compat import string_types +from .._compat import text_type +from .._compat import to_bytes +from .._compat import to_native +from ..datastructures import Headers +from ..http import dump_cookie +from ..http import HTTP_STATUS_CODES +from ..http import remove_entity_headers +from ..urls import iri_to_uri +from ..urls import url_join +from ..utils import get_content_type +from ..wsgi import ClosingIterator +from ..wsgi import get_current_url + + +def _run_wsgi_app(*args): + """This function replaces itself to ensure that the test module is not + imported unless required. DO NOT USE! + """ + global _run_wsgi_app + from ..test import run_wsgi_app as _run_wsgi_app + + return _run_wsgi_app(*args) + + +def _warn_if_string(iterable): + """Helper for the response objects to check if the iterable returned + to the WSGI server is not a string. + """ + if isinstance(iterable, string_types): + warnings.warn( + "Response iterable was set to a string. This will appear to" + " work but means that the server will send the data to the" + " client one character at a time. This is almost never" + " intended behavior, use 'response.data' to assign strings" + " to the response object.", + stacklevel=2, + ) + + +def _iter_encoded(iterable, charset): + for item in iterable: + if isinstance(item, text_type): + yield item.encode(charset) + else: + yield item + + +def _clean_accept_ranges(accept_ranges): + if accept_ranges is True: + return "bytes" + elif accept_ranges is False: + return "none" + elif isinstance(accept_ranges, text_type): + return to_native(accept_ranges) + raise ValueError("Invalid accept_ranges value") + + +class BaseResponse(object): + """Base response class. The most important fact about a response object + is that it's a regular WSGI application. It's initialized with a couple + of response parameters (headers, body, status code etc.) and will start a + valid WSGI response when called with the environ and start response + callable. + + Because it's a WSGI application itself processing usually ends before the + actual response is sent to the server. This helps debugging systems + because they can catch all the exceptions before responses are started. + + Here a small example WSGI application that takes advantage of the + response objects:: + + from werkzeug.wrappers import BaseResponse as Response + + def index(): + return Response('Index page') + + def application(environ, start_response): + path = environ.get('PATH_INFO') or '/' + if path == '/': + response = index() + else: + response = Response('Not Found', status=404) + return response(environ, start_response) + + Like :class:`BaseRequest` which object is lacking a lot of functionality + implemented in mixins. This gives you a better control about the actual + API of your response objects, so you can create subclasses and add custom + functionality. A full featured response object is available as + :class:`Response` which implements a couple of useful mixins. + + To enforce a new type of already existing responses you can use the + :meth:`force_type` method. This is useful if you're working with different + subclasses of response objects and you want to post process them with a + known interface. + + Per default the response object will assume all the text data is `utf-8` + encoded. Please refer to :doc:`the unicode chapter </unicode>` for more + details about customizing the behavior. + + Response can be any kind of iterable or string. If it's a string it's + considered being an iterable with one item which is the string passed. + Headers can be a list of tuples or a + :class:`~werkzeug.datastructures.Headers` object. + + Special note for `mimetype` and `content_type`: For most mime types + `mimetype` and `content_type` work the same, the difference affects + only 'text' mimetypes. If the mimetype passed with `mimetype` is a + mimetype starting with `text/`, the charset parameter of the response + object is appended to it. In contrast the `content_type` parameter is + always added as header unmodified. + + .. versionchanged:: 0.5 + the `direct_passthrough` parameter was added. + + :param response: a string or response iterable. + :param status: a string with a status or an integer with the status code. + :param headers: a list of headers or a + :class:`~werkzeug.datastructures.Headers` object. + :param mimetype: the mimetype for the response. See notice above. + :param content_type: the content type for the response. See notice above. + :param direct_passthrough: if set to `True` :meth:`iter_encoded` is not + called before iteration which makes it + possible to pass special iterators through + unchanged (see :func:`wrap_file` for more + details.) + """ + + #: the charset of the response. + charset = "utf-8" + + #: the default status if none is provided. + default_status = 200 + + #: the default mimetype if none is provided. + default_mimetype = "text/plain" + + #: if set to `False` accessing properties on the response object will + #: not try to consume the response iterator and convert it into a list. + #: + #: .. versionadded:: 0.6.2 + #: + #: That attribute was previously called `implicit_seqence_conversion`. + #: (Notice the typo). If you did use this feature, you have to adapt + #: your code to the name change. + implicit_sequence_conversion = True + + #: Should this response object correct the location header to be RFC + #: conformant? This is true by default. + #: + #: .. versionadded:: 0.8 + autocorrect_location_header = True + + #: Should this response object automatically set the content-length + #: header if possible? This is true by default. + #: + #: .. versionadded:: 0.8 + automatically_set_content_length = True + + #: Warn if a cookie header exceeds this size. The default, 4093, should be + #: safely `supported by most browsers <cookie_>`_. A cookie larger than + #: this size will still be sent, but it may be ignored or handled + #: incorrectly by some browsers. Set to 0 to disable this check. + #: + #: .. versionadded:: 0.13 + #: + #: .. _`cookie`: http://browsercookielimits.squawky.net/ + max_cookie_size = 4093 + + def __init__( + self, + response=None, + status=None, + headers=None, + mimetype=None, + content_type=None, + direct_passthrough=False, + ): + if isinstance(headers, Headers): + self.headers = headers + elif not headers: + self.headers = Headers() + else: + self.headers = Headers(headers) + + if content_type is None: + if mimetype is None and "content-type" not in self.headers: + mimetype = self.default_mimetype + if mimetype is not None: + mimetype = get_content_type(mimetype, self.charset) + content_type = mimetype + if content_type is not None: + self.headers["Content-Type"] = content_type + if status is None: + status = self.default_status + if isinstance(status, integer_types): + self.status_code = status + else: + self.status = status + + self.direct_passthrough = direct_passthrough + self._on_close = [] + + # we set the response after the headers so that if a class changes + # the charset attribute, the data is set in the correct charset. + if response is None: + self.response = [] + elif isinstance(response, (text_type, bytes, bytearray)): + self.set_data(response) + else: + self.response = response + + def call_on_close(self, func): + """Adds a function to the internal list of functions that should + be called as part of closing down the response. Since 0.7 this + function also returns the function that was passed so that this + can be used as a decorator. + + .. versionadded:: 0.6 + """ + self._on_close.append(func) + return func + + def __repr__(self): + if self.is_sequence: + body_info = "%d bytes" % sum(map(len, self.iter_encoded())) + else: + body_info = "streamed" if self.is_streamed else "likely-streamed" + return "<%s %s [%s]>" % (self.__class__.__name__, body_info, self.status) + + @classmethod + def force_type(cls, response, environ=None): + """Enforce that the WSGI response is a response object of the current + type. Werkzeug will use the :class:`BaseResponse` internally in many + situations like the exceptions. If you call :meth:`get_response` on an + exception you will get back a regular :class:`BaseResponse` object, even + if you are using a custom subclass. + + This method can enforce a given response type, and it will also + convert arbitrary WSGI callables into response objects if an environ + is provided:: + + # convert a Werkzeug response object into an instance of the + # MyResponseClass subclass. + response = MyResponseClass.force_type(response) + + # convert any WSGI application into a response object + response = MyResponseClass.force_type(response, environ) + + This is especially useful if you want to post-process responses in + the main dispatcher and use functionality provided by your subclass. + + Keep in mind that this will modify response objects in place if + possible! + + :param response: a response object or wsgi application. + :param environ: a WSGI environment object. + :return: a response object. + """ + if not isinstance(response, BaseResponse): + if environ is None: + raise TypeError( + "cannot convert WSGI application into response" + " objects without an environ" + ) + response = BaseResponse(*_run_wsgi_app(response, environ)) + response.__class__ = cls + return response + + @classmethod + def from_app(cls, app, environ, buffered=False): + """Create a new response object from an application output. This + works best if you pass it an application that returns a generator all + the time. Sometimes applications may use the `write()` callable + returned by the `start_response` function. This tries to resolve such + edge cases automatically. But if you don't get the expected output + you should set `buffered` to `True` which enforces buffering. + + :param app: the WSGI application to execute. + :param environ: the WSGI environment to execute against. + :param buffered: set to `True` to enforce buffering. + :return: a response object. + """ + return cls(*_run_wsgi_app(app, environ, buffered)) + + def _get_status_code(self): + return self._status_code + + def _set_status_code(self, code): + self._status_code = code + try: + self._status = "%d %s" % (code, HTTP_STATUS_CODES[code].upper()) + except KeyError: + self._status = "%d UNKNOWN" % code + + status_code = property( + _get_status_code, _set_status_code, doc="The HTTP Status code as number" + ) + del _get_status_code, _set_status_code + + def _get_status(self): + return self._status + + def _set_status(self, value): + try: + self._status = to_native(value) + except AttributeError: + raise TypeError("Invalid status argument") + + try: + self._status_code = int(self._status.split(None, 1)[0]) + except ValueError: + self._status_code = 0 + self._status = "0 %s" % self._status + except IndexError: + raise ValueError("Empty status argument") + + status = property(_get_status, _set_status, doc="The HTTP Status code") + del _get_status, _set_status + + def get_data(self, as_text=False): + """The string representation of the request body. Whenever you call + this property the request iterable is encoded and flattened. This + can lead to unwanted behavior if you stream big data. + + This behavior can be disabled by setting + :attr:`implicit_sequence_conversion` to `False`. + + If `as_text` is set to `True` the return value will be a decoded + unicode string. + + .. versionadded:: 0.9 + """ + self._ensure_sequence() + rv = b"".join(self.iter_encoded()) + if as_text: + rv = rv.decode(self.charset) + return rv + + def set_data(self, value): + """Sets a new string as response. The value set must either by a + unicode or bytestring. If a unicode string is set it's encoded + automatically to the charset of the response (utf-8 by default). + + .. versionadded:: 0.9 + """ + # if an unicode string is set, it's encoded directly so that we + # can set the content length + if isinstance(value, text_type): + value = value.encode(self.charset) + else: + value = bytes(value) + self.response = [value] + if self.automatically_set_content_length: + self.headers["Content-Length"] = str(len(value)) + + data = property( + get_data, + set_data, + doc="A descriptor that calls :meth:`get_data` and :meth:`set_data`.", + ) + + def calculate_content_length(self): + """Returns the content length if available or `None` otherwise.""" + try: + self._ensure_sequence() + except RuntimeError: + return None + return sum(len(x) for x in self.iter_encoded()) + + def _ensure_sequence(self, mutable=False): + """This method can be called by methods that need a sequence. If + `mutable` is true, it will also ensure that the response sequence + is a standard Python list. + + .. versionadded:: 0.6 + """ + if self.is_sequence: + # if we need a mutable object, we ensure it's a list. + if mutable and not isinstance(self.response, list): + self.response = list(self.response) + return + if self.direct_passthrough: + raise RuntimeError( + "Attempted implicit sequence conversion but the" + " response object is in direct passthrough mode." + ) + if not self.implicit_sequence_conversion: + raise RuntimeError( + "The response object required the iterable to be a" + " sequence, but the implicit conversion was disabled." + " Call make_sequence() yourself." + ) + self.make_sequence() + + def make_sequence(self): + """Converts the response iterator in a list. By default this happens + automatically if required. If `implicit_sequence_conversion` is + disabled, this method is not automatically called and some properties + might raise exceptions. This also encodes all the items. + + .. versionadded:: 0.6 + """ + if not self.is_sequence: + # if we consume an iterable we have to ensure that the close + # method of the iterable is called if available when we tear + # down the response + close = getattr(self.response, "close", None) + self.response = list(self.iter_encoded()) + if close is not None: + self.call_on_close(close) + + def iter_encoded(self): + """Iter the response encoded with the encoding of the response. + If the response object is invoked as WSGI application the return + value of this method is used as application iterator unless + :attr:`direct_passthrough` was activated. + """ + if __debug__: + _warn_if_string(self.response) + # Encode in a separate function so that self.response is fetched + # early. This allows us to wrap the response with the return + # value from get_app_iter or iter_encoded. + return _iter_encoded(self.response, self.charset) + + def set_cookie( + self, + key, + value="", + max_age=None, + expires=None, + path="/", + domain=None, + secure=False, + httponly=False, + samesite=None, + ): + """Sets a cookie. The parameters are the same as in the cookie `Morsel` + object in the Python standard library but it accepts unicode data, too. + + A warning is raised if the size of the cookie header exceeds + :attr:`max_cookie_size`, but the header will still be set. + + :param key: the key (name) of the cookie to be set. + :param value: the value of the cookie. + :param max_age: should be a number of seconds, or `None` (default) if + the cookie should last only as long as the client's + browser session. + :param expires: should be a `datetime` object or UNIX timestamp. + :param path: limits the cookie to a given path, per default it will + span the whole domain. + :param domain: if you want to set a cross-domain cookie. For example, + ``domain=".example.com"`` will set a cookie that is + readable by the domain ``www.example.com``, + ``foo.example.com`` etc. Otherwise, a cookie will only + be readable by the domain that set it. + :param secure: If `True`, the cookie will only be available via HTTPS + :param httponly: disallow JavaScript to access the cookie. This is an + extension to the cookie standard and probably not + supported by all browsers. + :param samesite: Limits the scope of the cookie such that it will only + be attached to requests if those requests are + "same-site". + """ + self.headers.add( + "Set-Cookie", + dump_cookie( + key, + value=value, + max_age=max_age, + expires=expires, + path=path, + domain=domain, + secure=secure, + httponly=httponly, + charset=self.charset, + max_size=self.max_cookie_size, + samesite=samesite, + ), + ) + + def delete_cookie(self, key, path="/", domain=None): + """Delete a cookie. Fails silently if key doesn't exist. + + :param key: the key (name) of the cookie to be deleted. + :param path: if the cookie that should be deleted was limited to a + path, the path has to be defined here. + :param domain: if the cookie that should be deleted was limited to a + domain, that domain has to be defined here. + """ + self.set_cookie(key, expires=0, max_age=0, path=path, domain=domain) + + @property + def is_streamed(self): + """If the response is streamed (the response is not an iterable with + a length information) this property is `True`. In this case streamed + means that there is no information about the number of iterations. + This is usually `True` if a generator is passed to the response object. + + This is useful for checking before applying some sort of post + filtering that should not take place for streamed responses. + """ + try: + len(self.response) + except (TypeError, AttributeError): + return True + return False + + @property + def is_sequence(self): + """If the iterator is buffered, this property will be `True`. A + response object will consider an iterator to be buffered if the + response attribute is a list or tuple. + + .. versionadded:: 0.6 + """ + return isinstance(self.response, (tuple, list)) + + def close(self): + """Close the wrapped response if possible. You can also use the object + in a with statement which will automatically close it. + + .. versionadded:: 0.9 + Can now be used in a with statement. + """ + if hasattr(self.response, "close"): + self.response.close() + for func in self._on_close: + func() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, tb): + self.close() + + def freeze(self): + """Call this method if you want to make your response object ready for + being pickled. This buffers the generator if there is one. It will + also set the `Content-Length` header to the length of the body. + + .. versionchanged:: 0.6 + The `Content-Length` header is now set. + """ + # we explicitly set the length to a list of the *encoded* response + # iterator. Even if the implicit sequence conversion is disabled. + self.response = list(self.iter_encoded()) + self.headers["Content-Length"] = str(sum(map(len, self.response))) + + def get_wsgi_headers(self, environ): + """This is automatically called right before the response is started + and returns headers modified for the given environment. It returns a + copy of the headers from the response with some modifications applied + if necessary. + + For example the location header (if present) is joined with the root + URL of the environment. Also the content length is automatically set + to zero here for certain status codes. + + .. versionchanged:: 0.6 + Previously that function was called `fix_headers` and modified + the response object in place. Also since 0.6, IRIs in location + and content-location headers are handled properly. + + Also starting with 0.6, Werkzeug will attempt to set the content + length if it is able to figure it out on its own. This is the + case if all the strings in the response iterable are already + encoded and the iterable is buffered. + + :param environ: the WSGI environment of the request. + :return: returns a new :class:`~werkzeug.datastructures.Headers` + object. + """ + headers = Headers(self.headers) + location = None + content_location = None + content_length = None + status = self.status_code + + # iterate over the headers to find all values in one go. Because + # get_wsgi_headers is used each response that gives us a tiny + # speedup. + for key, value in headers: + ikey = key.lower() + if ikey == u"location": + location = value + elif ikey == u"content-location": + content_location = value + elif ikey == u"content-length": + content_length = value + + # make sure the location header is an absolute URL + if location is not None: + old_location = location + if isinstance(location, text_type): + # Safe conversion is necessary here as we might redirect + # to a broken URI scheme (for instance itms-services). + location = iri_to_uri(location, safe_conversion=True) + + if self.autocorrect_location_header: + current_url = get_current_url(environ, strip_querystring=True) + if isinstance(current_url, text_type): + current_url = iri_to_uri(current_url) + location = url_join(current_url, location) + if location != old_location: + headers["Location"] = location + + # make sure the content location is a URL + if content_location is not None and isinstance(content_location, text_type): + headers["Content-Location"] = iri_to_uri(content_location) + + if 100 <= status < 200 or status == 204: + # Per section 3.3.2 of RFC 7230, "a server MUST NOT send a + # Content-Length header field in any response with a status + # code of 1xx (Informational) or 204 (No Content)." + headers.remove("Content-Length") + elif status == 304: + remove_entity_headers(headers) + + # if we can determine the content length automatically, we + # should try to do that. But only if this does not involve + # flattening the iterator or encoding of unicode strings in + # the response. We however should not do that if we have a 304 + # response. + if ( + self.automatically_set_content_length + and self.is_sequence + and content_length is None + and status not in (204, 304) + and not (100 <= status < 200) + ): + try: + content_length = sum(len(to_bytes(x, "ascii")) for x in self.response) + except UnicodeError: + # aha, something non-bytestringy in there, too bad, we + # can't safely figure out the length of the response. + pass + else: + headers["Content-Length"] = str(content_length) + + return headers + + def get_app_iter(self, environ): + """Returns the application iterator for the given environ. Depending + on the request method and the current status code the return value + might be an empty response rather than the one from the response. + + If the request method is `HEAD` or the status code is in a range + where the HTTP specification requires an empty response, an empty + iterable is returned. + + .. versionadded:: 0.6 + + :param environ: the WSGI environment of the request. + :return: a response iterable. + """ + status = self.status_code + if ( + environ["REQUEST_METHOD"] == "HEAD" + or 100 <= status < 200 + or status in (204, 304) + ): + iterable = () + elif self.direct_passthrough: + if __debug__: + _warn_if_string(self.response) + return self.response + else: + iterable = self.iter_encoded() + return ClosingIterator(iterable, self.close) + + def get_wsgi_response(self, environ): + """Returns the final WSGI response as tuple. The first item in + the tuple is the application iterator, the second the status and + the third the list of headers. The response returned is created + specially for the given environment. For example if the request + method in the WSGI environment is ``'HEAD'`` the response will + be empty and only the headers and status code will be present. + + .. versionadded:: 0.6 + + :param environ: the WSGI environment of the request. + :return: an ``(app_iter, status, headers)`` tuple. + """ + headers = self.get_wsgi_headers(environ) + app_iter = self.get_app_iter(environ) + return app_iter, self.status, headers.to_wsgi_list() + + def __call__(self, environ, start_response): + """Process this response as WSGI application. + + :param environ: the WSGI environment. + :param start_response: the response callable provided by the WSGI + server. + :return: an application iterator + """ + app_iter, status, headers = self.get_wsgi_response(environ) + start_response(status, headers) + return app_iter diff --git a/python/werkzeug/wrappers/common_descriptors.py b/python/werkzeug/wrappers/common_descriptors.py new file mode 100644 index 0000000..e4107ee --- /dev/null +++ b/python/werkzeug/wrappers/common_descriptors.py @@ -0,0 +1,322 @@ +from datetime import datetime +from datetime import timedelta + +from .._compat import string_types +from ..datastructures import CallbackDict +from ..http import dump_age +from ..http import dump_header +from ..http import dump_options_header +from ..http import http_date +from ..http import parse_age +from ..http import parse_date +from ..http import parse_options_header +from ..http import parse_set_header +from ..utils import cached_property +from ..utils import environ_property +from ..utils import get_content_type +from ..utils import header_property +from ..wsgi import get_content_length + + +class CommonRequestDescriptorsMixin(object): + """A mixin for :class:`BaseRequest` subclasses. Request objects that + mix this class in will automatically get descriptors for a couple of + HTTP headers with automatic type conversion. + + .. versionadded:: 0.5 + """ + + content_type = environ_property( + "CONTENT_TYPE", + doc="""The Content-Type entity-header field indicates the media + type of the entity-body sent to the recipient or, in the case of + the HEAD method, the media type that would have been sent had + the request been a GET.""", + ) + + @cached_property + def content_length(self): + """The Content-Length entity-header field indicates the size of the + entity-body in bytes or, in the case of the HEAD method, the size of + the entity-body that would have been sent had the request been a + GET. + """ + return get_content_length(self.environ) + + content_encoding = environ_property( + "HTTP_CONTENT_ENCODING", + doc="""The Content-Encoding entity-header field is used as a + modifier to the media-type. When present, its value indicates + what additional content codings have been applied to the + entity-body, and thus what decoding mechanisms must be applied + in order to obtain the media-type referenced by the Content-Type + header field. + + .. versionadded:: 0.9""", + ) + content_md5 = environ_property( + "HTTP_CONTENT_MD5", + doc="""The Content-MD5 entity-header field, as defined in + RFC 1864, is an MD5 digest of the entity-body for the purpose of + providing an end-to-end message integrity check (MIC) of the + entity-body. (Note: a MIC is good for detecting accidental + modification of the entity-body in transit, but is not proof + against malicious attacks.) + + .. versionadded:: 0.9""", + ) + referrer = environ_property( + "HTTP_REFERER", + doc="""The Referer[sic] request-header field allows the client + to specify, for the server's benefit, the address (URI) of the + resource from which the Request-URI was obtained (the + "referrer", although the header field is misspelled).""", + ) + date = environ_property( + "HTTP_DATE", + None, + parse_date, + doc="""The Date general-header field represents the date and + time at which the message was originated, having the same + semantics as orig-date in RFC 822.""", + ) + max_forwards = environ_property( + "HTTP_MAX_FORWARDS", + None, + int, + doc="""The Max-Forwards request-header field provides a + mechanism with the TRACE and OPTIONS methods to limit the number + of proxies or gateways that can forward the request to the next + inbound server.""", + ) + + def _parse_content_type(self): + if not hasattr(self, "_parsed_content_type"): + self._parsed_content_type = parse_options_header( + self.environ.get("CONTENT_TYPE", "") + ) + + @property + def mimetype(self): + """Like :attr:`content_type`, but without parameters (eg, without + charset, type etc.) and always lowercase. For example if the content + type is ``text/HTML; charset=utf-8`` the mimetype would be + ``'text/html'``. + """ + self._parse_content_type() + return self._parsed_content_type[0].lower() + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the content + type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + """ + self._parse_content_type() + return self._parsed_content_type[1] + + @cached_property + def pragma(self): + """The Pragma general-header field is used to include + implementation-specific directives that might apply to any recipient + along the request/response chain. All pragma directives specify + optional behavior from the viewpoint of the protocol; however, some + systems MAY require that behavior be consistent with the directives. + """ + return parse_set_header(self.environ.get("HTTP_PRAGMA", "")) + + +class CommonResponseDescriptorsMixin(object): + """A mixin for :class:`BaseResponse` subclasses. Response objects that + mix this class in will automatically get descriptors for a couple of + HTTP headers with automatic type conversion. + """ + + @property + def mimetype(self): + """The mimetype (content type without charset etc.)""" + ct = self.headers.get("content-type") + if ct: + return ct.split(";")[0].strip() + + @mimetype.setter + def mimetype(self, value): + self.headers["Content-Type"] = get_content_type(value, self.charset) + + @property + def mimetype_params(self): + """The mimetype parameters as dict. For example if the + content type is ``text/html; charset=utf-8`` the params would be + ``{'charset': 'utf-8'}``. + + .. versionadded:: 0.5 + """ + + def on_update(d): + self.headers["Content-Type"] = dump_options_header(self.mimetype, d) + + d = parse_options_header(self.headers.get("content-type", ""))[1] + return CallbackDict(d, on_update) + + location = header_property( + "Location", + doc="""The Location response-header field is used to redirect + the recipient to a location other than the Request-URI for + completion of the request or identification of a new + resource.""", + ) + age = header_property( + "Age", + None, + parse_age, + dump_age, + doc="""The Age response-header field conveys the sender's + estimate of the amount of time since the response (or its + revalidation) was generated at the origin server. + + Age values are non-negative decimal integers, representing time + in seconds.""", + ) + content_type = header_property( + "Content-Type", + doc="""The Content-Type entity-header field indicates the media + type of the entity-body sent to the recipient or, in the case of + the HEAD method, the media type that would have been sent had + the request been a GET.""", + ) + content_length = header_property( + "Content-Length", + None, + int, + str, + doc="""The Content-Length entity-header field indicates the size + of the entity-body, in decimal number of OCTETs, sent to the + recipient or, in the case of the HEAD method, the size of the + entity-body that would have been sent had the request been a + GET.""", + ) + content_location = header_property( + "Content-Location", + doc="""The Content-Location entity-header field MAY be used to + supply the resource location for the entity enclosed in the + message when that entity is accessible from a location separate + from the requested resource's URI.""", + ) + content_encoding = header_property( + "Content-Encoding", + doc="""The Content-Encoding entity-header field is used as a + modifier to the media-type. When present, its value indicates + what additional content codings have been applied to the + entity-body, and thus what decoding mechanisms must be applied + in order to obtain the media-type referenced by the Content-Type + header field.""", + ) + content_md5 = header_property( + "Content-MD5", + doc="""The Content-MD5 entity-header field, as defined in + RFC 1864, is an MD5 digest of the entity-body for the purpose of + providing an end-to-end message integrity check (MIC) of the + entity-body. (Note: a MIC is good for detecting accidental + modification of the entity-body in transit, but is not proof + against malicious attacks.)""", + ) + date = header_property( + "Date", + None, + parse_date, + http_date, + doc="""The Date general-header field represents the date and + time at which the message was originated, having the same + semantics as orig-date in RFC 822.""", + ) + expires = header_property( + "Expires", + None, + parse_date, + http_date, + doc="""The Expires entity-header field gives the date/time after + which the response is considered stale. A stale cache entry may + not normally be returned by a cache.""", + ) + last_modified = header_property( + "Last-Modified", + None, + parse_date, + http_date, + doc="""The Last-Modified entity-header field indicates the date + and time at which the origin server believes the variant was + last modified.""", + ) + + @property + def retry_after(self): + """The Retry-After response-header field can be used with a + 503 (Service Unavailable) response to indicate how long the + service is expected to be unavailable to the requesting client. + + Time in seconds until expiration or date. + """ + value = self.headers.get("retry-after") + if value is None: + return + elif value.isdigit(): + return datetime.utcnow() + timedelta(seconds=int(value)) + return parse_date(value) + + @retry_after.setter + def retry_after(self, value): + if value is None: + if "retry-after" in self.headers: + del self.headers["retry-after"] + return + elif isinstance(value, datetime): + value = http_date(value) + else: + value = str(value) + self.headers["Retry-After"] = value + + def _set_property(name, doc=None): # noqa: B902 + def fget(self): + def on_update(header_set): + if not header_set and name in self.headers: + del self.headers[name] + elif header_set: + self.headers[name] = header_set.to_header() + + return parse_set_header(self.headers.get(name), on_update) + + def fset(self, value): + if not value: + del self.headers[name] + elif isinstance(value, string_types): + self.headers[name] = value + else: + self.headers[name] = dump_header(value) + + return property(fget, fset, doc=doc) + + vary = _set_property( + "Vary", + doc="""The Vary field value indicates the set of request-header + fields that fully determines, while the response is fresh, + whether a cache is permitted to use the response to reply to a + subsequent request without revalidation.""", + ) + content_language = _set_property( + "Content-Language", + doc="""The Content-Language entity-header field describes the + natural language(s) of the intended audience for the enclosed + entity. Note that this might not be equivalent to all the + languages used within the entity-body.""", + ) + allow = _set_property( + "Allow", + doc="""The Allow entity-header field lists the set of methods + supported by the resource identified by the Request-URI. The + purpose of this field is strictly to inform the recipient of + valid methods associated with the resource. An Allow header + field MUST be present in a 405 (Method Not Allowed) + response.""", + ) + + del _set_property diff --git a/python/werkzeug/wrappers/etag.py b/python/werkzeug/wrappers/etag.py new file mode 100644 index 0000000..0733506 --- /dev/null +++ b/python/werkzeug/wrappers/etag.py @@ -0,0 +1,304 @@ +from .._compat import string_types +from .._internal import _get_environ +from ..datastructures import ContentRange +from ..datastructures import RequestCacheControl +from ..datastructures import ResponseCacheControl +from ..http import generate_etag +from ..http import http_date +from ..http import is_resource_modified +from ..http import parse_cache_control_header +from ..http import parse_content_range_header +from ..http import parse_date +from ..http import parse_etags +from ..http import parse_if_range_header +from ..http import parse_range_header +from ..http import quote_etag +from ..http import unquote_etag +from ..utils import cached_property +from ..utils import header_property +from ..wrappers.base_response import _clean_accept_ranges +from ..wsgi import _RangeWrapper + + +class ETagRequestMixin(object): + """Add entity tag and cache descriptors to a request object or object with + a WSGI environment available as :attr:`~BaseRequest.environ`. This not + only provides access to etags but also to the cache control header. + """ + + @cached_property + def cache_control(self): + """A :class:`~werkzeug.datastructures.RequestCacheControl` object + for the incoming cache control headers. + """ + cache_control = self.environ.get("HTTP_CACHE_CONTROL") + return parse_cache_control_header(cache_control, None, RequestCacheControl) + + @cached_property + def if_match(self): + """An object containing all the etags in the `If-Match` header. + + :rtype: :class:`~werkzeug.datastructures.ETags` + """ + return parse_etags(self.environ.get("HTTP_IF_MATCH")) + + @cached_property + def if_none_match(self): + """An object containing all the etags in the `If-None-Match` header. + + :rtype: :class:`~werkzeug.datastructures.ETags` + """ + return parse_etags(self.environ.get("HTTP_IF_NONE_MATCH")) + + @cached_property + def if_modified_since(self): + """The parsed `If-Modified-Since` header as datetime object.""" + return parse_date(self.environ.get("HTTP_IF_MODIFIED_SINCE")) + + @cached_property + def if_unmodified_since(self): + """The parsed `If-Unmodified-Since` header as datetime object.""" + return parse_date(self.environ.get("HTTP_IF_UNMODIFIED_SINCE")) + + @cached_property + def if_range(self): + """The parsed `If-Range` header. + + .. versionadded:: 0.7 + + :rtype: :class:`~werkzeug.datastructures.IfRange` + """ + return parse_if_range_header(self.environ.get("HTTP_IF_RANGE")) + + @cached_property + def range(self): + """The parsed `Range` header. + + .. versionadded:: 0.7 + + :rtype: :class:`~werkzeug.datastructures.Range` + """ + return parse_range_header(self.environ.get("HTTP_RANGE")) + + +class ETagResponseMixin(object): + """Adds extra functionality to a response object for etag and cache + handling. This mixin requires an object with at least a `headers` + object that implements a dict like interface similar to + :class:`~werkzeug.datastructures.Headers`. + + If you want the :meth:`freeze` method to automatically add an etag, you + have to mixin this method before the response base class. The default + response class does not do that. + """ + + @property + def cache_control(self): + """The Cache-Control general-header field is used to specify + directives that MUST be obeyed by all caching mechanisms along the + request/response chain. + """ + + def on_update(cache_control): + if not cache_control and "cache-control" in self.headers: + del self.headers["cache-control"] + elif cache_control: + self.headers["Cache-Control"] = cache_control.to_header() + + return parse_cache_control_header( + self.headers.get("cache-control"), on_update, ResponseCacheControl + ) + + def _wrap_response(self, start, length): + """Wrap existing Response in case of Range Request context.""" + if self.status_code == 206: + self.response = _RangeWrapper(self.response, start, length) + + def _is_range_request_processable(self, environ): + """Return ``True`` if `Range` header is present and if underlying + resource is considered unchanged when compared with `If-Range` header. + """ + return ( + "HTTP_IF_RANGE" not in environ + or not is_resource_modified( + environ, + self.headers.get("etag"), + None, + self.headers.get("last-modified"), + ignore_if_range=False, + ) + ) and "HTTP_RANGE" in environ + + def _process_range_request(self, environ, complete_length=None, accept_ranges=None): + """Handle Range Request related headers (RFC7233). If `Accept-Ranges` + header is valid, and Range Request is processable, we set the headers + as described by the RFC, and wrap the underlying response in a + RangeWrapper. + + Returns ``True`` if Range Request can be fulfilled, ``False`` otherwise. + + :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable` + if `Range` header could not be parsed or satisfied. + """ + from ..exceptions import RequestedRangeNotSatisfiable + + if accept_ranges is None: + return False + self.headers["Accept-Ranges"] = accept_ranges + if not self._is_range_request_processable(environ) or complete_length is None: + return False + parsed_range = parse_range_header(environ.get("HTTP_RANGE")) + if parsed_range is None: + raise RequestedRangeNotSatisfiable(complete_length) + range_tuple = parsed_range.range_for_length(complete_length) + content_range_header = parsed_range.to_content_range_header(complete_length) + if range_tuple is None or content_range_header is None: + raise RequestedRangeNotSatisfiable(complete_length) + content_length = range_tuple[1] - range_tuple[0] + # Be sure not to send 206 response + # if requested range is the full content. + if content_length != complete_length: + self.headers["Content-Length"] = content_length + self.content_range = content_range_header + self.status_code = 206 + self._wrap_response(range_tuple[0], content_length) + return True + return False + + def make_conditional( + self, request_or_environ, accept_ranges=False, complete_length=None + ): + """Make the response conditional to the request. This method works + best if an etag was defined for the response already. The `add_etag` + method can be used to do that. If called without etag just the date + header is set. + + This does nothing if the request method in the request or environ is + anything but GET or HEAD. + + For optimal performance when handling range requests, it's recommended + that your response data object implements `seekable`, `seek` and `tell` + methods as described by :py:class:`io.IOBase`. Objects returned by + :meth:`~werkzeug.wsgi.wrap_file` automatically implement those methods. + + It does not remove the body of the response because that's something + the :meth:`__call__` function does for us automatically. + + Returns self so that you can do ``return resp.make_conditional(req)`` + but modifies the object in-place. + + :param request_or_environ: a request object or WSGI environment to be + used to make the response conditional + against. + :param accept_ranges: This parameter dictates the value of + `Accept-Ranges` header. If ``False`` (default), + the header is not set. If ``True``, it will be set + to ``"bytes"``. If ``None``, it will be set to + ``"none"``. If it's a string, it will use this + value. + :param complete_length: Will be used only in valid Range Requests. + It will set `Content-Range` complete length + value and compute `Content-Length` real value. + This parameter is mandatory for successful + Range Requests completion. + :raises: :class:`~werkzeug.exceptions.RequestedRangeNotSatisfiable` + if `Range` header could not be parsed or satisfied. + """ + environ = _get_environ(request_or_environ) + if environ["REQUEST_METHOD"] in ("GET", "HEAD"): + # if the date is not in the headers, add it now. We however + # will not override an already existing header. Unfortunately + # this header will be overriden by many WSGI servers including + # wsgiref. + if "date" not in self.headers: + self.headers["Date"] = http_date() + accept_ranges = _clean_accept_ranges(accept_ranges) + is206 = self._process_range_request(environ, complete_length, accept_ranges) + if not is206 and not is_resource_modified( + environ, + self.headers.get("etag"), + None, + self.headers.get("last-modified"), + ): + if parse_etags(environ.get("HTTP_IF_MATCH")): + self.status_code = 412 + else: + self.status_code = 304 + if ( + self.automatically_set_content_length + and "content-length" not in self.headers + ): + length = self.calculate_content_length() + if length is not None: + self.headers["Content-Length"] = length + return self + + def add_etag(self, overwrite=False, weak=False): + """Add an etag for the current response if there is none yet.""" + if overwrite or "etag" not in self.headers: + self.set_etag(generate_etag(self.get_data()), weak) + + def set_etag(self, etag, weak=False): + """Set the etag, and override the old one if there was one.""" + self.headers["ETag"] = quote_etag(etag, weak) + + def get_etag(self): + """Return a tuple in the form ``(etag, is_weak)``. If there is no + ETag the return value is ``(None, None)``. + """ + return unquote_etag(self.headers.get("ETag")) + + def freeze(self, no_etag=False): + """Call this method if you want to make your response object ready for + pickeling. This buffers the generator if there is one. This also + sets the etag unless `no_etag` is set to `True`. + """ + if not no_etag: + self.add_etag() + super(ETagResponseMixin, self).freeze() + + accept_ranges = header_property( + "Accept-Ranges", + doc="""The `Accept-Ranges` header. Even though the name would + indicate that multiple values are supported, it must be one + string token only. + + The values ``'bytes'`` and ``'none'`` are common. + + .. versionadded:: 0.7""", + ) + + def _get_content_range(self): + def on_update(rng): + if not rng: + del self.headers["content-range"] + else: + self.headers["Content-Range"] = rng.to_header() + + rv = parse_content_range_header(self.headers.get("content-range"), on_update) + # always provide a content range object to make the descriptor + # more user friendly. It provides an unset() method that can be + # used to remove the header quickly. + if rv is None: + rv = ContentRange(None, None, None, on_update=on_update) + return rv + + def _set_content_range(self, value): + if not value: + del self.headers["content-range"] + elif isinstance(value, string_types): + self.headers["Content-Range"] = value + else: + self.headers["Content-Range"] = value.to_header() + + content_range = property( + _get_content_range, + _set_content_range, + doc="""The ``Content-Range`` header as + :class:`~werkzeug.datastructures.ContentRange` object. Even if + the header is not set it wil provide such an object for easier + manipulation. + + .. versionadded:: 0.7""", + ) + del _get_content_range, _set_content_range diff --git a/python/werkzeug/wrappers/json.py b/python/werkzeug/wrappers/json.py new file mode 100644 index 0000000..6d5dc33 --- /dev/null +++ b/python/werkzeug/wrappers/json.py @@ -0,0 +1,145 @@ +from __future__ import absolute_import + +import datetime +import uuid + +from .._compat import text_type +from ..exceptions import BadRequest +from ..utils import detect_utf_encoding + +try: + import simplejson as _json +except ImportError: + import json as _json + + +class _JSONModule(object): + @staticmethod + def _default(o): + if isinstance(o, datetime.date): + return o.isoformat() + + if isinstance(o, uuid.UUID): + return str(o) + + if hasattr(o, "__html__"): + return text_type(o.__html__()) + + raise TypeError() + + @classmethod + def dumps(cls, obj, **kw): + kw.setdefault("separators", (",", ":")) + kw.setdefault("default", cls._default) + kw.setdefault("sort_keys", True) + return _json.dumps(obj, **kw) + + @staticmethod + def loads(s, **kw): + if isinstance(s, bytes): + # Needed for Python < 3.6 + encoding = detect_utf_encoding(s) + s = s.decode(encoding) + + return _json.loads(s, **kw) + + +class JSONMixin(object): + """Mixin to parse :attr:`data` as JSON. Can be mixed in for both + :class:`~werkzeug.wrappers.Request` and + :class:`~werkzeug.wrappers.Response` classes. + + If `simplejson`_ is installed it is preferred over Python's built-in + :mod:`json` module. + + .. _simplejson: https://simplejson.readthedocs.io/en/latest/ + """ + + #: A module or other object that has ``dumps`` and ``loads`` + #: functions that match the API of the built-in :mod:`json` module. + json_module = _JSONModule + + @property + def json(self): + """The parsed JSON data if :attr:`mimetype` indicates JSON + (:mimetype:`application/json`, see :meth:`is_json`). + + Calls :meth:`get_json` with default arguments. + """ + return self.get_json() + + @property + def is_json(self): + """Check if the mimetype indicates JSON data, either + :mimetype:`application/json` or :mimetype:`application/*+json`. + """ + mt = self.mimetype + return ( + mt == "application/json" + or mt.startswith("application/") + and mt.endswith("+json") + ) + + def _get_data_for_json(self, cache): + try: + return self.get_data(cache=cache) + except TypeError: + # Response doesn't have cache param. + return self.get_data() + + # Cached values for ``(silent=False, silent=True)``. Initialized + # with sentinel values. + _cached_json = (Ellipsis, Ellipsis) + + def get_json(self, force=False, silent=False, cache=True): + """Parse :attr:`data` as JSON. + + If the mimetype does not indicate JSON + (:mimetype:`application/json`, see :meth:`is_json`), this + returns ``None``. + + If parsing fails, :meth:`on_json_loading_failed` is called and + its return value is used as the return value. + + :param force: Ignore the mimetype and always try to parse JSON. + :param silent: Silence parsing errors and return ``None`` + instead. + :param cache: Store the parsed JSON to return for subsequent + calls. + """ + if cache and self._cached_json[silent] is not Ellipsis: + return self._cached_json[silent] + + if not (force or self.is_json): + return None + + data = self._get_data_for_json(cache=cache) + + try: + rv = self.json_module.loads(data) + except ValueError as e: + if silent: + rv = None + + if cache: + normal_rv, _ = self._cached_json + self._cached_json = (normal_rv, rv) + else: + rv = self.on_json_loading_failed(e) + + if cache: + _, silent_rv = self._cached_json + self._cached_json = (rv, silent_rv) + else: + if cache: + self._cached_json = (rv, rv) + + return rv + + def on_json_loading_failed(self, e): + """Called if :meth:`get_json` parsing fails and isn't silenced. + If this method returns a value, it is used as the return value + for :meth:`get_json`. The default implementation raises + :exc:`~werkzeug.exceptions.BadRequest`. + """ + raise BadRequest("Failed to decode JSON object: {0}".format(e)) diff --git a/python/werkzeug/wrappers/request.py b/python/werkzeug/wrappers/request.py new file mode 100644 index 0000000..d1c71b6 --- /dev/null +++ b/python/werkzeug/wrappers/request.py @@ -0,0 +1,44 @@ +from .accept import AcceptMixin +from .auth import AuthorizationMixin +from .base_request import BaseRequest +from .common_descriptors import CommonRequestDescriptorsMixin +from .etag import ETagRequestMixin +from .user_agent import UserAgentMixin + + +class Request( + BaseRequest, + AcceptMixin, + ETagRequestMixin, + UserAgentMixin, + AuthorizationMixin, + CommonRequestDescriptorsMixin, +): + """Full featured request object implementing the following mixins: + + - :class:`AcceptMixin` for accept header parsing + - :class:`ETagRequestMixin` for etag and cache control handling + - :class:`UserAgentMixin` for user agent introspection + - :class:`AuthorizationMixin` for http auth handling + - :class:`CommonRequestDescriptorsMixin` for common headers + """ + + +class StreamOnlyMixin(object): + """If mixed in before the request object this will change the bahavior + of it to disable handling of form parsing. This disables the + :attr:`files`, :attr:`form` attributes and will just provide a + :attr:`stream` attribute that however is always available. + + .. versionadded:: 0.9 + """ + + disable_data_descriptor = True + want_form_data_parsed = False + + +class PlainRequest(StreamOnlyMixin, Request): + """A request object without special form parsing capabilities. + + .. versionadded:: 0.9 + """ diff --git a/python/werkzeug/wrappers/response.py b/python/werkzeug/wrappers/response.py new file mode 100644 index 0000000..cd86cac --- /dev/null +++ b/python/werkzeug/wrappers/response.py @@ -0,0 +1,78 @@ +from ..utils import cached_property +from .auth import WWWAuthenticateMixin +from .base_response import BaseResponse +from .common_descriptors import CommonResponseDescriptorsMixin +from .etag import ETagResponseMixin + + +class ResponseStream(object): + """A file descriptor like object used by the :class:`ResponseStreamMixin` to + represent the body of the stream. It directly pushes into the response + iterable of the response object. + """ + + mode = "wb+" + + def __init__(self, response): + self.response = response + self.closed = False + + def write(self, value): + if self.closed: + raise ValueError("I/O operation on closed file") + self.response._ensure_sequence(mutable=True) + self.response.response.append(value) + self.response.headers.pop("Content-Length", None) + return len(value) + + def writelines(self, seq): + for item in seq: + self.write(item) + + def close(self): + self.closed = True + + def flush(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def isatty(self): + if self.closed: + raise ValueError("I/O operation on closed file") + return False + + def tell(self): + self.response._ensure_sequence() + return sum(map(len, self.response.response)) + + @property + def encoding(self): + return self.response.charset + + +class ResponseStreamMixin(object): + """Mixin for :class:`BaseRequest` subclasses. Classes that inherit from + this mixin will automatically get a :attr:`stream` property that provides + a write-only interface to the response iterable. + """ + + @cached_property + def stream(self): + """The response iterable as write-only stream.""" + return ResponseStream(self) + + +class Response( + BaseResponse, + ETagResponseMixin, + ResponseStreamMixin, + CommonResponseDescriptorsMixin, + WWWAuthenticateMixin, +): + """Full featured response object implementing the following mixins: + + - :class:`ETagResponseMixin` for etag and cache control handling + - :class:`ResponseStreamMixin` to add support for the `stream` property + - :class:`CommonResponseDescriptorsMixin` for various HTTP descriptors + - :class:`WWWAuthenticateMixin` for HTTP authentication support + """ diff --git a/python/werkzeug/wrappers/user_agent.py b/python/werkzeug/wrappers/user_agent.py new file mode 100644 index 0000000..72588dd --- /dev/null +++ b/python/werkzeug/wrappers/user_agent.py @@ -0,0 +1,15 @@ +from ..utils import cached_property + + +class UserAgentMixin(object): + """Adds a `user_agent` attribute to the request object which + contains the parsed user agent of the browser that triggered the + request as a :class:`~werkzeug.useragents.UserAgent` object. + """ + + @cached_property + def user_agent(self): + """The current user agent.""" + from ..useragents import UserAgent + + return UserAgent(self.environ) diff --git a/python/werkzeug/wsgi.py b/python/werkzeug/wsgi.py new file mode 100644 index 0000000..f069f2d --- /dev/null +++ b/python/werkzeug/wsgi.py @@ -0,0 +1,1067 @@ +# -*- coding: utf-8 -*- +""" + werkzeug.wsgi + ~~~~~~~~~~~~~ + + This module implements WSGI related helpers. + + :copyright: 2007 Pallets + :license: BSD-3-Clause +""" +import io +import re +import warnings +from functools import partial +from functools import update_wrapper +from itertools import chain + +from ._compat import BytesIO +from ._compat import implements_iterator +from ._compat import make_literal_wrapper +from ._compat import string_types +from ._compat import text_type +from ._compat import to_bytes +from ._compat import to_unicode +from ._compat import try_coerce_native +from ._compat import wsgi_get_bytes +from ._internal import _encode_idna +from .urls import uri_to_iri +from .urls import url_join +from .urls import url_parse +from .urls import url_quote + + +def responder(f): + """Marks a function as responder. Decorate a function with it and it + will automatically call the return value as WSGI application. + + Example:: + + @responder + def application(environ, start_response): + return Response('Hello World!') + """ + return update_wrapper(lambda *a: f(*a)(*a[-2:]), f) + + +def get_current_url( + environ, + root_only=False, + strip_querystring=False, + host_only=False, + trusted_hosts=None, +): + """A handy helper function that recreates the full URL as IRI for the + current request or parts of it. Here's an example: + + >>> from werkzeug.test import create_environ + >>> env = create_environ("/?param=foo", "http://localhost/script") + >>> get_current_url(env) + 'http://localhost/script/?param=foo' + >>> get_current_url(env, root_only=True) + 'http://localhost/script/' + >>> get_current_url(env, host_only=True) + 'http://localhost/' + >>> get_current_url(env, strip_querystring=True) + 'http://localhost/script/' + + This optionally it verifies that the host is in a list of trusted hosts. + If the host is not in there it will raise a + :exc:`~werkzeug.exceptions.SecurityError`. + + Note that the string returned might contain unicode characters as the + representation is an IRI not an URI. If you need an ASCII only + representation you can use the :func:`~werkzeug.urls.iri_to_uri` + function: + + >>> from werkzeug.urls import iri_to_uri + >>> iri_to_uri(get_current_url(env)) + 'http://localhost/script/?param=foo' + + :param environ: the WSGI environment to get the current URL from. + :param root_only: set `True` if you only want the root URL. + :param strip_querystring: set to `True` if you don't want the querystring. + :param host_only: set to `True` if the host URL should be returned. + :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted` + for more information. + """ + tmp = [environ["wsgi.url_scheme"], "://", get_host(environ, trusted_hosts)] + cat = tmp.append + if host_only: + return uri_to_iri("".join(tmp) + "/") + cat(url_quote(wsgi_get_bytes(environ.get("SCRIPT_NAME", ""))).rstrip("/")) + cat("/") + if not root_only: + cat(url_quote(wsgi_get_bytes(environ.get("PATH_INFO", "")).lstrip(b"/"))) + if not strip_querystring: + qs = get_query_string(environ) + if qs: + cat("?" + qs) + return uri_to_iri("".join(tmp)) + + +def host_is_trusted(hostname, trusted_list): + """Checks if a host is trusted against a list. This also takes care + of port normalization. + + .. versionadded:: 0.9 + + :param hostname: the hostname to check + :param trusted_list: a list of hostnames to check against. If a + hostname starts with a dot it will match against + all subdomains as well. + """ + if not hostname: + return False + + if isinstance(trusted_list, string_types): + trusted_list = [trusted_list] + + def _normalize(hostname): + if ":" in hostname: + hostname = hostname.rsplit(":", 1)[0] + return _encode_idna(hostname) + + try: + hostname = _normalize(hostname) + except UnicodeError: + return False + for ref in trusted_list: + if ref.startswith("."): + ref = ref[1:] + suffix_match = True + else: + suffix_match = False + try: + ref = _normalize(ref) + except UnicodeError: + return False + if ref == hostname: + return True + if suffix_match and hostname.endswith(b"." + ref): + return True + return False + + +def get_host(environ, trusted_hosts=None): + """Return the host for the given WSGI environment. This first checks + the ``Host`` header. If it's not present, then ``SERVER_NAME`` and + ``SERVER_PORT`` are used. The host will only contain the port if it + is different than the standard port for the protocol. + + Optionally, verify that the host is trusted using + :func:`host_is_trusted` and raise a + :exc:`~werkzeug.exceptions.SecurityError` if it is not. + + :param environ: The WSGI environment to get the host from. + :param trusted_hosts: A list of trusted hosts. + :return: Host, with port if necessary. + :raise ~werkzeug.exceptions.SecurityError: If the host is not + trusted. + """ + if "HTTP_HOST" in environ: + rv = environ["HTTP_HOST"] + if environ["wsgi.url_scheme"] == "http" and rv.endswith(":80"): + rv = rv[:-3] + elif environ["wsgi.url_scheme"] == "https" and rv.endswith(":443"): + rv = rv[:-4] + else: + rv = environ["SERVER_NAME"] + if (environ["wsgi.url_scheme"], environ["SERVER_PORT"]) not in ( + ("https", "443"), + ("http", "80"), + ): + rv += ":" + environ["SERVER_PORT"] + if trusted_hosts is not None: + if not host_is_trusted(rv, trusted_hosts): + from .exceptions import SecurityError + + raise SecurityError('Host "%s" is not trusted' % rv) + return rv + + +def get_content_length(environ): + """Returns the content length from the WSGI environment as + integer. If it's not available or chunked transfer encoding is used, + ``None`` is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environ to fetch the content length from. + """ + if environ.get("HTTP_TRANSFER_ENCODING", "") == "chunked": + return None + + content_length = environ.get("CONTENT_LENGTH") + if content_length is not None: + try: + return max(0, int(content_length)) + except (ValueError, TypeError): + pass + + +def get_input_stream(environ, safe_fallback=True): + """Returns the input stream from the WSGI environment and wraps it + in the most sensible way possible. The stream returned is not the + raw WSGI stream in most cases but one that is safe to read from + without taking into account the content length. + + If content length is not set, the stream will be empty for safety reasons. + If the WSGI server supports chunked or infinite streams, it should set + the ``wsgi.input_terminated`` value in the WSGI environ to indicate that. + + .. versionadded:: 0.9 + + :param environ: the WSGI environ to fetch the stream from. + :param safe_fallback: use an empty stream as a safe fallback when the + content length is not set. Disabling this allows infinite streams, + which can be a denial-of-service risk. + """ + stream = environ["wsgi.input"] + content_length = get_content_length(environ) + + # A wsgi extension that tells us if the input is terminated. In + # that case we return the stream unchanged as we know we can safely + # read it until the end. + if environ.get("wsgi.input_terminated"): + return stream + + # If the request doesn't specify a content length, returning the stream is + # potentially dangerous because it could be infinite, malicious or not. If + # safe_fallback is true, return an empty stream instead for safety. + if content_length is None: + return BytesIO() if safe_fallback else stream + + # Otherwise limit the stream to the content length + return LimitedStream(stream, content_length) + + +def get_query_string(environ): + """Returns the `QUERY_STRING` from the WSGI environment. This also takes + care about the WSGI decoding dance on Python 3 environments as a + native string. The string returned will be restricted to ASCII + characters. + + .. versionadded:: 0.9 + + :param environ: the WSGI environment object to get the query string from. + """ + qs = wsgi_get_bytes(environ.get("QUERY_STRING", "")) + # QUERY_STRING really should be ascii safe but some browsers + # will send us some unicode stuff (I am looking at you IE). + # In that case we want to urllib quote it badly. + return try_coerce_native(url_quote(qs, safe=":&%=+$!*'(),")) + + +def get_path_info(environ, charset="utf-8", errors="replace"): + """Returns the `PATH_INFO` from the WSGI environment and properly + decodes it. This also takes care about the WSGI decoding dance + on Python 3 environments. if the `charset` is set to `None` a + bytestring is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environment object to get the path from. + :param charset: the charset for the path info, or `None` if no + decoding should be performed. + :param errors: the decoding error handling. + """ + path = wsgi_get_bytes(environ.get("PATH_INFO", "")) + return to_unicode(path, charset, errors, allow_none_charset=True) + + +def get_script_name(environ, charset="utf-8", errors="replace"): + """Returns the `SCRIPT_NAME` from the WSGI environment and properly + decodes it. This also takes care about the WSGI decoding dance + on Python 3 environments. if the `charset` is set to `None` a + bytestring is returned. + + .. versionadded:: 0.9 + + :param environ: the WSGI environment object to get the path from. + :param charset: the charset for the path, or `None` if no + decoding should be performed. + :param errors: the decoding error handling. + """ + path = wsgi_get_bytes(environ.get("SCRIPT_NAME", "")) + return to_unicode(path, charset, errors, allow_none_charset=True) + + +def pop_path_info(environ, charset="utf-8", errors="replace"): + """Removes and returns the next segment of `PATH_INFO`, pushing it onto + `SCRIPT_NAME`. Returns `None` if there is nothing left on `PATH_INFO`. + + If the `charset` is set to `None` a bytestring is returned. + + If there are empty segments (``'/foo//bar``) these are ignored but + properly pushed to the `SCRIPT_NAME`: + + >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'} + >>> pop_path_info(env) + 'a' + >>> env['SCRIPT_NAME'] + '/foo/a' + >>> pop_path_info(env) + 'b' + >>> env['SCRIPT_NAME'] + '/foo/a/b' + + .. versionadded:: 0.5 + + .. versionchanged:: 0.9 + The path is now decoded and a charset and encoding + parameter can be provided. + + :param environ: the WSGI environment that is modified. + """ + path = environ.get("PATH_INFO") + if not path: + return None + + script_name = environ.get("SCRIPT_NAME", "") + + # shift multiple leading slashes over + old_path = path + path = path.lstrip("/") + if path != old_path: + script_name += "/" * (len(old_path) - len(path)) + + if "/" not in path: + environ["PATH_INFO"] = "" + environ["SCRIPT_NAME"] = script_name + path + rv = wsgi_get_bytes(path) + else: + segment, path = path.split("/", 1) + environ["PATH_INFO"] = "/" + path + environ["SCRIPT_NAME"] = script_name + segment + rv = wsgi_get_bytes(segment) + + return to_unicode(rv, charset, errors, allow_none_charset=True) + + +def peek_path_info(environ, charset="utf-8", errors="replace"): + """Returns the next segment on the `PATH_INFO` or `None` if there + is none. Works like :func:`pop_path_info` without modifying the + environment: + + >>> env = {'SCRIPT_NAME': '/foo', 'PATH_INFO': '/a/b'} + >>> peek_path_info(env) + 'a' + >>> peek_path_info(env) + 'a' + + If the `charset` is set to `None` a bytestring is returned. + + .. versionadded:: 0.5 + + .. versionchanged:: 0.9 + The path is now decoded and a charset and encoding + parameter can be provided. + + :param environ: the WSGI environment that is checked. + """ + segments = environ.get("PATH_INFO", "").lstrip("/").split("/", 1) + if segments: + return to_unicode( + wsgi_get_bytes(segments[0]), charset, errors, allow_none_charset=True + ) + + +def extract_path_info( + environ_or_baseurl, + path_or_url, + charset="utf-8", + errors="werkzeug.url_quote", + collapse_http_schemes=True, +): + """Extracts the path info from the given URL (or WSGI environment) and + path. The path info returned is a unicode string, not a bytestring + suitable for a WSGI environment. The URLs might also be IRIs. + + If the path info could not be determined, `None` is returned. + + Some examples: + + >>> extract_path_info('http://example.com/app', '/app/hello') + u'/hello' + >>> extract_path_info('http://example.com/app', + ... 'https://example.com/app/hello') + u'/hello' + >>> extract_path_info('http://example.com/app', + ... 'https://example.com/app/hello', + ... collapse_http_schemes=False) is None + True + + Instead of providing a base URL you can also pass a WSGI environment. + + :param environ_or_baseurl: a WSGI environment dict, a base URL or + base IRI. This is the root of the + application. + :param path_or_url: an absolute path from the server root, a + relative path (in which case it's the path info) + or a full URL. Also accepts IRIs and unicode + parameters. + :param charset: the charset for byte data in URLs + :param errors: the error handling on decode + :param collapse_http_schemes: if set to `False` the algorithm does + not assume that http and https on the + same server point to the same + resource. + + .. versionchanged:: 0.15 + The ``errors`` parameter defaults to leaving invalid bytes + quoted instead of replacing them. + + .. versionadded:: 0.6 + """ + + def _normalize_netloc(scheme, netloc): + parts = netloc.split(u"@", 1)[-1].split(u":", 1) + if len(parts) == 2: + netloc, port = parts + if (scheme == u"http" and port == u"80") or ( + scheme == u"https" and port == u"443" + ): + port = None + else: + netloc = parts[0] + port = None + if port is not None: + netloc += u":" + port + return netloc + + # make sure whatever we are working on is a IRI and parse it + path = uri_to_iri(path_or_url, charset, errors) + if isinstance(environ_or_baseurl, dict): + environ_or_baseurl = get_current_url(environ_or_baseurl, root_only=True) + base_iri = uri_to_iri(environ_or_baseurl, charset, errors) + base_scheme, base_netloc, base_path = url_parse(base_iri)[:3] + cur_scheme, cur_netloc, cur_path, = url_parse(url_join(base_iri, path))[:3] + + # normalize the network location + base_netloc = _normalize_netloc(base_scheme, base_netloc) + cur_netloc = _normalize_netloc(cur_scheme, cur_netloc) + + # is that IRI even on a known HTTP scheme? + if collapse_http_schemes: + for scheme in base_scheme, cur_scheme: + if scheme not in (u"http", u"https"): + return None + else: + if not (base_scheme in (u"http", u"https") and base_scheme == cur_scheme): + return None + + # are the netlocs compatible? + if base_netloc != cur_netloc: + return None + + # are we below the application path? + base_path = base_path.rstrip(u"/") + if not cur_path.startswith(base_path): + return None + + return u"/" + cur_path[len(base_path) :].lstrip(u"/") + + +@implements_iterator +class ClosingIterator(object): + """The WSGI specification requires that all middlewares and gateways + respect the `close` callback of the iterable returned by the application. + Because it is useful to add another close action to a returned iterable + and adding a custom iterable is a boring task this class can be used for + that:: + + return ClosingIterator(app(environ, start_response), [cleanup_session, + cleanup_locals]) + + If there is just one close function it can be passed instead of the list. + + A closing iterator is not needed if the application uses response objects + and finishes the processing if the response is started:: + + try: + return response(environ, start_response) + finally: + cleanup_session() + cleanup_locals() + """ + + def __init__(self, iterable, callbacks=None): + iterator = iter(iterable) + self._next = partial(next, iterator) + if callbacks is None: + callbacks = [] + elif callable(callbacks): + callbacks = [callbacks] + else: + callbacks = list(callbacks) + iterable_close = getattr(iterable, "close", None) + if iterable_close: + callbacks.insert(0, iterable_close) + self._callbacks = callbacks + + def __iter__(self): + return self + + def __next__(self): + return self._next() + + def close(self): + for callback in self._callbacks: + callback() + + +def wrap_file(environ, file, buffer_size=8192): + """Wraps a file. This uses the WSGI server's file wrapper if available + or otherwise the generic :class:`FileWrapper`. + + .. versionadded:: 0.5 + + If the file wrapper from the WSGI server is used it's important to not + iterate over it from inside the application but to pass it through + unchanged. If you want to pass out a file wrapper inside a response + object you have to set :attr:`~BaseResponse.direct_passthrough` to `True`. + + More information about file wrappers are available in :pep:`333`. + + :param file: a :class:`file`-like object with a :meth:`~file.read` method. + :param buffer_size: number of bytes for one iteration. + """ + return environ.get("wsgi.file_wrapper", FileWrapper)(file, buffer_size) + + +@implements_iterator +class FileWrapper(object): + """This class can be used to convert a :class:`file`-like object into + an iterable. It yields `buffer_size` blocks until the file is fully + read. + + You should not use this class directly but rather use the + :func:`wrap_file` function that uses the WSGI server's file wrapper + support if it's available. + + .. versionadded:: 0.5 + + If you're using this object together with a :class:`BaseResponse` you have + to use the `direct_passthrough` mode. + + :param file: a :class:`file`-like object with a :meth:`~file.read` method. + :param buffer_size: number of bytes for one iteration. + """ + + def __init__(self, file, buffer_size=8192): + self.file = file + self.buffer_size = buffer_size + + def close(self): + if hasattr(self.file, "close"): + self.file.close() + + def seekable(self): + if hasattr(self.file, "seekable"): + return self.file.seekable() + if hasattr(self.file, "seek"): + return True + return False + + def seek(self, *args): + if hasattr(self.file, "seek"): + self.file.seek(*args) + + def tell(self): + if hasattr(self.file, "tell"): + return self.file.tell() + return None + + def __iter__(self): + return self + + def __next__(self): + data = self.file.read(self.buffer_size) + if data: + return data + raise StopIteration() + + +@implements_iterator +class _RangeWrapper(object): + # private for now, but should we make it public in the future ? + + """This class can be used to convert an iterable object into + an iterable that will only yield a piece of the underlying content. + It yields blocks until the underlying stream range is fully read. + The yielded blocks will have a size that can't exceed the original + iterator defined block size, but that can be smaller. + + If you're using this object together with a :class:`BaseResponse` you have + to use the `direct_passthrough` mode. + + :param iterable: an iterable object with a :meth:`__next__` method. + :param start_byte: byte from which read will start. + :param byte_range: how many bytes to read. + """ + + def __init__(self, iterable, start_byte=0, byte_range=None): + self.iterable = iter(iterable) + self.byte_range = byte_range + self.start_byte = start_byte + self.end_byte = None + if byte_range is not None: + self.end_byte = self.start_byte + self.byte_range + self.read_length = 0 + self.seekable = hasattr(iterable, "seekable") and iterable.seekable() + self.end_reached = False + + def __iter__(self): + return self + + def _next_chunk(self): + try: + chunk = next(self.iterable) + self.read_length += len(chunk) + return chunk + except StopIteration: + self.end_reached = True + raise + + def _first_iteration(self): + chunk = None + if self.seekable: + self.iterable.seek(self.start_byte) + self.read_length = self.iterable.tell() + contextual_read_length = self.read_length + else: + while self.read_length <= self.start_byte: + chunk = self._next_chunk() + if chunk is not None: + chunk = chunk[self.start_byte - self.read_length :] + contextual_read_length = self.start_byte + return chunk, contextual_read_length + + def _next(self): + if self.end_reached: + raise StopIteration() + chunk = None + contextual_read_length = self.read_length + if self.read_length == 0: + chunk, contextual_read_length = self._first_iteration() + if chunk is None: + chunk = self._next_chunk() + if self.end_byte is not None and self.read_length >= self.end_byte: + self.end_reached = True + return chunk[: self.end_byte - contextual_read_length] + return chunk + + def __next__(self): + chunk = self._next() + if chunk: + return chunk + self.end_reached = True + raise StopIteration() + + def close(self): + if hasattr(self.iterable, "close"): + self.iterable.close() + + +def _make_chunk_iter(stream, limit, buffer_size): + """Helper for the line and chunk iter functions.""" + if isinstance(stream, (bytes, bytearray, text_type)): + raise TypeError( + "Passed a string or byte object instead of true iterator or stream." + ) + if not hasattr(stream, "read"): + for item in stream: + if item: + yield item + return + if not isinstance(stream, LimitedStream) and limit is not None: + stream = LimitedStream(stream, limit) + _read = stream.read + while 1: + item = _read(buffer_size) + if not item: + break + yield item + + +def make_line_iter(stream, limit=None, buffer_size=10 * 1024, cap_at_buffer=False): + """Safely iterates line-based over an input stream. If the input stream + is not a :class:`LimitedStream` the `limit` parameter is mandatory. + + This uses the stream's :meth:`~file.read` method internally as opposite + to the :meth:`~file.readline` method that is unsafe and can only be used + in violation of the WSGI specification. The same problem applies to the + `__iter__` function of the input stream which calls :meth:`~file.readline` + without arguments. + + If you need line-by-line processing it's strongly recommended to iterate + over the input stream using this helper function. + + .. versionchanged:: 0.8 + This function now ensures that the limit was reached. + + .. versionadded:: 0.9 + added support for iterators as input stream. + + .. versionadded:: 0.11.10 + added support for the `cap_at_buffer` parameter. + + :param stream: the stream or iterate to iterate over. + :param limit: the limit in bytes for the stream. (Usually + content length. Not necessary if the `stream` + is a :class:`LimitedStream`. + :param buffer_size: The optional buffer size. + :param cap_at_buffer: if this is set chunks are split if they are longer + than the buffer size. Internally this is implemented + that the buffer size might be exhausted by a factor + of two however. + """ + _iter = _make_chunk_iter(stream, limit, buffer_size) + + first_item = next(_iter, "") + if not first_item: + return + + s = make_literal_wrapper(first_item) + empty = s("") + cr = s("\r") + lf = s("\n") + crlf = s("\r\n") + + _iter = chain((first_item,), _iter) + + def _iter_basic_lines(): + _join = empty.join + buffer = [] + while 1: + new_data = next(_iter, "") + if not new_data: + break + new_buf = [] + buf_size = 0 + for item in chain(buffer, new_data.splitlines(True)): + new_buf.append(item) + buf_size += len(item) + if item and item[-1:] in crlf: + yield _join(new_buf) + new_buf = [] + elif cap_at_buffer and buf_size >= buffer_size: + rv = _join(new_buf) + while len(rv) >= buffer_size: + yield rv[:buffer_size] + rv = rv[buffer_size:] + new_buf = [rv] + buffer = new_buf + if buffer: + yield _join(buffer) + + # This hackery is necessary to merge 'foo\r' and '\n' into one item + # of 'foo\r\n' if we were unlucky and we hit a chunk boundary. + previous = empty + for item in _iter_basic_lines(): + if item == lf and previous[-1:] == cr: + previous += item + item = empty + if previous: + yield previous + previous = item + if previous: + yield previous + + +def make_chunk_iter( + stream, separator, limit=None, buffer_size=10 * 1024, cap_at_buffer=False +): + """Works like :func:`make_line_iter` but accepts a separator + which divides chunks. If you want newline based processing + you should use :func:`make_line_iter` instead as it + supports arbitrary newline markers. + + .. versionadded:: 0.8 + + .. versionadded:: 0.9 + added support for iterators as input stream. + + .. versionadded:: 0.11.10 + added support for the `cap_at_buffer` parameter. + + :param stream: the stream or iterate to iterate over. + :param separator: the separator that divides chunks. + :param limit: the limit in bytes for the stream. (Usually + content length. Not necessary if the `stream` + is otherwise already limited). + :param buffer_size: The optional buffer size. + :param cap_at_buffer: if this is set chunks are split if they are longer + than the buffer size. Internally this is implemented + that the buffer size might be exhausted by a factor + of two however. + """ + _iter = _make_chunk_iter(stream, limit, buffer_size) + + first_item = next(_iter, "") + if not first_item: + return + + _iter = chain((first_item,), _iter) + if isinstance(first_item, text_type): + separator = to_unicode(separator) + _split = re.compile(r"(%s)" % re.escape(separator)).split + _join = u"".join + else: + separator = to_bytes(separator) + _split = re.compile(b"(" + re.escape(separator) + b")").split + _join = b"".join + + buffer = [] + while 1: + new_data = next(_iter, "") + if not new_data: + break + chunks = _split(new_data) + new_buf = [] + buf_size = 0 + for item in chain(buffer, chunks): + if item == separator: + yield _join(new_buf) + new_buf = [] + buf_size = 0 + else: + buf_size += len(item) + new_buf.append(item) + + if cap_at_buffer and buf_size >= buffer_size: + rv = _join(new_buf) + while len(rv) >= buffer_size: + yield rv[:buffer_size] + rv = rv[buffer_size:] + new_buf = [rv] + buf_size = len(rv) + + buffer = new_buf + if buffer: + yield _join(buffer) + + +@implements_iterator +class LimitedStream(io.IOBase): + """Wraps a stream so that it doesn't read more than n bytes. If the + stream is exhausted and the caller tries to get more bytes from it + :func:`on_exhausted` is called which by default returns an empty + string. The return value of that function is forwarded + to the reader function. So if it returns an empty string + :meth:`read` will return an empty string as well. + + The limit however must never be higher than what the stream can + output. Otherwise :meth:`readlines` will try to read past the + limit. + + .. admonition:: Note on WSGI compliance + + calls to :meth:`readline` and :meth:`readlines` are not + WSGI compliant because it passes a size argument to the + readline methods. Unfortunately the WSGI PEP is not safely + implementable without a size argument to :meth:`readline` + because there is no EOF marker in the stream. As a result + of that the use of :meth:`readline` is discouraged. + + For the same reason iterating over the :class:`LimitedStream` + is not portable. It internally calls :meth:`readline`. + + We strongly suggest using :meth:`read` only or using the + :func:`make_line_iter` which safely iterates line-based + over a WSGI input stream. + + :param stream: the stream to wrap. + :param limit: the limit for the stream, must not be longer than + what the string can provide if the stream does not + end with `EOF` (like `wsgi.input`) + """ + + def __init__(self, stream, limit): + self._read = stream.read + self._readline = stream.readline + self._pos = 0 + self.limit = limit + + def __iter__(self): + return self + + @property + def is_exhausted(self): + """If the stream is exhausted this attribute is `True`.""" + return self._pos >= self.limit + + def on_exhausted(self): + """This is called when the stream tries to read past the limit. + The return value of this function is returned from the reading + function. + """ + # Read null bytes from the stream so that we get the + # correct end of stream marker. + return self._read(0) + + def on_disconnect(self): + """What should happen if a disconnect is detected? The return + value of this function is returned from read functions in case + the client went away. By default a + :exc:`~werkzeug.exceptions.ClientDisconnected` exception is raised. + """ + from .exceptions import ClientDisconnected + + raise ClientDisconnected() + + def exhaust(self, chunk_size=1024 * 64): + """Exhaust the stream. This consumes all the data left until the + limit is reached. + + :param chunk_size: the size for a chunk. It will read the chunk + until the stream is exhausted and throw away + the results. + """ + to_read = self.limit - self._pos + chunk = chunk_size + while to_read > 0: + chunk = min(to_read, chunk) + self.read(chunk) + to_read -= chunk + + def read(self, size=None): + """Read `size` bytes or if size is not provided everything is read. + + :param size: the number of bytes read. + """ + if self._pos >= self.limit: + return self.on_exhausted() + if size is None or size == -1: # -1 is for consistence with file + size = self.limit + to_read = min(self.limit - self._pos, size) + try: + read = self._read(to_read) + except (IOError, ValueError): + return self.on_disconnect() + if to_read and len(read) != to_read: + return self.on_disconnect() + self._pos += len(read) + return read + + def readline(self, size=None): + """Reads one line from the stream.""" + if self._pos >= self.limit: + return self.on_exhausted() + if size is None: + size = self.limit - self._pos + else: + size = min(size, self.limit - self._pos) + try: + line = self._readline(size) + except (ValueError, IOError): + return self.on_disconnect() + if size and not line: + return self.on_disconnect() + self._pos += len(line) + return line + + def readlines(self, size=None): + """Reads a file into a list of strings. It calls :meth:`readline` + until the file is read to the end. It does support the optional + `size` argument if the underlaying stream supports it for + `readline`. + """ + last_pos = self._pos + result = [] + if size is not None: + end = min(self.limit, last_pos + size) + else: + end = self.limit + while 1: + if size is not None: + size -= last_pos - self._pos + if self._pos >= end: + break + result.append(self.readline(size)) + if size is not None: + last_pos = self._pos + return result + + def tell(self): + """Returns the position of the stream. + + .. versionadded:: 0.9 + """ + return self._pos + + def __next__(self): + line = self.readline() + if not line: + raise StopIteration() + return line + + def readable(self): + return True + + +# DEPRECATED +from .middleware.dispatcher import DispatcherMiddleware as _DispatcherMiddleware +from .middleware.http_proxy import ProxyMiddleware as _ProxyMiddleware +from .middleware.shared_data import SharedDataMiddleware as _SharedDataMiddleware + + +class ProxyMiddleware(_ProxyMiddleware): + """ + .. deprecated:: 0.15 + ``werkzeug.wsgi.ProxyMiddleware`` has moved to + :mod:`werkzeug.middleware.http_proxy`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.wsgi.ProxyMiddleware' has moved to 'werkzeug" + ".middleware.http_proxy.ProxyMiddleware'. This import is" + " deprecated as of version 0.15 and will be removed in" + " version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(ProxyMiddleware, self).__init__(*args, **kwargs) + + +class SharedDataMiddleware(_SharedDataMiddleware): + """ + .. deprecated:: 0.15 + ``werkzeug.wsgi.SharedDataMiddleware`` has moved to + :mod:`werkzeug.middleware.shared_data`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.wsgi.SharedDataMiddleware' has moved to" + " 'werkzeug.middleware.shared_data.SharedDataMiddleware'." + " This import is deprecated as of version 0.15 and will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(SharedDataMiddleware, self).__init__(*args, **kwargs) + + +class DispatcherMiddleware(_DispatcherMiddleware): + """ + .. deprecated:: 0.15 + ``werkzeug.wsgi.DispatcherMiddleware`` has moved to + :mod:`werkzeug.middleware.dispatcher`. This import will be + removed in 1.0. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + "'werkzeug.wsgi.DispatcherMiddleware' has moved to" + " 'werkzeug.middleware.dispatcher.DispatcherMiddleware'." + " This import is deprecated as of version 0.15 and will be" + " removed in version 1.0.", + DeprecationWarning, + stacklevel=2, + ) + super(DispatcherMiddleware, self).__init__(*args, **kwargs) |