Merge flask framework and other stuff from master

author: James Taylor <user234683@users.noreply.github.com> 2019-08-09 22:01:04 -0700
committer: James Taylor <user234683@users.noreply.github.com> 2019-08-09 22:01:04 -0700
commit: 2e75c6d9603f8a5edf6495f8d4fb3115a67d823c (patch)
tree: 8fb2d1bec2cf0e50c5fce6bc718f755485419db0 /python/werkzeug/urls.py
parent: cc9283ad5332f59a69a91d9d0fab299779de513c (diff)
parent: adc40bc760345a23678a01f27d7697dfd3811914 (diff)
download: yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.lz
yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.xz
yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.zip
1 files changed, 1134 insertions, 0 deletions
diff --git a/python/werkzeug/urls.py b/python/werkzeug/urls.py
new file mode 100644
index 0000000..38e9e5a
--- /dev/null
+++ b/python/werkzeug/urls.py
@@ -0,0 +1,1134 @@
+# -*- coding: utf-8 -*-
+"""
+    werkzeug.urls
+    ~~~~~~~~~~~~~
+
+    ``werkzeug.urls`` used to provide several wrapper functions for Python 2
+    urlparse, whose main purpose were to work around the behavior of the Py2
+    stdlib and its lack of unicode support. While this was already a somewhat
+    inconvenient situation, it got even more complicated because Python 3's
+    ``urllib.parse`` actually does handle unicode properly. In other words,
+    this module would wrap two libraries with completely different behavior. So
+    now this module contains a 2-and-3-compatible backport of Python 3's
+    ``urllib.parse``, which is mostly API-compatible.
+
+    :copyright: 2007 Pallets
+    :license: BSD-3-Clause
+"""
+import codecs
+import os
+import re
+from collections import namedtuple
+
+from ._compat import fix_tuple_repr
+from ._compat import implements_to_string
+from ._compat import make_literal_wrapper
+from ._compat import normalize_string_tuple
+from ._compat import PY2
+from ._compat import text_type
+from ._compat import to_native
+from ._compat import to_unicode
+from ._compat import try_coerce_native
+from ._internal import _decode_idna
+from ._internal import _encode_idna
+from .datastructures import iter_multi_items
+from .datastructures import MultiDict
+
+# A regular expression for what a valid schema looks like
+_scheme_re = re.compile(r"^[a-zA-Z0-9+-.]+$")
+
+# Characters that are safe in any part of an URL.
+_always_safe = frozenset(
+    bytearray(
+        b"abcdefghijklmnopqrstuvwxyz"
+        b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+        b"0123456789"
+        b"-._~"
+    )
+)
+
+_hexdigits = "0123456789ABCDEFabcdef"
+_hextobyte = dict(
+    ((a + b).encode(), int(a + b, 16)) for a in _hexdigits for b in _hexdigits
+)
+_bytetohex = [("%%%02X" % char).encode("ascii") for char in range(256)]
+
+
+_URLTuple = fix_tuple_repr(
+    namedtuple("_URLTuple", ["scheme", "netloc", "path", "query", "fragment"])
+)
+
+
+class BaseURL(_URLTuple):
+    """Superclass of :py:class:`URL` and :py:class:`BytesURL`."""
+
+    __slots__ = ()
+
+    def replace(self, **kwargs):
+        """Return an URL with the same values, except for those parameters
+        given new values by whichever keyword arguments are specified."""
+        return self._replace(**kwargs)
+
+    @property
+    def host(self):
+        """The host part of the URL if available, otherwise `None`.  The
+        host is either the hostname or the IP address mentioned in the
+        URL.  It will not contain the port.
+        """
+        return self._split_host()[0]
+
+    @property
+    def ascii_host(self):
+        """Works exactly like :attr:`host` but will return a result that
+        is restricted to ASCII.  If it finds a netloc that is not ASCII
+        it will attempt to idna decode it.  This is useful for socket
+        operations when the URL might include internationalized characters.
+        """
+        rv = self.host
+        if rv is not None and isinstance(rv, text_type):
+            try:
+                rv = _encode_idna(rv)
+            except UnicodeError:
+                rv = rv.encode("ascii", "ignore")
+        return to_native(rv, "ascii", "ignore")
+
+    @property
+    def port(self):
+        """The port in the URL as an integer if it was present, `None`
+        otherwise.  This does not fill in default ports.
+        """
+        try:
+            rv = int(to_native(self._split_host()[1]))
+            if 0 <= rv <= 65535:
+                return rv
+        except (ValueError, TypeError):
+            pass
+
+    @property
+    def auth(self):
+        """The authentication part in the URL if available, `None`
+        otherwise.
+        """
+        return self._split_netloc()[0]
+
+    @property
+    def username(self):
+        """The username if it was part of the URL, `None` otherwise.
+        This undergoes URL decoding and will always be a unicode string.
+        """
+        rv = self._split_auth()[0]
+        if rv is not None:
+            return _url_unquote_legacy(rv)
+
+    @property
+    def raw_username(self):
+        """The username if it was part of the URL, `None` otherwise.
+        Unlike :attr:`username` this one is not being decoded.
+        """
+        return self._split_auth()[0]
+
+    @property
+    def password(self):
+        """The password if it was part of the URL, `None` otherwise.
+        This undergoes URL decoding and will always be a unicode string.
+        """
+        rv = self._split_auth()[1]
+        if rv is not None:
+            return _url_unquote_legacy(rv)
+
+    @property
+    def raw_password(self):
+        """The password if it was part of the URL, `None` otherwise.
+        Unlike :attr:`password` this one is not being decoded.
+        """
+        return self._split_auth()[1]
+
+    def decode_query(self, *args, **kwargs):
+        """Decodes the query part of the URL.  Ths is a shortcut for
+        calling :func:`url_decode` on the query argument.  The arguments and
+        keyword arguments are forwarded to :func:`url_decode` unchanged.
+        """
+        return url_decode(self.query, *args, **kwargs)
+
+    def join(self, *args, **kwargs):
+        """Joins this URL with another one.  This is just a convenience
+        function for calling into :meth:`url_join` and then parsing the
+        return value again.
+        """
+        return url_parse(url_join(self, *args, **kwargs))
+
+    def to_url(self):
+        """Returns a URL string or bytes depending on the type of the
+        information stored.  This is just a convenience function
+        for calling :meth:`url_unparse` for this URL.
+        """
+        return url_unparse(self)
+
+    def decode_netloc(self):
+        """Decodes the netloc part into a string."""
+        rv = _decode_idna(self.host or "")
+
+        if ":" in rv:
+            rv = "[%s]" % rv
+        port = self.port
+        if port is not None:
+            rv = "%s:%d" % (rv, port)
+        auth = ":".join(
+            filter(
+                None,
+                [
+                    _url_unquote_legacy(self.raw_username or "", "/:%@"),
+                    _url_unquote_legacy(self.raw_password or "", "/:%@"),
+                ],
+            )
+        )
+        if auth:
+            rv = "%s@%s" % (auth, rv)
+        return rv
+
+    def to_uri_tuple(self):
+        """Returns a :class:`BytesURL` tuple that holds a URI.  This will
+        encode all the information in the URL properly to ASCII using the
+        rules a web browser would follow.
+
+        It's usually more interesting to directly call :meth:`iri_to_uri` which
+        will return a string.
+        """
+        return url_parse(iri_to_uri(self).encode("ascii"))
+
+    def to_iri_tuple(self):
+        """Returns a :class:`URL` tuple that holds a IRI.  This will try
+        to decode as much information as possible in the URL without
+        losing information similar to how a web browser does it for the
+        URL bar.
+
+        It's usually more interesting to directly call :meth:`uri_to_iri` which
+        will return a string.
+        """
+        return url_parse(uri_to_iri(self))
+
+    def get_file_location(self, pathformat=None):
+        """Returns a tuple with the location of the file in the form
+        ``(server, location)``.  If the netloc is empty in the URL or
+        points to localhost, it's represented as ``None``.
+
+        The `pathformat` by default is autodetection but needs to be set
+        when working with URLs of a specific system.  The supported values
+        are ``'windows'`` when working with Windows or DOS paths and
+        ``'posix'`` when working with posix paths.
+
+        If the URL does not point to a local file, the server and location
+        are both represented as ``None``.
+
+        :param pathformat: The expected format of the path component.
+                           Currently ``'windows'`` and ``'posix'`` are
+                           supported.  Defaults to ``None`` which is
+                           autodetect.
+        """
+        if self.scheme != "file":
+            return None, None
+
+        path = url_unquote(self.path)
+        host = self.netloc or None
+
+        if pathformat is None:
+            if os.name == "nt":
+                pathformat = "windows"
+            else:
+                pathformat = "posix"
+
+        if pathformat == "windows":
+            if path[:1] == "/" and path[1:2].isalpha() and path[2:3] in "|:":
+                path = path[1:2] + ":" + path[3:]
+            windows_share = path[:3] in ("\\" * 3, "/" * 3)
+            import ntpath
+
+            path = ntpath.normpath(path)
+            # Windows shared drives are represented as ``\\host\\directory``.
+            # That results in a URL like ``file://///host/directory``, and a
+            # path like ``///host/directory``. We need to special-case this
+            # because the path contains the hostname.
+            if windows_share and host is None:
+                parts = path.lstrip("\\").split("\\", 1)
+                if len(parts) == 2:
+                    host, path = parts
+                else:
+                    host = parts[0]
+                    path = ""
+        elif pathformat == "posix":
+            import posixpath
+
+            path = posixpath.normpath(path)
+        else:
+            raise TypeError("Invalid path format %s" % repr(pathformat))
+
+        if host in ("127.0.0.1", "::1", "localhost"):
+            host = None
+
+        return host, path
+
+    def _split_netloc(self):
+        if self._at in self.netloc:
+            return self.netloc.split(self._at, 1)
+        return None, self.netloc
+
+    def _split_auth(self):
+        auth = self._split_netloc()[0]
+        if not auth:
+            return None, None
+        if self._colon not in auth:
+            return auth, None
+        return auth.split(self._colon, 1)
+
+    def _split_host(self):
+        rv = self._split_netloc()[1]
+        if not rv:
+            return None, None
+
+        if not rv.startswith(self._lbracket):
+            if self._colon in rv:
+                return rv.split(self._colon, 1)
+            return rv, None
+
+        idx = rv.find(self._rbracket)
+        if idx < 0:
+            return rv, None
+
+        host = rv[1:idx]
+        rest = rv[idx + 1 :]
+        if rest.startswith(self._colon):
+            return host, rest[1:]
+        return host, None
+
+
+@implements_to_string
+class URL(BaseURL):
+    """Represents a parsed URL.  This behaves like a regular tuple but
+    also has some extra attributes that give further insight into the
+    URL.
+    """
+
+    __slots__ = ()
+    _at = "@"
+    _colon = ":"
+    _lbracket = "["
+    _rbracket = "]"
+
+    def __str__(self):
+        return self.to_url()
+
+    def encode_netloc(self):
+        """Encodes the netloc part to an ASCII safe URL as bytes."""
+        rv = self.ascii_host or ""
+        if ":" in rv:
+            rv = "[%s]" % rv
+        port = self.port
+        if port is not None:
+            rv = "%s:%d" % (rv, port)
+        auth = ":".join(
+            filter(
+                None,
+                [
+                    url_quote(self.raw_username or "", "utf-8", "strict", "/:%"),
+                    url_quote(self.raw_password or "", "utf-8", "strict", "/:%"),
+                ],
+            )
+        )
+        if auth:
+            rv = "%s@%s" % (auth, rv)
+        return to_native(rv)
+
+    def encode(self, charset="utf-8", errors="replace"):
+        """Encodes the URL to a tuple made out of bytes.  The charset is
+        only being used for the path, query and fragment.
+        """
+        return BytesURL(
+            self.scheme.encode("ascii"),
+            self.encode_netloc(),
+            self.path.encode(charset, errors),
+            self.query.encode(charset, errors),
+            self.fragment.encode(charset, errors),
+        )
+
+
+class BytesURL(BaseURL):
+    """Represents a parsed URL in bytes."""
+
+    __slots__ = ()
+    _at = b"@"
+    _colon = b":"
+    _lbracket = b"["
+    _rbracket = b"]"
+
+    def __str__(self):
+        return self.to_url().decode("utf-8", "replace")
+
+    def encode_netloc(self):
+        """Returns the netloc unchanged as bytes."""
+        return self.netloc
+
+    def decode(self, charset="utf-8", errors="replace"):
+        """Decodes the URL to a tuple made out of strings.  The charset is
+        only being used for the path, query and fragment.
+        """
+        return URL(
+            self.scheme.decode("ascii"),
+            self.decode_netloc(),
+            self.path.decode(charset, errors),
+            self.query.decode(charset, errors),
+            self.fragment.decode(charset, errors),
+        )
+
+
+_unquote_maps = {frozenset(): _hextobyte}
+
+
+def _unquote_to_bytes(string, unsafe=""):
+    if isinstance(string, text_type):
+        string = string.encode("utf-8")
+
+    if isinstance(unsafe, text_type):
+        unsafe = unsafe.encode("utf-8")
+
+    unsafe = frozenset(bytearray(unsafe))
+    groups = iter(string.split(b"%"))
+    result = bytearray(next(groups, b""))
+
+    try:
+        hex_to_byte = _unquote_maps[unsafe]
+    except KeyError:
+        hex_to_byte = _unquote_maps[unsafe] = {
+            h: b for h, b in _hextobyte.items() if b not in unsafe
+        }
+
+    for group in groups:
+        code = group[:2]
+
+        if code in hex_to_byte:
+            result.append(hex_to_byte[code])
+            result.extend(group[2:])
+        else:
+            result.append(37)  # %
+            result.extend(group)
+
+    return bytes(result)
+
+
+def _url_encode_impl(obj, charset, encode_keys, sort, key):
+    iterable = iter_multi_items(obj)
+    if sort:
+        iterable = sorted(iterable, key=key)
+    for key, value in iterable:
+        if value is None:
+            continue
+        if not isinstance(key, bytes):
+            key = text_type(key).encode(charset)
+        if not isinstance(value, bytes):
+            value = text_type(value).encode(charset)
+        yield _fast_url_quote_plus(key) + "=" + _fast_url_quote_plus(value)
+
+
+def _url_unquote_legacy(value, unsafe=""):
+    try:
+        return url_unquote(value, charset="utf-8", errors="strict", unsafe=unsafe)
+    except UnicodeError:
+        return url_unquote(value, charset="latin1", unsafe=unsafe)
+
+
+def url_parse(url, scheme=None, allow_fragments=True):
+    """Parses a URL from a string into a :class:`URL` tuple.  If the URL
+    is lacking a scheme it can be provided as second argument. Otherwise,
+    it is ignored.  Optionally fragments can be stripped from the URL
+    by setting `allow_fragments` to `False`.
+
+    The inverse of this function is :func:`url_unparse`.
+
+    :param url: the URL to parse.
+    :param scheme: the default schema to use if the URL is schemaless.
+    :param allow_fragments: if set to `False` a fragment will be removed
+                            from the URL.
+    """
+    s = make_literal_wrapper(url)
+    is_text_based = isinstance(url, text_type)
+
+    if scheme is None:
+        scheme = s("")
+    netloc = query = fragment = s("")
+    i = url.find(s(":"))
+    if i > 0 and _scheme_re.match(to_native(url[:i], errors="replace")):
+        # make sure "iri" is not actually a port number (in which case
+        # "scheme" is really part of the path)
+        rest = url[i + 1 :]
+        if not rest or any(c not in s("0123456789") for c in rest):
+            # not a port number
+            scheme, url = url[:i].lower(), rest
+
+    if url[:2] == s("//"):
+        delim = len(url)
+        for c in s("/?#"):
+            wdelim = url.find(c, 2)
+            if wdelim >= 0:
+                delim = min(delim, wdelim)
+        netloc, url = url[2:delim], url[delim:]
+        if (s("[") in netloc and s("]") not in netloc) or (
+            s("]") in netloc and s("[") not in netloc
+        ):
+            raise ValueError("Invalid IPv6 URL")
+
+    if allow_fragments and s("#") in url:
+        url, fragment = url.split(s("#"), 1)
+    if s("?") in url:
+        url, query = url.split(s("?"), 1)
+
+    result_type = URL if is_text_based else BytesURL
+    return result_type(scheme, netloc, url, query, fragment)
+
+
+def _make_fast_url_quote(charset="utf-8", errors="strict", safe="/:", unsafe=""):
+    """Precompile the translation table for a URL encoding function.
+
+    Unlike :func:`url_quote`, the generated function only takes the
+    string to quote.
+
+    :param charset: The charset to encode the result with.
+    :param errors: How to handle encoding errors.
+    :param safe: An optional sequence of safe characters to never encode.
+    :param unsafe: An optional sequence of unsafe characters to always encode.
+    """
+    if isinstance(safe, text_type):
+        safe = safe.encode(charset, errors)
+
+    if isinstance(unsafe, text_type):
+        unsafe = unsafe.encode(charset, errors)
+
+    safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
+    table = [chr(c) if c in safe else "%%%02X" % c for c in range(256)]
+
+    if not PY2:
+
+        def quote(string):
+            return "".join([table[c] for c in string])
+
+    else:
+
+        def quote(string):
+            return "".join([table[c] for c in bytearray(string)])
+
+    return quote
+
+
+_fast_url_quote = _make_fast_url_quote()
+_fast_quote_plus = _make_fast_url_quote(safe=" ", unsafe="+")
+
+
+def _fast_url_quote_plus(string):
+    return _fast_quote_plus(string).replace(" ", "+")
+
+
+def url_quote(string, charset="utf-8", errors="strict", safe="/:", unsafe=""):
+    """URL encode a single string with a given encoding.
+
+    :param s: the string to quote.
+    :param charset: the charset to be used.
+    :param safe: an optional sequence of safe characters.
+    :param unsafe: an optional sequence of unsafe characters.
+
+    .. versionadded:: 0.9.2
+       The `unsafe` parameter was added.
+    """
+    if not isinstance(string, (text_type, bytes, bytearray)):
+        string = text_type(string)
+    if isinstance(string, text_type):
+        string = string.encode(charset, errors)
+    if isinstance(safe, text_type):
+        safe = safe.encode(charset, errors)
+    if isinstance(unsafe, text_type):
+        unsafe = unsafe.encode(charset, errors)
+    safe = (frozenset(bytearray(safe)) | _always_safe) - frozenset(bytearray(unsafe))
+    rv = bytearray()
+    for char in bytearray(string):
+        if char in safe:
+            rv.append(char)
+        else:
+            rv.extend(_bytetohex[char])
+    return to_native(bytes(rv))
+
+
+def url_quote_plus(string, charset="utf-8", errors="strict", safe=""):
+    """URL encode a single string with the given encoding and convert
+    whitespace to "+".
+
+    :param s: The string to quote.
+    :param charset: The charset to be used.
+    :param safe: An optional sequence of safe characters.
+    """
+    return url_quote(string, charset, errors, safe + " ", "+").replace(" ", "+")
+
+
+def url_unparse(components):
+    """The reverse operation to :meth:`url_parse`.  This accepts arbitrary
+    as well as :class:`URL` tuples and returns a URL as a string.
+
+    :param components: the parsed URL as tuple which should be converted
+                       into a URL string.
+    """
+    scheme, netloc, path, query, fragment = normalize_string_tuple(components)
+    s = make_literal_wrapper(scheme)
+    url = s("")
+
+    # We generally treat file:///x and file:/x the same which is also
+    # what browsers seem to do.  This also allows us to ignore a schema
+    # register for netloc utilization or having to differenciate between
+    # empty and missing netloc.
+    if netloc or (scheme and path.startswith(s("/"))):
+        if path and path[:1] != s("/"):
+            path = s("/") + path
+        url = s("//") + (netloc or s("")) + path
+    elif path:
+        url += path
+    if scheme:
+        url = scheme + s(":") + url
+    if query:
+        url = url + s("?") + query
+    if fragment:
+        url = url + s("#") + fragment
+    return url
+
+
+def url_unquote(string, charset="utf-8", errors="replace", unsafe=""):
+    """URL decode a single string with a given encoding.  If the charset
+    is set to `None` no unicode decoding is performed and raw bytes
+    are returned.
+
+    :param s: the string to unquote.
+    :param charset: the charset of the query string.  If set to `None`
+                    no unicode decoding will take place.
+    :param errors: the error handling for the charset decoding.
+    """
+    rv = _unquote_to_bytes(string, unsafe)
+    if charset is not None:
+        rv = rv.decode(charset, errors)
+    return rv
+
+
+def url_unquote_plus(s, charset="utf-8", errors="replace"):
+    """URL decode a single string with the given `charset` and decode "+" to
+    whitespace.
+
+    Per default encoding errors are ignored.  If you want a different behavior
+    you can set `errors` to ``'replace'`` or ``'strict'``.  In strict mode a
+    :exc:`HTTPUnicodeError` is raised.
+
+    :param s: The string to unquote.
+    :param charset: the charset of the query string.  If set to `None`
+                    no unicode decoding will take place.
+    :param errors: The error handling for the `charset` decoding.
+    """
+    if isinstance(s, text_type):
+        s = s.replace(u"+", u" ")
+    else:
+        s = s.replace(b"+", b" ")
+    return url_unquote(s, charset, errors)
+
+
+def url_fix(s, charset="utf-8"):
+    r"""Sometimes you get an URL by a user that just isn't a real URL because
+    it contains unsafe characters like ' ' and so on. This function can fix
+    some of the problems in a similar way browsers handle data entered by the
+    user:
+
+    >>> url_fix(u'http://de.wikipedia.org/wiki/Elf (Begriffskl\xe4rung)')
+    'http://de.wikipedia.org/wiki/Elf%20(Begriffskl%C3%A4rung)'
+
+    :param s: the string with the URL to fix.
+    :param charset: The target charset for the URL if the url was given as
+                    unicode string.
+    """
+    # First step is to switch to unicode processing and to convert
+    # backslashes (which are invalid in URLs anyways) to slashes.  This is
+    # consistent with what Chrome does.
+    s = to_unicode(s, charset, "replace").replace("\\", "/")
+
+    # For the specific case that we look like a malformed windows URL
+    # we want to fix this up manually:
+    if s.startswith("file://") and s[7:8].isalpha() and s[8:10] in (":/", "|/"):
+        s = "file:///" + s[7:]
+
+    url = url_parse(s)
+    path = url_quote(url.path, charset, safe="/%+$!*'(),")
+    qs = url_quote_plus(url.query, charset, safe=":&%=+$!*'(),")
+    anchor = url_quote_plus(url.fragment, charset, safe=":&%=+$!*'(),")
+    return to_native(url_unparse((url.scheme, url.encode_netloc(), path, qs, anchor)))
+
+
+# not-unreserved characters remain quoted when unquoting to IRI
+_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe])
+
+
+def _codec_error_url_quote(e):
+    """Used in :func:`uri_to_iri` after unquoting to re-quote any
+    invalid bytes.
+    """
+    out = _fast_url_quote(e.object[e.start : e.end])
+
+    if PY2:
+        out = out.decode("utf-8")
+
+    return out, e.end
+
+
+codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)
+
+
+def uri_to_iri(uri, charset="utf-8", errors="werkzeug.url_quote"):
+    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
+    leaving all reserved and invalid characters quoted. If the URL has
+    a domain, it is decoded from Punycode.
+
+    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
+    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'
+
+    :param uri: The URI to convert.
+    :param charset: The encoding to encode unquoted bytes with.
+    :param errors: Error handler to use during ``bytes.encode``. By
+        default, invalid bytes are left quoted.
+
+    .. versionchanged:: 0.15
+        All reserved and invalid characters remain quoted. Previously,
+        only some reserved characters were preserved, and invalid bytes
+        were replaced instead of left quoted.
+
+    .. versionadded:: 0.6
+    """
+    if isinstance(uri, tuple):
+        uri = url_unparse(uri)
+
+    uri = url_parse(to_unicode(uri, charset))
+    path = url_unquote(uri.path, charset, errors, _to_iri_unsafe)
+    query = url_unquote(uri.query, charset, errors, _to_iri_unsafe)
+    fragment = url_unquote(uri.fragment, charset, errors, _to_iri_unsafe)
+    return url_unparse((uri.scheme, uri.decode_netloc(), path, query, fragment))
+
+
+# reserved characters remain unquoted when quoting to URI
+_to_uri_safe = ":/?#[]@!$&'()*+,;=%"
+
+
+def iri_to_uri(iri, charset="utf-8", errors="strict", safe_conversion=False):
+    """Convert an IRI to a URI. All non-ASCII and unsafe characters are
+    quoted. If the URL has a domain, it is encoded to Punycode.
+
+    >>> iri_to_uri('http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF')
+    'http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF'
+
+    :param iri: The IRI to convert.
+    :param charset: The encoding of the IRI.
+    :param errors: Error handler to use during ``bytes.encode``.
+    :param safe_conversion: Return the URL unchanged if it only contains
+        ASCII characters and no whitespace. See the explanation below.
+
+    There is a general problem with IRI conversion with some protocols
+    that are in violation of the URI specification. Consider the
+    following two IRIs::
+
+        magnet:?xt=uri:whatever
+        itms-services://?action=download-manifest
+
+    After parsing, we don't know if the scheme requires the ``//``,
+    which is dropped if empty, but conveys different meanings in the
+    final URL if it's present or not. In this case, you can use
+    ``safe_conversion``, which will return the URL unchanged if it only
+    contains ASCII characters and no whitespace. This can result in a
+    URI with unquoted characters if it was not already quoted correctly,
+    but preserves the URL's semantics. Werkzeug uses this for the
+    ``Location`` header for redirects.
+
+    .. versionchanged:: 0.15
+        All reserved characters remain unquoted. Previously, only some
+        reserved characters were left unquoted.
+
+    .. versionchanged:: 0.9.6
+       The ``safe_conversion`` parameter was added.
+
+    .. versionadded:: 0.6
+    """
+    if isinstance(iri, tuple):
+        iri = url_unparse(iri)
+
+    if safe_conversion:
+        # If we're not sure if it's safe to convert the URL, and it only
+        # contains ASCII characters, return it unconverted.
+        try:
+            native_iri = to_native(iri)
+            ascii_iri = native_iri.encode("ascii")
+
+            # Only return if it doesn't have whitespace. (Why?)
+            if len(ascii_iri.split()) == 1:
+                return native_iri
+        except UnicodeError:
+            pass
+
+    iri = url_parse(to_unicode(iri, charset, errors))
+    path = url_quote(iri.path, charset, errors, _to_uri_safe)
+    query = url_quote(iri.query, charset, errors, _to_uri_safe)
+    fragment = url_quote(iri.fragment, charset, errors, _to_uri_safe)
+    return to_native(
+        url_unparse((iri.scheme, iri.encode_netloc(), path, query, fragment))
+    )
+
+
+def url_decode(
+    s,
+    charset="utf-8",
+    decode_keys=False,
+    include_empty=True,
+    errors="replace",
+    separator="&",
+    cls=None,
+):
+    """
+    Parse a querystring and return it as :class:`MultiDict`.  There is a
+    difference in key decoding on different Python versions.  On Python 3
+    keys will always be fully decoded whereas on Python 2, keys will
+    remain bytestrings if they fit into ASCII.  On 2.x keys can be forced
+    to be unicode by setting `decode_keys` to `True`.
+
+    If the charset is set to `None` no unicode decoding will happen and
+    raw bytes will be returned.
+
+    Per default a missing value for a key will default to an empty key.  If
+    you don't want that behavior you can set `include_empty` to `False`.
+
+    Per default encoding errors are ignored.  If you want a different behavior
+    you can set `errors` to ``'replace'`` or ``'strict'``.  In strict mode a
+    `HTTPUnicodeError` is raised.
+
+    .. versionchanged:: 0.5
+       In previous versions ";" and "&" could be used for url decoding.
+       This changed in 0.5 where only "&" is supported.  If you want to
+       use ";" instead a different `separator` can be provided.
+
+       The `cls` parameter was added.
+
+    :param s: a string with the query string to decode.
+    :param charset: the charset of the query string.  If set to `None`
+                    no unicode decoding will take place.
+    :param decode_keys: Used on Python 2.x to control whether keys should
+                        be forced to be unicode objects.  If set to `True`
+                        then keys will be unicode in all cases. Otherwise,
+                        they remain `str` if they fit into ASCII.
+    :param include_empty: Set to `False` if you don't want empty values to
+                          appear in the dict.
+    :param errors: the decoding error behavior.
+    :param separator: the pair separator to be used, defaults to ``&``
+    :param cls: an optional dict class to use.  If this is not specified
+                       or `None` the default :class:`MultiDict` is used.
+    """
+    if cls is None:
+        cls = MultiDict
+    if isinstance(s, text_type) and not isinstance(separator, text_type):
+        separator = separator.decode(charset or "ascii")
+    elif isinstance(s, bytes) and not isinstance(separator, bytes):
+        separator = separator.encode(charset or "ascii")
+    return cls(
+        _url_decode_impl(
+            s.split(separator), charset, decode_keys, include_empty, errors
+        )
+    )
+
+
+def url_decode_stream(
+    stream,
+    charset="utf-8",
+    decode_keys=False,
+    include_empty=True,
+    errors="replace",
+    separator="&",
+    cls=None,
+    limit=None,
+    return_iterator=False,
+):
+    """Works like :func:`url_decode` but decodes a stream.  The behavior
+    of stream and limit follows functions like
+    :func:`~werkzeug.wsgi.make_line_iter`.  The generator of pairs is
+    directly fed to the `cls` so you can consume the data while it's
+    parsed.
+
+    .. versionadded:: 0.8
+
+    :param stream: a stream with the encoded querystring
+    :param charset: the charset of the query string.  If set to `None`
+                    no unicode decoding will take place.
+    :param decode_keys: Used on Python 2.x to control whether keys should
+                        be forced to be unicode objects.  If set to `True`,
+                        keys will be unicode in all cases. Otherwise, they
+                        remain `str` if they fit into ASCII.
+    :param include_empty: Set to `False` if you don't want empty values to
+                          appear in the dict.
+    :param errors: the decoding error behavior.
+    :param separator: the pair separator to be used, defaults to ``&``
+    :param cls: an optional dict class to use.  If this is not specified
+                       or `None` the default :class:`MultiDict` is used.
+    :param limit: the content length of the URL data.  Not necessary if
+                  a limited stream is provided.
+    :param return_iterator: if set to `True` the `cls` argument is ignored
+                            and an iterator over all decoded pairs is
+                            returned
+    """
+    from .wsgi import make_chunk_iter
+
+    pair_iter = make_chunk_iter(stream, separator, limit)
+    decoder = _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors)
+
+    if return_iterator:
+        return decoder
+
+    if cls is None:
+        cls = MultiDict
+
+    return cls(decoder)
+
+
+def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors):
+    for pair in pair_iter:
+        if not pair:
+            continue
+        s = make_literal_wrapper(pair)
+        equal = s("=")
+        if equal in pair:
+            key, value = pair.split(equal, 1)
+        else:
+            if not include_empty:
+                continue
+            key = pair
+            value = s("")
+        key = url_unquote_plus(key, charset, errors)
+        if charset is not None and PY2 and not decode_keys:
+            key = try_coerce_native(key)
+        yield key, url_unquote_plus(value, charset, errors)
+
+
+def url_encode(
+    obj, charset="utf-8", encode_keys=False, sort=False, key=None, separator=b"&"
+):
+    """URL encode a dict/`MultiDict`.  If a value is `None` it will not appear
+    in the result string.  Per default only values are encoded into the target
+    charset strings.  If `encode_keys` is set to ``True`` unicode keys are
+    supported too.
+
+    If `sort` is set to `True` the items are sorted by `key` or the default
+    sorting algorithm.
+
+    .. versionadded:: 0.5
+        `sort`, `key`, and `separator` were added.
+
+    :param obj: the object to encode into a query string.
+    :param charset: the charset of the query string.
+    :param encode_keys: set to `True` if you have unicode keys. (Ignored on
+                        Python 3.x)
+    :param sort: set to `True` if you want parameters to be sorted by `key`.
+    :param separator: the separator to be used for the pairs.
+    :param key: an optional function to be used for sorting.  For more details
+                check out the :func:`sorted` documentation.
+    """
+    separator = to_native(separator, "ascii")
+    return separator.join(_url_encode_impl(obj, charset, encode_keys, sort, key))
+
+
+def url_encode_stream(
+    obj,
+    stream=None,
+    charset="utf-8",
+    encode_keys=False,
+    sort=False,
+    key=None,
+    separator=b"&",
+):
+    """Like :meth:`url_encode` but writes the results to a stream
+    object.  If the stream is `None` a generator over all encoded
+    pairs is returned.
+
+    .. versionadded:: 0.8
+
+    :param obj: the object to encode into a query string.
+    :param stream: a stream to write the encoded object into or `None` if
+                   an iterator over the encoded pairs should be returned.  In
+                   that case the separator argument is ignored.
+    :param charset: the charset of the query string.
+    :param encode_keys: set to `True` if you have unicode keys. (Ignored on
+                        Python 3.x)
+    :param sort: set to `True` if you want parameters to be sorted by `key`.
+    :param separator: the separator to be used for the pairs.
+    :param key: an optional function to be used for sorting.  For more details
+                check out the :func:`sorted` documentation.
+    """
+    separator = to_native(separator, "ascii")
+    gen = _url_encode_impl(obj, charset, encode_keys, sort, key)
+    if stream is None:
+        return gen
+    for idx, chunk in enumerate(gen):
+        if idx:
+            stream.write(separator)
+        stream.write(chunk)
+
+
+def url_join(base, url, allow_fragments=True):
+    """Join a base URL and a possibly relative URL to form an absolute
+    interpretation of the latter.
+
+    :param base: the base URL for the join operation.
+    :param url: the URL to join.
+    :param allow_fragments: indicates whether fragments should be allowed.
+    """
+    if isinstance(base, tuple):
+        base = url_unparse(base)
+    if isinstance(url, tuple):
+        url = url_unparse(url)
+
+    base, url = normalize_string_tuple((base, url))
+    s = make_literal_wrapper(base)
+
+    if not base:
+        return url
+    if not url:
+        return base
+
+    bscheme, bnetloc, bpath, bquery, bfragment = url_parse(
+        base, allow_fragments=allow_fragments
+    )
+    scheme, netloc, path, query, fragment = url_parse(url, bscheme, allow_fragments)
+    if scheme != bscheme:
+        return url
+    if netloc:
+        return url_unparse((scheme, netloc, path, query, fragment))
+    netloc = bnetloc
+
+    if path[:1] == s("/"):
+        segments = path.split(s("/"))
+    elif not path:
+        segments = bpath.split(s("/"))
+        if not query:
+            query = bquery
+    else:
+        segments = bpath.split(s("/"))[:-1] + path.split(s("/"))
+
+    # If the rightmost part is "./" we want to keep the slash but
+    # remove the dot.
+    if segments[-1] == s("."):
+        segments[-1] = s("")
+
+    # Resolve ".." and "."
+    segments = [segment for segment in segments if segment != s(".")]
+    while 1:
+        i = 1
+        n = len(segments) - 1
+        while i < n:
+            if segments[i] == s("..") and segments[i - 1] not in (s(""), s("..")):
+                del segments[i - 1 : i + 1]
+                break
+            i += 1
+        else:
+            break
+
+    # Remove trailing ".." if the URL is absolute
+    unwanted_marker = [s(""), s("..")]
+    while segments[:2] == unwanted_marker:
+        del segments[1]
+
+    path = s("/").join(segments)
+    return url_unparse((scheme, netloc, path, query, fragment))
+
+
+class Href(object):
+    """Implements a callable that constructs URLs with the given base. The
+    function can be called with any number of positional and keyword
+    arguments which than are used to assemble the URL.  Works with URLs
+    and posix paths.
+
+    Positional arguments are appended as individual segments to
+    the path of the URL:
+
+    >>> href = Href('/foo')
+    >>> href('bar', 23)
+    '/foo/bar/23'
+    >>> href('foo', bar=23)
+    '/foo/foo?bar=23'
+
+    If any of the arguments (positional or keyword) evaluates to `None` it
+    will be skipped.  If no keyword arguments are given the last argument
+    can be a :class:`dict` or :class:`MultiDict` (or any other dict subclass),
+    otherwise the keyword arguments are used for the query parameters, cutting
+    off the first trailing underscore of the parameter name:
+
+    >>> href(is_=42)
+    '/foo?is=42'
+    >>> href({'foo': 'bar'})
+    '/foo?foo=bar'
+
+    Combining of both methods is not allowed:
+
+    >>> href({'foo': 'bar'}, bar=42)
+    Traceback (most recent call last):
+      ...
+    TypeError: keyword arguments and query-dicts can't be combined
+
+    Accessing attributes on the href object creates a new href object with
+    the attribute name as prefix:
+
+    >>> bar_href = href.bar
+    >>> bar_href("blub")
+    '/foo/bar/blub'
+
+    If `sort` is set to `True` the items are sorted by `key` or the default
+    sorting algorithm:
+
+    >>> href = Href("/", sort=True)
+    >>> href(a=1, b=2, c=3)
+    '/?a=1&b=2&c=3'
+
+    .. versionadded:: 0.5
+        `sort` and `key` were added.
+    """
+
+    def __init__(self, base="./", charset="utf-8", sort=False, key=None):
+        if not base:
+            base = "./"
+        self.base = base
+        self.charset = charset
+        self.sort = sort
+        self.key = key
+
+    def __getattr__(self, name):
+        if name[:2] == "__":
+            raise AttributeError(name)
+        base = self.base
+        if base[-1:] != "/":
+            base += "/"
+        return Href(url_join(base, name), self.charset, self.sort, self.key)
+
+    def __call__(self, *path, **query):
+        if path and isinstance(path[-1], dict):
+            if query:
+                raise TypeError("keyword arguments and query-dicts can't be combined")
+            query, path = path[-1], path[:-1]
+        elif query:
+            query = dict(
+                [(k.endswith("_") and k[:-1] or k, v) for k, v in query.items()]
+            )
+        path = "/".join(
+            [
+                to_unicode(url_quote(x, self.charset), "ascii")
+                for x in path
+                if x is not None
+            ]
+        ).lstrip("/")
+        rv = self.base
+        if path:
+            if not rv.endswith("/"):
+                rv += "/"
+            rv = url_join(rv, "./" + path)
+        if query:
+            rv += "?" + to_unicode(
+                url_encode(query, self.charset, sort=self.sort, key=self.key), "ascii"
+            )
+        return to_native(rv)
author	James Taylor <user234683@users.noreply.github.com>	2019-08-09 22:01:04 -0700
committer	James Taylor <user234683@users.noreply.github.com>	2019-08-09 22:01:04 -0700
commit	2e75c6d9603f8a5edf6495f8d4fb3115a67d823c (patch)
tree	8fb2d1bec2cf0e50c5fce6bc718f755485419db0 /python/werkzeug/urls.py
parent	cc9283ad5332f59a69a91d9d0fab299779de513c (diff)
parent	adc40bc760345a23678a01f27d7697dfd3811914 (diff)
download	yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.lz yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.tar.xz yt-local-2e75c6d9603f8a5edf6495f8d4fb3115a67d823c.zip