1 files changed, 230 insertions, 0 deletions
diff --git a/python/urllib3/util/url.py b/python/urllib3/util/url.py
new file mode 100644
index 0000000..6b6f996
--- /dev/null
+++ b/python/urllib3/util/url.py
@@ -0,0 +1,230 @@
+from __future__ import absolute_import
+from collections import namedtuple
+
+from ..exceptions import LocationParseError
+
+
+url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
+
+# We only want to normalize urls with an HTTP(S) scheme.
+# urllib3 infers URLs without a scheme (None) to be http.
+NORMALIZABLE_SCHEMES = ('http', 'https', None)
+
+
+class Url(namedtuple('Url', url_attrs)):
+    """
+    Datastructure for representing an HTTP URL. Used as a return value for
+    :func:`parse_url`. Both the scheme and host are normalized as they are
+    both case-insensitive according to RFC 3986.
+    """
+    __slots__ = ()
+
+    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
+                query=None, fragment=None):
+        if path and not path.startswith('/'):
+            path = '/' + path
+        if scheme:
+            scheme = scheme.lower()
+        if host and scheme in NORMALIZABLE_SCHEMES:
+            host = host.lower()
+        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
+                                       query, fragment)
+
+    @property
+    def hostname(self):
+        """For backwards-compatibility with urlparse. We're nice like that."""
+        return self.host
+
+    @property
+    def request_uri(self):
+        """Absolute path including the query string."""
+        uri = self.path or '/'
+
+        if self.query is not None:
+            uri += '?' + self.query
+
+        return uri
+
+    @property
+    def netloc(self):
+        """Network location including host and port"""
+        if self.port:
+            return '%s:%d' % (self.host, self.port)
+        return self.host
+
+    @property
+    def url(self):
+        """
+        Convert self into a url
+
+        This function should more or less round-trip with :func:`.parse_url`. The
+        returned url may not be exactly the same as the url inputted to
+        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
+        with a blank port will have : removed).
+
+        Example: ::
+
+            >>> U = parse_url('http://google.com/mail/')
+            >>> U.url
+            'http://google.com/mail/'
+            >>> Url('http', 'username:password', 'host.com', 80,
+            ... '/path', 'query', 'fragment').url
+            'http://username:password@host.com:80/path?query#fragment'
+        """
+        scheme, auth, host, port, path, query, fragment = self
+        url = ''
+
+        # We use "is not None" we want things to happen with empty strings (or 0 port)
+        if scheme is not None:
+            url += scheme + '://'
+        if auth is not None:
+            url += auth + '@'
+        if host is not None:
+            url += host
+        if port is not None:
+            url += ':' + str(port)
+        if path is not None:
+            url += path
+        if query is not None:
+            url += '?' + query
+        if fragment is not None:
+            url += '#' + fragment
+
+        return url
+
+    def __str__(self):
+        return self.url
+
+
+def split_first(s, delims):
+    """
+    Given a string and an iterable of delimiters, split on the first found
+    delimiter. Return two split parts and the matched delimiter.
+
+    If not found, then the first part is the full input string.
+
+    Example::
+
+        >>> split_first('foo/bar?baz', '?/=')
+        ('foo', 'bar?baz', '/')
+        >>> split_first('foo/bar?baz', '123')
+        ('foo/bar?baz', '', None)
+
+    Scales linearly with number of delims. Not ideal for large number of delims.
+    """
+    min_idx = None
+    min_delim = None
+    for d in delims:
+        idx = s.find(d)
+        if idx < 0:
+            continue
+
+        if min_idx is None or idx < min_idx:
+            min_idx = idx
+            min_delim = d
+
+    if min_idx is None or min_idx < 0:
+        return s, '', None
+
+    return s[:min_idx], s[min_idx + 1:], min_delim
+
+
+def parse_url(url):
+    """
+    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+    performed to parse incomplete urls. Fields not provided will be None.
+
+    Partly backwards-compatible with :mod:`urlparse`.
+
+    Example::
+
+        >>> parse_url('http://google.com/mail/')
+        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+        >>> parse_url('google.com:80')
+        Url(scheme=None, host='google.com', port=80, path=None, ...)
+        >>> parse_url('/foo?bar')
+        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+    """
+
+    # While this code has overlap with stdlib's urlparse, it is much
+    # simplified for our needs and less annoying.
+    # Additionally, this implementations does silly things to be optimal
+    # on CPython.
+
+    if not url:
+        # Empty
+        return Url()
+
+    scheme = None
+    auth = None
+    host = None
+    port = None
+    path = None
+    fragment = None
+    query = None
+
+    # Scheme
+    if '://' in url:
+        scheme, url = url.split('://', 1)
+
+    # Find the earliest Authority Terminator
+    # (http://tools.ietf.org/html/rfc3986#section-3.2)
+    url, path_, delim = split_first(url, ['/', '?', '#'])
+
+    if delim:
+        # Reassemble the path
+        path = delim + path_
+
+    # Auth
+    if '@' in url:
+        # Last '@' denotes end of auth part
+        auth, url = url.rsplit('@', 1)
+
+    # IPv6
+    if url and url[0] == '[':
+        host, url = url.split(']', 1)
+        host += ']'
+
+    # Port
+    if ':' in url:
+        _host, port = url.split(':', 1)
+
+        if not host:
+            host = _host
+
+        if port:
+            # If given, ports must be integers. No whitespace, no plus or
+            # minus prefixes, no non-integer digits such as ^2 (superscript).
+            if not port.isdigit():
+                raise LocationParseError(url)
+            try:
+                port = int(port)
+            except ValueError:
+                raise LocationParseError(url)
+        else:
+            # Blank ports are cool, too. (rfc3986#section-3.2.3)
+            port = None
+
+    elif not host and url:
+        host = url
+
+    if not path:
+        return Url(scheme, auth, host, port, path, query, fragment)
+
+    # Fragment
+    if '#' in path:
+        path, fragment = path.split('#', 1)
+
+    # Query
+    if '?' in path:
+        path, query = path.split('?', 1)
+
+    return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def get_host(url):
+    """
+    Deprecated. Use :func:`parse_url` instead.
+    """
+    p = parse_url(url)
+    return p.scheme or 'http', p.hostname, p.port