aboutsummaryrefslogtreecommitdiffstats
path: root/python/urllib3/util
diff options
context:
space:
mode:
authorJames Taylor <user234683@users.noreply.github.com>2019-02-25 20:47:49 -0800
committerJames Taylor <user234683@users.noreply.github.com>2019-02-25 20:47:49 -0800
commit1e1f55c9e5d98ba076bc67e7abe9e4d77d84c65b (patch)
tree56eeabd9f7f0813c4b02f03f865d7cae35277d29 /python/urllib3/util
parentb32330be4f15dd044e6212f526e52375f0a0f6c2 (diff)
downloadyt-local-1e1f55c9e5d98ba076bc67e7abe9e4d77d84c65b.tar.lz
yt-local-1e1f55c9e5d98ba076bc67e7abe9e4d77d84c65b.tar.xz
yt-local-1e1f55c9e5d98ba076bc67e7abe9e4d77d84c65b.zip
Use persistent connections
Diffstat (limited to 'python/urllib3/util')
-rw-r--r--python/urllib3/util/__init__.py54
-rw-r--r--python/urllib3/util/connection.py134
-rw-r--r--python/urllib3/util/queue.py21
-rw-r--r--python/urllib3/util/request.py118
-rw-r--r--python/urllib3/util/response.py87
-rw-r--r--python/urllib3/util/retry.py411
-rw-r--r--python/urllib3/util/ssl_.py381
-rw-r--r--python/urllib3/util/timeout.py242
-rw-r--r--python/urllib3/util/url.py230
-rw-r--r--python/urllib3/util/wait.py150
10 files changed, 1828 insertions, 0 deletions
diff --git a/python/urllib3/util/__init__.py b/python/urllib3/util/__init__.py
new file mode 100644
index 0000000..2f2770b
--- /dev/null
+++ b/python/urllib3/util/__init__.py
@@ -0,0 +1,54 @@
+from __future__ import absolute_import
+# For backwards compatibility, provide imports that used to be here.
+from .connection import is_connection_dropped
+from .request import make_headers
+from .response import is_fp_closed
+from .ssl_ import (
+ SSLContext,
+ HAS_SNI,
+ IS_PYOPENSSL,
+ IS_SECURETRANSPORT,
+ assert_fingerprint,
+ resolve_cert_reqs,
+ resolve_ssl_version,
+ ssl_wrap_socket,
+)
+from .timeout import (
+ current_time,
+ Timeout,
+)
+
+from .retry import Retry
+from .url import (
+ get_host,
+ parse_url,
+ split_first,
+ Url,
+)
+from .wait import (
+ wait_for_read,
+ wait_for_write
+)
+
+__all__ = (
+ 'HAS_SNI',
+ 'IS_PYOPENSSL',
+ 'IS_SECURETRANSPORT',
+ 'SSLContext',
+ 'Retry',
+ 'Timeout',
+ 'Url',
+ 'assert_fingerprint',
+ 'current_time',
+ 'is_connection_dropped',
+ 'is_fp_closed',
+ 'get_host',
+ 'parse_url',
+ 'make_headers',
+ 'resolve_cert_reqs',
+ 'resolve_ssl_version',
+ 'split_first',
+ 'ssl_wrap_socket',
+ 'wait_for_read',
+ 'wait_for_write'
+)
diff --git a/python/urllib3/util/connection.py b/python/urllib3/util/connection.py
new file mode 100644
index 0000000..5ad70b2
--- /dev/null
+++ b/python/urllib3/util/connection.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import
+import socket
+from .wait import NoWayToWaitForSocketError, wait_for_read
+from ..contrib import _appengine_environ
+
+
+def is_connection_dropped(conn): # Platform-specific
+ """
+ Returns True if the connection is dropped and should be closed.
+
+ :param conn:
+ :class:`httplib.HTTPConnection` object.
+
+ Note: For platforms like AppEngine, this will always return ``False`` to
+ let the platform handle connection recycling transparently for us.
+ """
+ sock = getattr(conn, 'sock', False)
+ if sock is False: # Platform-specific: AppEngine
+ return False
+ if sock is None: # Connection already closed (such as by httplib).
+ return True
+ try:
+ # Returns True if readable, which here means it's been dropped
+ return wait_for_read(sock, timeout=0.0)
+ except NoWayToWaitForSocketError: # Platform-specific: AppEngine
+ return False
+
+
+# This function is copied from socket.py in the Python 2.7 standard
+# library test suite. Added to its signature is only `socket_options`.
+# One additional modification is that we avoid binding to IPv6 servers
+# discovered in DNS if the system doesn't have IPv6 functionality.
+def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, socket_options=None):
+ """Connect to *address* and return the socket object.
+
+ Convenience function. Connect to *address* (a 2-tuple ``(host,
+ port)``) and return the socket object. Passing the optional
+ *timeout* parameter will set the timeout on the socket instance
+ before attempting to connect. If no *timeout* is supplied, the
+ global default timeout setting returned by :func:`getdefaulttimeout`
+ is used. If *source_address* is set it must be a tuple of (host, port)
+ for the socket to bind as a source address before making the connection.
+ An host of '' or port 0 tells the OS to use the default.
+ """
+
+ host, port = address
+ if host.startswith('['):
+ host = host.strip('[]')
+ err = None
+
+ # Using the value from allowed_gai_family() in the context of getaddrinfo lets
+ # us select whether to work with IPv4 DNS records, IPv6 records, or both.
+ # The original create_connection function always returns all records.
+ family = allowed_gai_family()
+
+ for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+
+ # If provided, set socket level options before connecting.
+ _set_socket_options(sock, socket_options)
+
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+
+ except socket.error as e:
+ err = e
+ if sock is not None:
+ sock.close()
+ sock = None
+
+ if err is not None:
+ raise err
+
+ raise socket.error("getaddrinfo returns an empty list")
+
+
+def _set_socket_options(sock, options):
+ if options is None:
+ return
+
+ for opt in options:
+ sock.setsockopt(*opt)
+
+
+def allowed_gai_family():
+ """This function is designed to work in the context of
+ getaddrinfo, where family=socket.AF_UNSPEC is the default and
+ will perform a DNS search for both IPv6 and IPv4 records."""
+
+ family = socket.AF_INET
+ if HAS_IPV6:
+ family = socket.AF_UNSPEC
+ return family
+
+
+def _has_ipv6(host):
+ """ Returns True if the system can bind an IPv6 address. """
+ sock = None
+ has_ipv6 = False
+
+ # App Engine doesn't support IPV6 sockets and actually has a quota on the
+ # number of sockets that can be used, so just early out here instead of
+ # creating a socket needlessly.
+ # See https://github.com/urllib3/urllib3/issues/1446
+ if _appengine_environ.is_appengine_sandbox():
+ return False
+
+ if socket.has_ipv6:
+ # has_ipv6 returns true if cPython was compiled with IPv6 support.
+ # It does not tell us if the system has IPv6 support enabled. To
+ # determine that we must bind to an IPv6 address.
+ # https://github.com/shazow/urllib3/pull/611
+ # https://bugs.python.org/issue658327
+ try:
+ sock = socket.socket(socket.AF_INET6)
+ sock.bind((host, 0))
+ has_ipv6 = True
+ except Exception:
+ pass
+
+ if sock:
+ sock.close()
+ return has_ipv6
+
+
+HAS_IPV6 = _has_ipv6('::1')
diff --git a/python/urllib3/util/queue.py b/python/urllib3/util/queue.py
new file mode 100644
index 0000000..d3d379a
--- /dev/null
+++ b/python/urllib3/util/queue.py
@@ -0,0 +1,21 @@
+import collections
+from ..packages import six
+from ..packages.six.moves import queue
+
+if six.PY2:
+ # Queue is imported for side effects on MS Windows. See issue #229.
+ import Queue as _unused_module_Queue # noqa: F401
+
+
+class LifoQueue(queue.Queue):
+ def _init(self, _):
+ self.queue = collections.deque()
+
+ def _qsize(self, len=len):
+ return len(self.queue)
+
+ def _put(self, item):
+ self.queue.append(item)
+
+ def _get(self):
+ return self.queue.pop()
diff --git a/python/urllib3/util/request.py b/python/urllib3/util/request.py
new file mode 100644
index 0000000..3ddfcd5
--- /dev/null
+++ b/python/urllib3/util/request.py
@@ -0,0 +1,118 @@
+from __future__ import absolute_import
+from base64 import b64encode
+
+from ..packages.six import b, integer_types
+from ..exceptions import UnrewindableBodyError
+
+ACCEPT_ENCODING = 'gzip,deflate'
+_FAILEDTELL = object()
+
+
+def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
+ basic_auth=None, proxy_basic_auth=None, disable_cache=None):
+ """
+ Shortcuts for generating request headers.
+
+ :param keep_alive:
+ If ``True``, adds 'connection: keep-alive' header.
+
+ :param accept_encoding:
+ Can be a boolean, list, or string.
+ ``True`` translates to 'gzip,deflate'.
+ List will get joined by comma.
+ String will be used as provided.
+
+ :param user_agent:
+ String representing the user-agent you want, such as
+ "python-urllib3/0.6"
+
+ :param basic_auth:
+ Colon-separated username:password string for 'authorization: basic ...'
+ auth header.
+
+ :param proxy_basic_auth:
+ Colon-separated username:password string for 'proxy-authorization: basic ...'
+ auth header.
+
+ :param disable_cache:
+ If ``True``, adds 'cache-control: no-cache' header.
+
+ Example::
+
+ >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
+ {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
+ >>> make_headers(accept_encoding=True)
+ {'accept-encoding': 'gzip,deflate'}
+ """
+ headers = {}
+ if accept_encoding:
+ if isinstance(accept_encoding, str):
+ pass
+ elif isinstance(accept_encoding, list):
+ accept_encoding = ','.join(accept_encoding)
+ else:
+ accept_encoding = ACCEPT_ENCODING
+ headers['accept-encoding'] = accept_encoding
+
+ if user_agent:
+ headers['user-agent'] = user_agent
+
+ if keep_alive:
+ headers['connection'] = 'keep-alive'
+
+ if basic_auth:
+ headers['authorization'] = 'Basic ' + \
+ b64encode(b(basic_auth)).decode('utf-8')
+
+ if proxy_basic_auth:
+ headers['proxy-authorization'] = 'Basic ' + \
+ b64encode(b(proxy_basic_auth)).decode('utf-8')
+
+ if disable_cache:
+ headers['cache-control'] = 'no-cache'
+
+ return headers
+
+
+def set_file_position(body, pos):
+ """
+ If a position is provided, move file to that point.
+ Otherwise, we'll attempt to record a position for future use.
+ """
+ if pos is not None:
+ rewind_body(body, pos)
+ elif getattr(body, 'tell', None) is not None:
+ try:
+ pos = body.tell()
+ except (IOError, OSError):
+ # This differentiates from None, allowing us to catch
+ # a failed `tell()` later when trying to rewind the body.
+ pos = _FAILEDTELL
+
+ return pos
+
+
+def rewind_body(body, body_pos):
+ """
+ Attempt to rewind body to a certain position.
+ Primarily used for request redirects and retries.
+
+ :param body:
+ File-like object that supports seek.
+
+ :param int pos:
+ Position to seek to in file.
+ """
+ body_seek = getattr(body, 'seek', None)
+ if body_seek is not None and isinstance(body_pos, integer_types):
+ try:
+ body_seek(body_pos)
+ except (IOError, OSError):
+ raise UnrewindableBodyError("An error occurred when rewinding request "
+ "body for redirect/retry.")
+ elif body_pos is _FAILEDTELL:
+ raise UnrewindableBodyError("Unable to record file position for rewinding "
+ "request body during a redirect/retry.")
+ else:
+ raise ValueError("body_pos must be of type integer, "
+ "instead it was %s." % type(body_pos))
diff --git a/python/urllib3/util/response.py b/python/urllib3/util/response.py
new file mode 100644
index 0000000..3d54864
--- /dev/null
+++ b/python/urllib3/util/response.py
@@ -0,0 +1,87 @@
+from __future__ import absolute_import
+from ..packages.six.moves import http_client as httplib
+
+from ..exceptions import HeaderParsingError
+
+
+def is_fp_closed(obj):
+ """
+ Checks whether a given file-like object is closed.
+
+ :param obj:
+ The file-like object to check.
+ """
+
+ try:
+ # Check `isclosed()` first, in case Python3 doesn't set `closed`.
+ # GH Issue #928
+ return obj.isclosed()
+ except AttributeError:
+ pass
+
+ try:
+ # Check via the official file-like-object way.
+ return obj.closed
+ except AttributeError:
+ pass
+
+ try:
+ # Check if the object is a container for another file-like object that
+ # gets released on exhaustion (e.g. HTTPResponse).
+ return obj.fp is None
+ except AttributeError:
+ pass
+
+ raise ValueError("Unable to determine whether fp is closed.")
+
+
+def assert_header_parsing(headers):
+ """
+ Asserts whether all headers have been successfully parsed.
+ Extracts encountered errors from the result of parsing headers.
+
+ Only works on Python 3.
+
+ :param headers: Headers to verify.
+ :type headers: `httplib.HTTPMessage`.
+
+ :raises urllib3.exceptions.HeaderParsingError:
+ If parsing errors are found.
+ """
+
+ # This will fail silently if we pass in the wrong kind of parameter.
+ # To make debugging easier add an explicit check.
+ if not isinstance(headers, httplib.HTTPMessage):
+ raise TypeError('expected httplib.Message, got {0}.'.format(
+ type(headers)))
+
+ defects = getattr(headers, 'defects', None)
+ get_payload = getattr(headers, 'get_payload', None)
+
+ unparsed_data = None
+ if get_payload:
+ # get_payload is actually email.message.Message.get_payload;
+ # we're only interested in the result if it's not a multipart message
+ if not headers.is_multipart():
+ payload = get_payload()
+
+ if isinstance(payload, (bytes, str)):
+ unparsed_data = payload
+
+ if defects or unparsed_data:
+ raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
+
+
+def is_response_to_head(response):
+ """
+ Checks whether the request of a response has been a HEAD-request.
+ Handles the quirks of AppEngine.
+
+ :param conn:
+ :type conn: :class:`httplib.HTTPResponse`
+ """
+ # FIXME: Can we do this somehow without accessing private httplib _method?
+ method = response._method
+ if isinstance(method, int): # Platform-specific: Appengine
+ return method == 3
+ return method.upper() == 'HEAD'
diff --git a/python/urllib3/util/retry.py b/python/urllib3/util/retry.py
new file mode 100644
index 0000000..e7d0abd
--- /dev/null
+++ b/python/urllib3/util/retry.py
@@ -0,0 +1,411 @@
+from __future__ import absolute_import
+import time
+import logging
+from collections import namedtuple
+from itertools import takewhile
+import email
+import re
+
+from ..exceptions import (
+ ConnectTimeoutError,
+ MaxRetryError,
+ ProtocolError,
+ ReadTimeoutError,
+ ResponseError,
+ InvalidHeader,
+)
+from ..packages import six
+
+
+log = logging.getLogger(__name__)
+
+
+# Data structure for representing the metadata of requests that result in a retry.
+RequestHistory = namedtuple('RequestHistory', ["method", "url", "error",
+ "status", "redirect_location"])
+
+
+class Retry(object):
+ """ Retry configuration.
+
+ Each retry attempt will create a new Retry object with updated values, so
+ they can be safely reused.
+
+ Retries can be defined as a default for a pool::
+
+ retries = Retry(connect=5, read=2, redirect=5)
+ http = PoolManager(retries=retries)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', retries=Retry(10))
+
+ Retries can be disabled by passing ``False``::
+
+ response = http.request('GET', 'http://example.com/', retries=False)
+
+ Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
+ retries are disabled, in which case the causing exception will be raised.
+
+ :param int total:
+ Total number of retries to allow. Takes precedence over other counts.
+
+ Set to ``None`` to remove this constraint and fall back on other
+ counts. It's a good idea to set this to some sensibly-high value to
+ account for unexpected edge cases and avoid infinite retry loops.
+
+ Set to ``0`` to fail on the first retry.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param int connect:
+ How many connection-related errors to retry on.
+
+ These are errors raised before the request is sent to the remote server,
+ which we assume has not triggered the server to process the request.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int read:
+ How many times to retry on read errors.
+
+ These errors are raised after the request was sent to the server, so the
+ request may have side-effects.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param int redirect:
+ How many redirects to perform. Limit this to avoid infinite redirect
+ loops.
+
+ A redirect is a HTTP response with a status code 301, 302, 303, 307 or
+ 308.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ Set to ``False`` to disable and imply ``raise_on_redirect=False``.
+
+ :param int status:
+ How many times to retry on bad status codes.
+
+ These are retries made on responses, where status code matches
+ ``status_forcelist``.
+
+ Set to ``0`` to fail on the first retry of this type.
+
+ :param iterable method_whitelist:
+ Set of uppercased HTTP method verbs that we should retry on.
+
+ By default, we only retry on methods which are considered to be
+ idempotent (multiple requests with the same parameters end with the
+ same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
+
+ Set to a ``False`` value to retry on any verb.
+
+ :param iterable status_forcelist:
+ A set of integer HTTP status codes that we should force a retry on.
+ A retry is initiated if the request method is in ``method_whitelist``
+ and the response status code is in ``status_forcelist``.
+
+ By default, this is disabled with ``None``.
+
+ :param float backoff_factor:
+ A backoff factor to apply between attempts after the second try
+ (most errors are resolved immediately by a second try without a
+ delay). urllib3 will sleep for::
+
+ {backoff factor} * (2 ** ({number of total retries} - 1))
+
+ seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
+ for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
+ than :attr:`Retry.BACKOFF_MAX`.
+
+ By default, backoff is disabled (set to 0).
+
+ :param bool raise_on_redirect: Whether, if the number of redirects is
+ exhausted, to raise a MaxRetryError, or to return a response with a
+ response code in the 3xx range.
+
+ :param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
+ whether we should raise an exception, or return a response,
+ if status falls in ``status_forcelist`` range and retries have
+ been exhausted.
+
+ :param tuple history: The history of the request encountered during
+ each call to :meth:`~Retry.increment`. The list is in the order
+ the requests occurred. Each list item is of class :class:`RequestHistory`.
+
+ :param bool respect_retry_after_header:
+ Whether to respect Retry-After header on status codes defined as
+ :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.
+
+ :param iterable remove_headers_on_redirect:
+ Sequence of headers to remove from the request when a response
+ indicating a redirect is returned before firing off the redirected
+ request.
+ """
+
+ DEFAULT_METHOD_WHITELIST = frozenset([
+ 'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
+
+ RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])
+
+ DEFAULT_REDIRECT_HEADERS_BLACKLIST = frozenset(['Authorization'])
+
+ #: Maximum backoff time.
+ BACKOFF_MAX = 120
+
+ def __init__(self, total=10, connect=None, read=None, redirect=None, status=None,
+ method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
+ backoff_factor=0, raise_on_redirect=True, raise_on_status=True,
+ history=None, respect_retry_after_header=True,
+ remove_headers_on_redirect=DEFAULT_REDIRECT_HEADERS_BLACKLIST):
+
+ self.total = total
+ self.connect = connect
+ self.read = read
+ self.status = status
+
+ if redirect is False or total is False:
+ redirect = 0
+ raise_on_redirect = False
+
+ self.redirect = redirect
+ self.status_forcelist = status_forcelist or set()
+ self.method_whitelist = method_whitelist
+ self.backoff_factor = backoff_factor
+ self.raise_on_redirect = raise_on_redirect
+ self.raise_on_status = raise_on_status
+ self.history = history or tuple()
+ self.respect_retry_after_header = respect_retry_after_header
+ self.remove_headers_on_redirect = remove_headers_on_redirect
+
+ def new(self, **kw):
+ params = dict(
+ total=self.total,
+ connect=self.connect, read=self.read, redirect=self.redirect, status=self.status,
+ method_whitelist=self.method_whitelist,
+ status_forcelist=self.status_forcelist,
+ backoff_factor=self.backoff_factor,
+ raise_on_redirect=self.raise_on_redirect,
+ raise_on_status=self.raise_on_status,
+ history=self.history,
+ remove_headers_on_redirect=self.remove_headers_on_redirect
+ )
+ params.update(kw)
+ return type(self)(**params)
+
+ @classmethod
+ def from_int(cls, retries, redirect=True, default=None):
+ """ Backwards-compatibility for the old retries format."""
+ if retries is None:
+ retries = default if default is not None else cls.DEFAULT
+
+ if isinstance(retries, Retry):
+ return retries
+
+ redirect = bool(redirect) and None
+ new_retries = cls(retries, redirect=redirect)
+ log.debug("Converted retries value: %r -> %r", retries, new_retries)
+ return new_retries
+
+ def get_backoff_time(self):
+ """ Formula for computing the current backoff
+
+ :rtype: float
+ """
+ # We want to consider only the last consecutive errors sequence (Ignore redirects).
+ consecutive_errors_len = len(list(takewhile(lambda x: x.redirect_location is None,
+ reversed(self.history))))
+ if consecutive_errors_len <= 1:
+ return 0
+
+ backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
+ return min(self.BACKOFF_MAX, backoff_value)
+
+ def parse_retry_after(self, retry_after):
+ # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
+ if re.match(r"^\s*[0-9]+\s*$", retry_after):
+ seconds = int(retry_after)
+ else:
+ retry_date_tuple = email.utils.parsedate(retry_after)
+ if retry_date_tuple is None:
+ raise InvalidHeader("Invalid Retry-After header: %s" % retry_after)
+ retry_date = time.mktime(retry_date_tuple)
+ seconds = retry_date - time.time()
+
+ if seconds < 0:
+ seconds = 0
+
+ return seconds
+
+ def get_retry_after(self, response):
+ """ Get the value of Retry-After in seconds. """
+
+ retry_after = response.getheader("Retry-After")
+
+ if retry_after is None:
+ return None
+
+ return self.parse_retry_after(retry_after)
+
+ def sleep_for_retry(self, response=None):
+ retry_after = self.get_retry_after(response)
+ if retry_after:
+ time.sleep(retry_after)
+ return True
+
+ return False
+
+ def _sleep_backoff(self):
+ backoff = self.get_backoff_time()
+ if backoff <= 0:
+ return
+ time.sleep(backoff)
+
+ def sleep(self, response=None):
+ """ Sleep between retry attempts.
+
+ This method will respect a server's ``Retry-After`` response header
+ and sleep the duration of the time requested. If that is not present, it
+ will use an exponential backoff. By default, the backoff factor is 0 and
+ this method will return immediately.
+ """
+
+ if response:
+ slept = self.sleep_for_retry(response)
+ if slept:
+ return
+
+ self._sleep_backoff()
+
+ def _is_connection_error(self, err):
+ """ Errors when we're fairly sure that the server did not receive the
+ request, so it should be safe to retry.
+ """
+ return isinstance(err, ConnectTimeoutError)
+
+ def _is_read_error(self, err):
+ """ Errors that occur after the request has been started, so we should
+ assume that the server began processing it.
+ """
+ return isinstance(err, (ReadTimeoutError, ProtocolError))
+
+ def _is_method_retryable(self, method):
+ """ Checks if a given HTTP method should be retried upon, depending if
+ it is included on the method whitelist.
+ """
+ if self.method_whitelist and method.upper() not in self.method_whitelist:
+ return False
+
+ return True
+
+ def is_retry(self, method, status_code, has_retry_after=False):
+ """ Is this method/status code retryable? (Based on whitelists and control
+ variables such as the number of total retries to allow, whether to
+ respect the Retry-After header, whether this header is present, and
+ whether the returned status code is on the list of status codes to
+ be retried upon on the presence of the aforementioned header)
+ """
+ if not self._is_method_retryable(method):
+ return False
+
+ if self.status_forcelist and status_code in self.status_forcelist:
+ return True
+
+ return (self.total and self.respect_retry_after_header and
+ has_retry_after and (status_code in self.RETRY_AFTER_STATUS_CODES))
+
+ def is_exhausted(self):
+ """ Are we out of retries? """
+ retry_counts = (self.total, self.connect, self.read, self.redirect, self.status)
+ retry_counts = list(filter(None, retry_counts))
+ if not retry_counts:
+ return False
+
+ return min(retry_counts) < 0
+
+ def increment(self, method=None, url=None, response=None, error=None,
+ _pool=None, _stacktrace=None):
+ """ Return a new Retry object with incremented retry counters.
+
+ :param response: A response object, or None, if the server did not
+ return a response.
+ :type response: :class:`~urllib3.response.HTTPResponse`
+ :param Exception error: An error encountered during the request, or
+ None if the response was received successfully.
+
+ :return: A new ``Retry`` object.
+ """
+ if self.total is False and error:
+ # Disabled, indicate to re-raise the error.
+ raise six.reraise(type(error), error, _stacktrace)
+
+ total = self.total
+ if total is not None:
+ total -= 1
+
+ connect = self.connect
+ read = self.read
+ redirect = self.redirect
+ status_count = self.status
+ cause = 'unknown'
+ status = None
+ redirect_location = None
+
+ if error and self._is_connection_error(error):
+ # Connect retry?
+ if connect is False:
+ raise six.reraise(type(error), error, _stacktrace)
+ elif connect is not None:
+ connect -= 1
+
+ elif error and self._is_read_error(error):
+ # Read retry?
+ if read is False or not self._is_method_retryable(method):
+ raise six.reraise(type(error), error, _stacktrace)
+ elif read is not None:
+ read -= 1
+
+ elif response and response.get_redirect_location():
+ # Redirect retry?
+ if redirect is not None:
+ redirect -= 1
+ cause = 'too many redirects'
+ redirect_location = response.get_redirect_location()
+ status = response.status
+
+ else:
+ # Incrementing because of a server error like a 500 in
+ # status_forcelist and a the given method is in the whitelist
+ cause = ResponseError.GENERIC_ERROR
+ if response and response.status:
+ if status_count is not None:
+ status_count -= 1
+ cause = ResponseError.SPECIFIC_ERROR.format(
+ status_code=response.status)
+ status = response.status
+
+ history = self.history + (RequestHistory(method, url, error, status, redirect_location),)
+
+ new_retry = self.new(
+ total=total,
+ connect=connect, read=read, redirect=redirect, status=status_count,
+ history=history)
+
+ if new_retry.is_exhausted():
+ raise MaxRetryError(_pool, url, error or ResponseError(cause))
+
+ log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
+
+ return new_retry
+
+ def __repr__(self):
+ return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
+ 'read={self.read}, redirect={self.redirect}, status={self.status})').format(
+ cls=type(self), self=self)
+
+
+# For backwards compatibility (equivalent to pre-v1.9):
+Retry.DEFAULT = Retry(3)
diff --git a/python/urllib3/util/ssl_.py b/python/urllib3/util/ssl_.py
new file mode 100644
index 0000000..64ea192
--- /dev/null
+++ b/python/urllib3/util/ssl_.py
@@ -0,0 +1,381 @@
+from __future__ import absolute_import
+import errno
+import warnings
+import hmac
+import socket
+
+from binascii import hexlify, unhexlify
+from hashlib import md5, sha1, sha256
+
+from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning
+from ..packages import six
+
+
+SSLContext = None
+HAS_SNI = False
+IS_PYOPENSSL = False
+IS_SECURETRANSPORT = False
+
+# Maps the length of a digest to a possible hash function producing this digest
+HASHFUNC_MAP = {
+ 32: md5,
+ 40: sha1,
+ 64: sha256,
+}
+
+
+def _const_compare_digest_backport(a, b):
+ """
+ Compare two digests of equal length in constant time.
+
+ The digests must be of type str/bytes.
+ Returns True if the digests match, and False otherwise.
+ """
+ result = abs(len(a) - len(b))
+ for l, r in zip(bytearray(a), bytearray(b)):
+ result |= l ^ r
+ return result == 0
+
+
+_const_compare_digest = getattr(hmac, 'compare_digest',
+ _const_compare_digest_backport)
+
+
+try: # Test for SSL features
+ import ssl
+ from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
+ from ssl import HAS_SNI # Has SNI?
+except ImportError:
+ pass
+
+
+try:
+ from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
+except ImportError:
+ OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
+ OP_NO_COMPRESSION = 0x20000
+
+
+# Python 2.7 doesn't have inet_pton on non-Linux so we fallback on inet_aton in
+# those cases. This means that we can only detect IPv4 addresses in this case.
+if hasattr(socket, 'inet_pton'):
+ inet_pton = socket.inet_pton
+else:
+ # Maybe we can use ipaddress if the user has urllib3[secure]?
+ try:
+ import ipaddress
+
+ def inet_pton(_, host):
+ if isinstance(host, bytes):
+ host = host.decode('ascii')
+ return ipaddress.ip_address(host)
+
+ except ImportError: # Platform-specific: Non-Linux
+ def inet_pton(_, host):
+ return socket.inet_aton(host)
+
+
+# A secure default.
+# Sources for more information on TLS ciphers:
+#
+# - https://wiki.mozilla.org/Security/Server_Side_TLS
+# - https://www.ssllabs.com/projects/best-practices/index.html
+# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
+#
+# The general intent is:
+# - Prefer TLS 1.3 cipher suites
+# - prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
+# - prefer ECDHE over DHE for better performance,
+# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and
+# security,
+# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common,
+# - disable NULL authentication, MD5 MACs and DSS for security reasons.
+DEFAULT_CIPHERS = ':'.join([
+ 'TLS13-AES-256-GCM-SHA384',
+ 'TLS13-CHACHA20-POLY1305-SHA256',
+ 'TLS13-AES-128-GCM-SHA256',
+ 'ECDH+AESGCM',
+ 'ECDH+CHACHA20',
+ 'DH+AESGCM',
+ 'DH+CHACHA20',
+ 'ECDH+AES256',
+ 'DH+AES256',
+ 'ECDH+AES128',
+ 'DH+AES',
+ 'RSA+AESGCM',
+ 'RSA+AES',
+ '!aNULL',
+ '!eNULL',
+ '!MD5',
+])
+
+try:
+ from ssl import SSLContext # Modern SSL?
+except ImportError:
+ import sys
+
+ class SSLContext(object): # Platform-specific: Python 2
+ def __init__(self, protocol_version):
+ self.protocol = protocol_version
+ # Use default values from a real SSLContext
+ self.check_hostname = False
+ self.verify_mode = ssl.CERT_NONE
+ self.ca_certs = None
+ self.options = 0
+ self.certfile = None
+ self.keyfile = None
+ self.ciphers = None
+
+ def load_cert_chain(self, certfile, keyfile):
+ self.certfile = certfile
+ self.keyfile = keyfile
+
+ def load_verify_locations(self, cafile=None, capath=None):
+ self.ca_certs = cafile
+
+ if capath is not None:
+ raise SSLError("CA directories not supported in older Pythons")
+
+ def set_ciphers(self, cipher_suite):
+ self.ciphers = cipher_suite
+
+ def wrap_socket(self, socket, server_hostname=None, server_side=False):
+ warnings.warn(
+ 'A true SSLContext object is not available. This prevents '
+ 'urllib3 from configuring SSL appropriately and may cause '
+ 'certain SSL connections to fail. You can upgrade to a newer '
+ 'version of Python to solve this. For more information, see '
+ 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
+ '#ssl-warnings',
+ InsecurePlatformWarning
+ )
+ kwargs = {
+ 'keyfile': self.keyfile,
+ 'certfile': self.certfile,
+ 'ca_certs': self.ca_certs,
+ 'cert_reqs': self.verify_mode,
+ 'ssl_version': self.protocol,
+ 'server_side': server_side,
+ }
+ return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
+
+
+def assert_fingerprint(cert, fingerprint):
+ """
+ Checks if given fingerprint matches the supplied certificate.
+
+ :param cert:
+ Certificate as bytes object.
+ :param fingerprint:
+ Fingerprint as string of hexdigits, can be interspersed by colons.
+ """
+
+ fingerprint = fingerprint.replace(':', '').lower()
+ digest_length = len(fingerprint)
+ hashfunc = HASHFUNC_MAP.get(digest_length)
+ if not hashfunc:
+ raise SSLError(
+ 'Fingerprint of invalid length: {0}'.format(fingerprint))
+
+ # We need encode() here for py32; works on py2 and p33.
+ fingerprint_bytes = unhexlify(fingerprint.encode())
+
+ cert_digest = hashfunc(cert).digest()
+
+ if not _const_compare_digest(cert_digest, fingerprint_bytes):
+ raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
+ .format(fingerprint, hexlify(cert_digest)))
+
+
+def resolve_cert_reqs(candidate):
+ """
+ Resolves the argument to a numeric constant, which can be passed to
+ the wrap_socket function/method from the ssl module.
+ Defaults to :data:`ssl.CERT_NONE`.
+ If given a string it is assumed to be the name of the constant in the
+ :mod:`ssl` module or its abbreviation.
+ (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
+ If it's neither `None` nor a string we assume it is already the numeric
+ constant which can directly be passed to wrap_socket.
+ """
+ if candidate is None:
+ return CERT_NONE
+
+ if isinstance(candidate, str):
+ res = getattr(ssl, candidate, None)
+ if res is None:
+ res = getattr(ssl, 'CERT_' + candidate)
+ return res
+
+ return candidate
+
+
+def resolve_ssl_version(candidate):
+ """
+ like resolve_cert_reqs
+ """
+ if candidate is None:
+ return PROTOCOL_SSLv23
+
+ if isinstance(candidate, str):
+ res = getattr(ssl, candidate, None)
+ if res is None:
+ res = getattr(ssl, 'PROTOCOL_' + candidate)
+ return res
+
+ return candidate
+
+
+def create_urllib3_context(ssl_version=None, cert_reqs=None,
+ options=None, ciphers=None):
+ """All arguments have the same meaning as ``ssl_wrap_socket``.
+
+ By default, this function does a lot of the same work that
+ ``ssl.create_default_context`` does on Python 3.4+. It:
+
+ - Disables SSLv2, SSLv3, and compression
+ - Sets a restricted set of server ciphers
+
+ If you wish to enable SSLv3, you can do::
+
+ from urllib3.util import ssl_
+ context = ssl_.create_urllib3_context()
+ context.options &= ~ssl_.OP_NO_SSLv3
+
+ You can do the same to enable compression (substituting ``COMPRESSION``
+ for ``SSLv3`` in the last line above).
+
+ :param ssl_version:
+ The desired protocol version to use. This will default to
+ PROTOCOL_SSLv23 which will negotiate the highest protocol that both
+ the server and your installation of OpenSSL support.
+ :param cert_reqs:
+ Whether to require the certificate verification. This defaults to
+ ``ssl.CERT_REQUIRED``.
+ :param options:
+ Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
+ ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
+ :param ciphers:
+ Which cipher suites to allow the server to select.
+ :returns:
+ Constructed SSLContext object with specified options
+ :rtype: SSLContext
+ """
+ context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)
+
+ context.set_ciphers(ciphers or DEFAULT_CIPHERS)
+
+ # Setting the default here, as we may have no ssl module on import
+ cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
+
+ if options is None:
+ options = 0
+ # SSLv2 is easily broken and is considered harmful and dangerous
+ options |= OP_NO_SSLv2
+ # SSLv3 has several problems and is now dangerous
+ options |= OP_NO_SSLv3
+ # Disable compression to prevent CRIME attacks for OpenSSL 1.0+
+ # (issue #309)
+ options |= OP_NO_COMPRESSION
+
+ context.options |= options
+
+ context.verify_mode = cert_reqs
+ if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2
+ # We do our own verification, including fingerprints and alternative
+ # hostnames. So disable it here
+ context.check_hostname = False
+ return context
+
+
+def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
+ ca_certs=None, server_hostname=None,
+ ssl_version=None, ciphers=None, ssl_context=None,
+ ca_cert_dir=None):
+ """
+ All arguments except for server_hostname, ssl_context, and ca_cert_dir have
+ the same meaning as they do when using :func:`ssl.wrap_socket`.
+
+ :param server_hostname:
+ When SNI is supported, the expected hostname of the certificate
+ :param ssl_context:
+ A pre-made :class:`SSLContext` object. If none is provided, one will
+ be created using :func:`create_urllib3_context`.
+ :param ciphers:
+ A string of ciphers we wish the client to support.
+ :param ca_cert_dir:
+ A directory containing CA certificates in multiple separate files, as
+ supported by OpenSSL's -CApath flag or the capath argument to
+ SSLContext.load_verify_locations().
+ """
+ context = ssl_context
+ if context is None:
+ # Note: This branch of code and all the variables in it are no longer
+ # used by urllib3 itself. We should consider deprecating and removing
+ # this code.
+ context = create_urllib3_context(ssl_version, cert_reqs,
+ ciphers=ciphers)
+
+ if ca_certs or ca_cert_dir:
+ try:
+ context.load_verify_locations(ca_certs, ca_cert_dir)
+ except IOError as e: # Platform-specific: Python 2.7
+ raise SSLError(e)
+ # Py33 raises FileNotFoundError which subclasses OSError
+ # These are not equivalent unless we check the errno attribute
+ except OSError as e: # Platform-specific: Python 3.3 and beyond
+ if e.errno == errno.ENOENT:
+ raise SSLError(e)
+ raise
+ elif getattr(context, 'load_default_certs', None) is not None:
+ # try to load OS default certs; works well on Windows (require Python3.4+)
+ context.load_default_certs()
+
+ if certfile:
+ context.load_cert_chain(certfile, keyfile)
+
+ # If we detect server_hostname is an IP address then the SNI
+ # extension should not be used according to RFC3546 Section 3.1
+ # We shouldn't warn the user if SNI isn't available but we would
+ # not be using SNI anyways due to IP address for server_hostname.
+ if ((server_hostname is not None and not is_ipaddress(server_hostname))
+ or IS_SECURETRANSPORT):
+ if HAS_SNI and server_hostname is not None:
+ return context.wrap_socket(sock, server_hostname=server_hostname)
+
+ warnings.warn(
+ 'An HTTPS request has been made, but the SNI (Server Name '
+ 'Indication) extension to TLS is not available on this platform. '
+ 'This may cause the server to present an incorrect TLS '
+ 'certificate, which can cause validation failures. You can upgrade to '
+ 'a newer version of Python to solve this. For more information, see '
+ 'https://urllib3.readthedocs.io/en/latest/advanced-usage.html'
+ '#ssl-warnings',
+ SNIMissingWarning
+ )
+
+ return context.wrap_socket(sock)
+
+
+def is_ipaddress(hostname):
+ """Detects whether the hostname given is an IP address.
+
+ :param str hostname: Hostname to examine.
+ :return: True if the hostname is an IP address, False otherwise.
+ """
+ if six.PY3 and isinstance(hostname, bytes):
+ # IDN A-label bytes are ASCII compatible.
+ hostname = hostname.decode('ascii')
+
+ families = [socket.AF_INET]
+ if hasattr(socket, 'AF_INET6'):
+ families.append(socket.AF_INET6)
+
+ for af in families:
+ try:
+ inet_pton(af, hostname)
+ except (socket.error, ValueError, OSError):
+ pass
+ else:
+ return True
+ return False
diff --git a/python/urllib3/util/timeout.py b/python/urllib3/util/timeout.py
new file mode 100644
index 0000000..cec817e
--- /dev/null
+++ b/python/urllib3/util/timeout.py
@@ -0,0 +1,242 @@
+from __future__ import absolute_import
+# The default socket timeout, used by httplib to indicate that no timeout was
+# specified by the user
+from socket import _GLOBAL_DEFAULT_TIMEOUT
+import time
+
+from ..exceptions import TimeoutStateError
+
+# A sentinel value to indicate that no timeout was specified by the user in
+# urllib3
+_Default = object()
+
+
+# Use time.monotonic if available.
+current_time = getattr(time, "monotonic", time.time)
+
+
+class Timeout(object):
+ """ Timeout configuration.
+
+ Timeouts can be defined as a default for a pool::
+
+ timeout = Timeout(connect=2.0, read=7.0)
+ http = PoolManager(timeout=timeout)
+ response = http.request('GET', 'http://example.com/')
+
+ Or per-request (which overrides the default for the pool)::
+
+ response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
+
+ Timeouts can be disabled by setting all the parameters to ``None``::
+
+ no_timeout = Timeout(connect=None, read=None)
+ response = http.request('GET', 'http://example.com/, timeout=no_timeout)
+
+
+ :param total:
+ This combines the connect and read timeouts into one; the read timeout
+ will be set to the time leftover from the connect attempt. In the
+ event that both a connect timeout and a total are specified, or a read
+ timeout and a total are specified, the shorter timeout will be applied.
+
+ Defaults to None.
+
+ :type total: integer, float, or None
+
+ :param connect:
+ The maximum amount of time to wait for a connection attempt to a server
+ to succeed. Omitting the parameter will default the connect timeout to
+ the system default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout for connection attempts.
+
+ :type connect: integer, float, or None
+
+ :param read:
+ The maximum amount of time to wait between consecutive
+ read operations for a response from the server. Omitting
+ the parameter will default the read timeout to the system
+ default, probably `the global default timeout in socket.py
+ <http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
+ None will set an infinite timeout.
+
+ :type read: integer, float, or None
+
+ .. note::
+
+ Many factors can affect the total amount of time for urllib3 to return
+ an HTTP response.
+
+ For example, Python's DNS resolver does not obey the timeout specified
+ on the socket. Other factors that can affect total request time include
+ high CPU load, high swap, the program running at a low priority level,
+ or other behaviors.
+
+ In addition, the read and total timeouts only measure the time between
+ read operations on the socket connecting the client and the server,
+ not the total amount of time for the request to return a complete
+ response. For most requests, the timeout is raised because the server
+ has not sent the first byte in the specified time. This is not always
+ the case; if a server streams one byte every fifteen seconds, a timeout
+ of 20 seconds will not trigger, even though the request will take
+ several minutes to complete.
+
+ If your goal is to cut off any request after a set amount of wall clock
+ time, consider having a second "watcher" thread to cut off a slow
+ request.
+ """
+
+ #: A sentinel object representing the default timeout value
+ DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
+
+ def __init__(self, total=None, connect=_Default, read=_Default):
+ self._connect = self._validate_timeout(connect, 'connect')
+ self._read = self._validate_timeout(read, 'read')
+ self.total = self._validate_timeout(total, 'total')
+ self._start_connect = None
+
+ def __str__(self):
+ return '%s(connect=%r, read=%r, total=%r)' % (
+ type(self).__name__, self._connect, self._read, self.total)
+
+ @classmethod
+ def _validate_timeout(cls, value, name):
+ """ Check that a timeout attribute is valid.
+
+ :param value: The timeout value to validate
+ :param name: The name of the timeout attribute to validate. This is
+ used to specify in error messages.
+ :return: The validated and casted version of the given value.
+ :raises ValueError: If it is a numeric value less than or equal to
+ zero, or the type is not an integer, float, or None.
+ """
+ if value is _Default:
+ return cls.DEFAULT_TIMEOUT
+
+ if value is None or value is cls.DEFAULT_TIMEOUT:
+ return value
+
+ if isinstance(value, bool):
+ raise ValueError("Timeout cannot be a boolean value. It must "
+ "be an int, float or None.")
+ try:
+ float(value)
+ except (TypeError, ValueError):
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int, float or None." % (name, value))
+
+ try:
+ if value <= 0:
+ raise ValueError("Attempted to set %s timeout to %s, but the "
+ "timeout cannot be set to a value less "
+ "than or equal to 0." % (name, value))
+ except TypeError: # Python 3
+ raise ValueError("Timeout value %s was %s, but it must be an "
+ "int, float or None." % (name, value))
+
+ return value
+
+ @classmethod
+ def from_float(cls, timeout):
+ """ Create a new Timeout from a legacy timeout value.
+
+ The timeout value used by httplib.py sets the same timeout on the
+ connect(), and recv() socket requests. This creates a :class:`Timeout`
+ object that sets the individual timeouts to the ``timeout`` value
+ passed to this function.
+
+ :param timeout: The legacy timeout value.
+ :type timeout: integer, float, sentinel default object, or None
+ :return: Timeout object
+ :rtype: :class:`Timeout`
+ """
+ return Timeout(read=timeout, connect=timeout)
+
+ def clone(self):
+ """ Create a copy of the timeout object
+
+ Timeout properties are stored per-pool but each request needs a fresh
+ Timeout object to ensure each one has its own start/stop configured.
+
+ :return: a copy of the timeout object
+ :rtype: :class:`Timeout`
+ """
+ # We can't use copy.deepcopy because that will also create a new object
+ # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
+ # detect the user default.
+ return Timeout(connect=self._connect, read=self._read,
+ total=self.total)
+
+ def start_connect(self):
+ """ Start the timeout clock, used during a connect() attempt
+
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to start a timer that has been started already.
+ """
+ if self._start_connect is not None:
+ raise TimeoutStateError("Timeout timer has already been started.")
+ self._start_connect = current_time()
+ return self._start_connect
+
+ def get_connect_duration(self):
+ """ Gets the time elapsed since the call to :meth:`start_connect`.
+
+ :return: Elapsed time.
+ :rtype: float
+ :raises urllib3.exceptions.TimeoutStateError: if you attempt
+ to get duration for a timer that hasn't been started.
+ """
+ if self._start_connect is None:
+ raise TimeoutStateError("Can't get connect duration for timer "
+ "that has not started.")
+ return current_time() - self._start_connect
+
+ @property
+ def connect_timeout(self):
+ """ Get the value to use when setting a connection timeout.
+
+ This will be a positive float or integer, the value None
+ (never timeout), or the default system timeout.
+
+ :return: Connect timeout.
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ """
+ if self.total is None:
+ return self._connect
+
+ if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
+ return self.total
+
+ return min(self._connect, self.total)
+
+ @property
+ def read_timeout(self):
+ """ Get the value for the read timeout.
+
+ This assumes some time has elapsed in the connection timeout and
+ computes the read timeout appropriately.
+
+ If self.total is set, the read timeout is dependent on the amount of
+ time taken by the connect timeout. If the connection time has not been
+ established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
+ raised.
+
+ :return: Value to use for the read timeout.
+ :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
+ :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
+ has not yet been called on this object.
+ """
+ if (self.total is not None and
+ self.total is not self.DEFAULT_TIMEOUT and
+ self._read is not None and
+ self._read is not self.DEFAULT_TIMEOUT):
+ # In case the connect timeout has not yet been established.
+ if self._start_connect is None:
+ return self._read
+ return max(0, min(self.total - self.get_connect_duration(),
+ self._read))
+ elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
+ return max(0, self.total - self.get_connect_duration())
+ else:
+ return self._read
diff --git a/python/urllib3/util/url.py b/python/urllib3/util/url.py
new file mode 100644
index 0000000..6b6f996
--- /dev/null
+++ b/python/urllib3/util/url.py
@@ -0,0 +1,230 @@
+from __future__ import absolute_import
+from collections import namedtuple
+
+from ..exceptions import LocationParseError
+
+
+url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
+
+# We only want to normalize urls with an HTTP(S) scheme.
+# urllib3 infers URLs without a scheme (None) to be http.
+NORMALIZABLE_SCHEMES = ('http', 'https', None)
+
+
+class Url(namedtuple('Url', url_attrs)):
+ """
+ Datastructure for representing an HTTP URL. Used as a return value for
+ :func:`parse_url`. Both the scheme and host are normalized as they are
+ both case-insensitive according to RFC 3986.
+ """
+ __slots__ = ()
+
+ def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
+ query=None, fragment=None):
+ if path and not path.startswith('/'):
+ path = '/' + path
+ if scheme:
+ scheme = scheme.lower()
+ if host and scheme in NORMALIZABLE_SCHEMES:
+ host = host.lower()
+ return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
+ query, fragment)
+
+ @property
+ def hostname(self):
+ """For backwards-compatibility with urlparse. We're nice like that."""
+ return self.host
+
+ @property
+ def request_uri(self):
+ """Absolute path including the query string."""
+ uri = self.path or '/'
+
+ if self.query is not None:
+ uri += '?' + self.query
+
+ return uri
+
+ @property
+ def netloc(self):
+ """Network location including host and port"""
+ if self.port:
+ return '%s:%d' % (self.host, self.port)
+ return self.host
+
+ @property
+ def url(self):
+ """
+ Convert self into a url
+
+ This function should more or less round-trip with :func:`.parse_url`. The
+ returned url may not be exactly the same as the url inputted to
+ :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
+ with a blank port will have : removed).
+
+ Example: ::
+
+ >>> U = parse_url('http://google.com/mail/')
+ >>> U.url
+ 'http://google.com/mail/'
+ >>> Url('http', 'username:password', 'host.com', 80,
+ ... '/path', 'query', 'fragment').url
+ 'http://username:password@host.com:80/path?query#fragment'
+ """
+ scheme, auth, host, port, path, query, fragment = self
+ url = ''
+
+ # We use "is not None" we want things to happen with empty strings (or 0 port)
+ if scheme is not None:
+ url += scheme + '://'
+ if auth is not None:
+ url += auth + '@'
+ if host is not None:
+ url += host
+ if port is not None:
+ url += ':' + str(port)
+ if path is not None:
+ url += path
+ if query is not None:
+ url += '?' + query
+ if fragment is not None:
+ url += '#' + fragment
+
+ return url
+
+ def __str__(self):
+ return self.url
+
+
+def split_first(s, delims):
+ """
+ Given a string and an iterable of delimiters, split on the first found
+ delimiter. Return two split parts and the matched delimiter.
+
+ If not found, then the first part is the full input string.
+
+ Example::
+
+ >>> split_first('foo/bar?baz', '?/=')
+ ('foo', 'bar?baz', '/')
+ >>> split_first('foo/bar?baz', '123')
+ ('foo/bar?baz', '', None)
+
+ Scales linearly with number of delims. Not ideal for large number of delims.
+ """
+ min_idx = None
+ min_delim = None
+ for d in delims:
+ idx = s.find(d)
+ if idx < 0:
+ continue
+
+ if min_idx is None or idx < min_idx:
+ min_idx = idx
+ min_delim = d
+
+ if min_idx is None or min_idx < 0:
+ return s, '', None
+
+ return s[:min_idx], s[min_idx + 1:], min_delim
+
+
+def parse_url(url):
+ """
+ Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
+ performed to parse incomplete urls. Fields not provided will be None.
+
+ Partly backwards-compatible with :mod:`urlparse`.
+
+ Example::
+
+ >>> parse_url('http://google.com/mail/')
+ Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
+ >>> parse_url('google.com:80')
+ Url(scheme=None, host='google.com', port=80, path=None, ...)
+ >>> parse_url('/foo?bar')
+ Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
+ """
+
+ # While this code has overlap with stdlib's urlparse, it is much
+ # simplified for our needs and less annoying.
+ # Additionally, this implementations does silly things to be optimal
+ # on CPython.
+
+ if not url:
+ # Empty
+ return Url()
+
+ scheme = None
+ auth = None
+ host = None
+ port = None
+ path = None
+ fragment = None
+ query = None
+
+ # Scheme
+ if '://' in url:
+ scheme, url = url.split('://', 1)
+
+ # Find the earliest Authority Terminator
+ # (http://tools.ietf.org/html/rfc3986#section-3.2)
+ url, path_, delim = split_first(url, ['/', '?', '#'])
+
+ if delim:
+ # Reassemble the path
+ path = delim + path_
+
+ # Auth
+ if '@' in url:
+ # Last '@' denotes end of auth part
+ auth, url = url.rsplit('@', 1)
+
+ # IPv6
+ if url and url[0] == '[':
+ host, url = url.split(']', 1)
+ host += ']'
+
+ # Port
+ if ':' in url:
+ _host, port = url.split(':', 1)
+
+ if not host:
+ host = _host
+
+ if port:
+ # If given, ports must be integers. No whitespace, no plus or
+ # minus prefixes, no non-integer digits such as ^2 (superscript).
+ if not port.isdigit():
+ raise LocationParseError(url)
+ try:
+ port = int(port)
+ except ValueError:
+ raise LocationParseError(url)
+ else:
+ # Blank ports are cool, too. (rfc3986#section-3.2.3)
+ port = None
+
+ elif not host and url:
+ host = url
+
+ if not path:
+ return Url(scheme, auth, host, port, path, query, fragment)
+
+ # Fragment
+ if '#' in path:
+ path, fragment = path.split('#', 1)
+
+ # Query
+ if '?' in path:
+ path, query = path.split('?', 1)
+
+ return Url(scheme, auth, host, port, path, query, fragment)
+
+
+def get_host(url):
+ """
+ Deprecated. Use :func:`parse_url` instead.
+ """
+ p = parse_url(url)
+ return p.scheme or 'http', p.hostname, p.port
diff --git a/python/urllib3/util/wait.py b/python/urllib3/util/wait.py
new file mode 100644
index 0000000..4db71ba
--- /dev/null
+++ b/python/urllib3/util/wait.py
@@ -0,0 +1,150 @@
+import errno
+from functools import partial
+import select
+import sys
+try:
+ from time import monotonic
+except ImportError:
+ from time import time as monotonic
+
+__all__ = ["NoWayToWaitForSocketError", "wait_for_read", "wait_for_write"]
+
+
+class NoWayToWaitForSocketError(Exception):
+ pass
+
+
+# How should we wait on sockets?
+#
+# There are two types of APIs you can use for waiting on sockets: the fancy
+# modern stateful APIs like epoll/kqueue, and the older stateless APIs like
+# select/poll. The stateful APIs are more efficient when you have a lots of
+# sockets to keep track of, because you can set them up once and then use them
+# lots of times. But we only ever want to wait on a single socket at a time
+# and don't want to keep track of state, so the stateless APIs are actually
+# more efficient. So we want to use select() or poll().
+#
+# Now, how do we choose between select() and poll()? On traditional Unixes,
+# select() has a strange calling convention that makes it slow, or fail
+# altogether, for high-numbered file descriptors. The point of poll() is to fix
+# that, so on Unixes, we prefer poll().
+#
+# On Windows, there is no poll() (or at least Python doesn't provide a wrapper
+# for it), but that's OK, because on Windows, select() doesn't have this
+# strange calling convention; plain select() works fine.
+#
+# So: on Windows we use select(), and everywhere else we use poll(). We also
+# fall back to select() in case poll() is somehow broken or missing.
+
+if sys.version_info >= (3, 5):
+ # Modern Python, that retries syscalls by default
+ def _retry_on_intr(fn, timeout):
+ return fn(timeout)
+else:
+ # Old and broken Pythons.
+ def _retry_on_intr(fn, timeout):
+ if timeout is None:
+ deadline = float("inf")
+ else:
+ deadline = monotonic() + timeout
+
+ while True:
+ try:
+ return fn(timeout)
+ # OSError for 3 <= pyver < 3.5, select.error for pyver <= 2.7
+ except (OSError, select.error) as e:
+ # 'e.args[0]' incantation works for both OSError and select.error
+ if e.args[0] != errno.EINTR:
+ raise
+ else:
+ timeout = deadline - monotonic()
+ if timeout < 0:
+ timeout = 0
+ if timeout == float("inf"):
+ timeout = None
+ continue
+
+
+def select_wait_for_socket(sock, read=False, write=False, timeout=None):
+ if not read and not write:
+ raise RuntimeError("must specify at least one of read=True, write=True")
+ rcheck = []
+ wcheck = []
+ if read:
+ rcheck.append(sock)
+ if write:
+ wcheck.append(sock)
+ # When doing a non-blocking connect, most systems signal success by
+ # marking the socket writable. Windows, though, signals success by marked
+ # it as "exceptional". We paper over the difference by checking the write
+ # sockets for both conditions. (The stdlib selectors module does the same
+ # thing.)
+ fn = partial(select.select, rcheck, wcheck, wcheck)
+ rready, wready, xready = _retry_on_intr(fn, timeout)
+ return bool(rready or wready or xready)
+
+
+def poll_wait_for_socket(sock, read=False, write=False, timeout=None):
+ if not read and not write:
+ raise RuntimeError("must specify at least one of read=True, write=True")
+ mask = 0
+ if read:
+ mask |= select.POLLIN
+ if write:
+ mask |= select.POLLOUT
+ poll_obj = select.poll()
+ poll_obj.register(sock, mask)
+
+ # For some reason, poll() takes timeout in milliseconds
+ def do_poll(t):
+ if t is not None:
+ t *= 1000
+ return poll_obj.poll(t)
+
+ return bool(_retry_on_intr(do_poll, timeout))
+
+
+def null_wait_for_socket(*args, **kwargs):
+ raise NoWayToWaitForSocketError("no select-equivalent available")
+
+
+def _have_working_poll():
+ # Apparently some systems have a select.poll that fails as soon as you try
+ # to use it, either due to strange configuration or broken monkeypatching
+ # from libraries like eventlet/greenlet.
+ try:
+ poll_obj = select.poll()
+ _retry_on_intr(poll_obj.poll, 0)
+ except (AttributeError, OSError):
+ return False
+ else:
+ return True
+
+
+def wait_for_socket(*args, **kwargs):
+ # We delay choosing which implementation to use until the first time we're
+ # called. We could do it at import time, but then we might make the wrong
+ # decision if someone goes wild with monkeypatching select.poll after
+ # we're imported.
+ global wait_for_socket
+ if _have_working_poll():
+ wait_for_socket = poll_wait_for_socket
+ elif hasattr(select, "select"):
+ wait_for_socket = select_wait_for_socket
+ else: # Platform-specific: Appengine.
+ wait_for_socket = null_wait_for_socket
+ return wait_for_socket(*args, **kwargs)
+
+
+def wait_for_read(sock, timeout=None):
+ """ Waits for reading to be available on a given socket.
+ Returns True if the socket is readable, or False if the timeout expired.
+ """
+ return wait_for_socket(sock, read=True, timeout=timeout)
+
+
+def wait_for_write(sock, timeout=None):
+ """ Waits for writing to be available on a given socket.
+ Returns True if the socket is readable, or False if the timeout expired.
+ """
+ return wait_for_socket(sock, write=True, timeout=timeout)