diff options
Diffstat (limited to 'python/gevent/pywsgi.py')
-rw-r--r-- | python/gevent/pywsgi.py | 1545 |
1 files changed, 0 insertions, 1545 deletions
diff --git a/python/gevent/pywsgi.py b/python/gevent/pywsgi.py deleted file mode 100644 index 8d0b5b8..0000000 --- a/python/gevent/pywsgi.py +++ /dev/null @@ -1,1545 +0,0 @@ -# Copyright (c) 2005-2009, eventlet contributors -# Copyright (c) 2009-2018, gevent contributors -""" -A pure-Python, gevent-friendly WSGI server. - -The server is provided in :class:`WSGIServer`, but most of the actual -WSGI work is handled by :class:`WSGIHandler` --- a new instance is -created for each request. The server can be customized to use -different subclasses of :class:`WSGIHandler`. - -""" -from __future__ import absolute_import - -# FIXME: Can we refactor to make smallor? -# pylint:disable=too-many-lines - -import errno -from io import BytesIO -import string -import sys -import time -import traceback -from datetime import datetime - -try: - from urllib import unquote -except ImportError: - from urllib.parse import unquote # python 2 pylint:disable=import-error,no-name-in-module - -from gevent import socket -import gevent -from gevent.server import StreamServer -from gevent.hub import GreenletExit -from gevent._compat import PY3, reraise - -from functools import partial -if PY3: - unquote_latin1 = partial(unquote, encoding='latin-1') -else: - unquote_latin1 = unquote - -_no_undoc_members = True # Don't put undocumented things into sphinx - -__all__ = [ - 'WSGIServer', - 'WSGIHandler', - 'LoggingLogAdapter', - 'Environ', - 'SecureEnviron', - 'WSGISecureEnviron', -] - - -MAX_REQUEST_LINE = 8192 -# Weekday and month names for HTTP date/time formatting; always English! -_WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -_MONTHNAME = [None, # Dummy so we can use 1-based month numbers - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] - -# The contents of the "HEX" grammar rule for HTTP, upper and lowercase A-F plus digits, -# in byte form for comparing to the network. 
def format_date_time(timestamp):
    """
    Format *timestamp* (seconds since the epoch) as an HTTP date.

    The value is rendered in GMT using the module's fixed English
    weekday and month tables. On Python 3 the result is encoded to a
    latin-1 byte string; on Python 2 the native string is returned.

    .. versionchanged:: 1.1b5
       Return a byte string, not a native string
    """
    fields = time.gmtime(timestamp)
    # Only the first seven struct_time fields are needed; the
    # day-of-year and DST flag are ignored.
    year, month, day, hh, mm, ss, wd = fields[:7]
    stamp = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        _WEEKDAYNAME[wd], day, _MONTHNAME[month], year, hh, mm, ss)
    return stamp.encode("latin-1") if PY3 else stamp
class Input(object):
    """
    File-like reader for the body of a single request.

    Reads are served from *rfile* and bounded either by
    *content_length* or, when *chunked_input* is true, by the chunk
    framing of the stream. If *socket* is given, ``100 Continue`` is
    sent on it immediately before the first read and the reference is
    then dropped.

    ``read`` and ``readline`` accept ``None`` or a negative number to
    mean "no explicit limit" for both kinds of body.
    """

    __slots__ = ('rfile', 'content_length', 'socket', 'position',
                 'chunked_input', 'chunk_length', '_chunked_input_error')

    def __init__(self, rfile, content_length, socket=None, chunked_input=False):
        # pylint:disable=redefined-outer-name
        self.rfile = rfile
        self.content_length = content_length
        self.socket = socket
        # Bytes consumed so far: of the whole body for a
        # Content-Length request, of the current chunk for a chunked one.
        self.position = 0
        self.chunked_input = chunked_input
        # -1 until the first chunk header has been parsed; 0 once the
        # terminating chunk has been seen.
        self.chunk_length = -1
        self._chunked_input_error = False

    def _discard(self):
        """Read and throw away whatever is left of the request body."""
        if self._chunked_input_error:
            # We are in an unknown state, so we can't necessarily discard
            # the body (e.g., if the client keeps the socket open, we could hang
            # here forever).
            # In this case, we've raised an exception and the user of this object
            # is going to close the socket, so we don't have to discard
            return

        # ``self.socket`` is only None when no 100-continue is pending
        # (never requested, or already sent by a previous read).
        if self.socket is None and (self.position < (self.content_length or 0) or self.chunked_input):
            # ## Read and discard body
            while 1:
                d = self.read(16384)
                if not d:
                    break

    def _send_100_continue(self):
        """Send the pending ``100 Continue`` response, at most once."""
        if self.socket is not None:
            self.socket.sendall(_CONTINUE_RESPONSE)
            self.socket = None

    def _do_read(self, length=None, use_readline=False):
        """
        Read up to *length* bytes of a Content-Length delimited body.

        :param length: Maximum number of bytes to return. ``None`` or a
            negative value means "everything that is left", matching
            the behaviour of :meth:`_chunked_read`. The value is always
            clamped to the bytes remaining in the body.
        :param use_readline: If true, stop after the first newline.
        :raises IOError: If the stream ends before the requested data
            (or, for readline, a newline) was read.
        """
        if use_readline:
            reader = self.rfile.readline
        else:
            reader = self.rfile.read
        content_length = self.content_length
        if content_length is None:
            # Either Content-Length or "Transfer-Encoding: chunked" must be present in a request with a body
            # if it was chunked, then this function would have not been called
            return b''

        self._send_100_continue()
        left = content_length - self.position
        # A negative length must not reach the underlying reader: there it
        # means "until EOF", which would read past the end of this body.
        if length is None or length < 0 or length > left:
            length = left
        if not length:
            return b''

        # On Python 2, self.rfile is usually socket.makefile(), which
        # uses cStringIO.StringIO. If *length* is greater than the C
        # sizeof(int) (typically 32 bits signed), parsing the argument to
        # readline raises OverflowError. StringIO.read(), OTOH, uses
        # PySize_t, typically a long (64 bits). In a bare readline()
        # case, because the header lines we're trying to read with
        # readline are typically expected to be small, we can correct
        # that failure by simply doing a smaller call to readline and
        # appending; failures in read we let propagate.
        try:
            read = reader(length)
        except OverflowError:
            if not use_readline:
                # Expecting to read more than 64 bits of data. Ouch!
                raise
            # We could loop on calls to smaller readline(), appending them
            # until we actually get a newline. For uses in this module,
            # we expect the actual length to be small, but WSGI applications
            # are allowed to pass in an arbitrary length. (This loop isn't optimal,
            # but even client applications *probably* have short lines.)
            read = b''
            while len(read) < length and not read.endswith(b'\n'):
                read += reader(MAX_REQUEST_LINE)

        self.position += len(read)
        if len(read) < length:
            if (use_readline and not read.endswith(b"\n")) or not use_readline:
                raise IOError("unexpected end of file while reading request at position %s" % (self.position,))

        return read

    def __read_chunk_length(self, rfile):
        # Read and return the next integer chunk length. If no
        # chunk length can be read, raises _InvalidClientInput.

        # Here's the production for a chunk:
        # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html)
        #   chunk          = chunk-size [ chunk-extension ] CRLF
        #                    chunk-data CRLF
        #   chunk-size     = 1*HEX
        #   chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
        #   chunk-ext-name = token
        #   chunk-ext-val  = token | quoted-string

        # To cope with malicious or broken clients that fail to send valid
        # chunk lines, the strategy is to read character by character until we either reach
        # a ; or newline. If at any time we read a non-HEX digit, we bail. If we hit a
        # ;, indicating an chunk-extension, we'll read up to the next
        # MAX_REQUEST_LINE characters
        # looking for the CRLF, and if we don't find it, we bail. If we read more than 16 hex characters,
        # (the number needed to represent a 64-bit chunk size), we bail (this protects us from
        # a client that sends an infinite stream of `F`, for example).

        buf = BytesIO()
        while 1:
            char = rfile.read(1)
            if not char:
                self._chunked_input_error = True
                raise _InvalidClientInput("EOF before chunk end reached")
            if char == b'\r':
                break
            if char == b';':
                break

            if char not in _HEX:
                self._chunked_input_error = True
                raise _InvalidClientInput("Non-hex data", char)
            buf.write(char)
            if buf.tell() > 16:
                self._chunked_input_error = True
                raise _InvalidClientInput("Chunk-size too large.")

        if char == b';':
            i = 0
            while i < MAX_REQUEST_LINE:
                char = rfile.read(1)
                if not char:
                    # The peer went away in the middle of the extension;
                    # fail now instead of polling a dead stream until
                    # the length limit is hit.
                    self._chunked_input_error = True
                    raise _InvalidClientInput("EOF before chunk end reached")
                if char == b'\r':
                    break
                i += 1
            else:
                # we read more than MAX_REQUEST_LINE without
                # hitting CR
                self._chunked_input_error = True
                raise _InvalidClientInput("Too large chunk extension")

        # Every path that reaches this point has just consumed a CR,
        # either from the main loop or from the end of an extension.
        char = rfile.read(1)
        if char != b'\n':
            self._chunked_input_error = True
            raise _InvalidClientInput("Line didn't end in CRLF")

        size = buf.getvalue()
        if not size:
            # chunk-size is 1*HEX: at least one digit is required. Without
            # this check int() would raise a bare ValueError, which is
            # not reported as invalid client input.
            self._chunked_input_error = True
            raise _InvalidClientInput("Missing chunk-size")
        return int(size, 16)

    def _chunked_read(self, length=None, use_readline=False):
        """
        Read up to *length* bytes of a chunked body.

        :param length: Maximum number of bytes to return; ``None`` or a
            negative value means no limit.
        :param use_readline: If true, stop after the first newline.
        :raises IOError: If the stream ends in the middle of a chunk.
        """
        # pylint:disable=too-many-branches
        rfile = self.rfile
        self._send_100_continue()

        if length == 0:
            return b""

        if length is not None and length < 0:
            length = None

        if use_readline:
            reader = self.rfile.readline
        else:
            reader = self.rfile.read

        response = []
        while self.chunk_length != 0:
            maxreadlen = self.chunk_length - self.position
            if length is not None and length < maxreadlen:
                maxreadlen = length

            if maxreadlen > 0:
                data = reader(maxreadlen)
                if not data:
                    self.chunk_length = 0
                    self._chunked_input_error = True
                    raise IOError("unexpected end of file while parsing chunked data")

                datalen = len(data)
                response.append(data)

                self.position += datalen
                if self.chunk_length == self.position:
                    # Consume the line terminator that follows the chunk data.
                    rfile.readline()

                if length is not None:
                    length -= datalen
                    if length == 0:
                        break
                # Indexing both sides gives an int on Python 3 and a
                # one-character str on Python 2, so the comparison works on both.
                if use_readline and data[-1] == b"\n"[0]:
                    break
            else:
                # We're at the beginning of a chunk, so we need to
                # determine the next size to read
                self.chunk_length = self.__read_chunk_length(rfile)
                self.position = 0
                if self.chunk_length == 0:
                    # Last chunk. Terminates with a CRLF.
                    rfile.readline()
        return b''.join(response)

    def read(self, length=None):
        """Return up to *length* bytes of the body (all of it by default)."""
        if self.chunked_input:
            return self._chunked_read(length)
        return self._do_read(length)

    def readline(self, size=None):
        """Return the next line of the body, at most *size* bytes long."""
        if self.chunked_input:
            return self._chunked_read(size, True)
        return self._do_read(size, use_readline=True)

    def readlines(self, hint=None):
        """Return all remaining lines as a list; *hint* is ignored."""
        # pylint:disable=unused-argument
        return list(self)

    def __iter__(self):
        return self

    def next(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line
    __next__ = next
WSGIHandler(object): - """ - Handles HTTP requests from a socket, creates the WSGI environment, and - interacts with the WSGI application. - - This is the default value of :attr:`WSGIServer.handler_class`. - This class may be subclassed carefully, and that class set on a - :class:`WSGIServer` instance through a keyword argument at - construction time. - - Instances are constructed with the same arguments as passed to the - server's :meth:`WSGIServer.handle` method followed by the server - itself. The application and environment are obtained from the server. - - """ - # pylint:disable=too-many-instance-attributes - - protocol_version = 'HTTP/1.1' - if PY3: - # if we do like Py2, then headers_factory unconditionally - # becomes a bound method, meaning the fp argument becomes WSGIHandler - def MessageClass(self, *args): - return headers_factory(*args) - else: - MessageClass = headers_factory - - # Attributes reset at various times for each request; not public - # documented. Class attributes to keep the constructor fast - # (but not make lint tools complain) - - status = None # byte string: b'200 OK' - _orig_status = None # native string: '200 OK' - response_headers = None # list of tuples (b'name', b'value') - code = None # Integer parsed from status - provided_date = None - provided_content_length = None - close_connection = False - time_start = 0 # time.time() when begin handling request - time_finish = 0 # time.time() when done handling request - headers_sent = False # Have we already sent headers? 
- response_use_chunked = False # Write with transfer-encoding chunked - environ = None # Dict from self.get_environ - application = None # application callable from self.server.application - requestline = None # native str 'GET / HTTP/1.1' - response_length = 0 # How much data we sent - result = None # The return value of the WSGI application - wsgi_input = None # Instance of Input() - content_length = 0 # From application-provided headers Incoming - # request headers, instance of MessageClass (gunicorn uses hasattr - # on this so the default value needs to be compatible with the - # API) - headers = headers_factory(BytesIO()) - request_version = None # str: 'HTTP 1.1' - command = None # str: 'GET' - path = None # str: '/' - - def __init__(self, sock, address, server, rfile=None): - # Deprecation: The rfile kwarg was introduced in 1.0a1 as part - # of a refactoring. It was never documented or used. It is - # considered DEPRECATED and may be removed in the future. Its - # use is not supported. - - self.socket = sock - self.client_address = address - self.server = server - if rfile is None: - self.rfile = sock.makefile('rb', -1) - else: - self.rfile = rfile - - def handle(self): - """ - The main request handling method, called by the server. - - This method runs a request handling loop, calling - :meth:`handle_one_request` until all requests on the - connection have been handled (that is, it implements - keep-alive). 
- """ - try: - while self.socket is not None: - self.time_start = time.time() - self.time_finish = 0 - - result = self.handle_one_request() - if result is None: - break - if result is True: - continue - - self.status, response_body = result - self.socket.sendall(response_body) - if self.time_finish == 0: - self.time_finish = time.time() - self.log_request() - break - finally: - if self.socket is not None: - _sock = getattr(self.socket, '_sock', None) # Python 3 - try: - # read out request data to prevent error: [Errno 104] Connection reset by peer - if _sock: - try: - # socket.recv would hang - _sock.recv(16384) - finally: - _sock.close() - self.socket.close() - except socket.error: - pass - self.__dict__.pop('socket', None) - self.__dict__.pop('rfile', None) - - def _check_http_version(self): - version_str = self.request_version - if not version_str.startswith("HTTP/"): - return False - version = tuple(int(x) for x in version_str[5:].split(".")) # "HTTP/" - if version[1] < 0 or version < (0, 9) or version >= (2, 0): - return False - return True - - def read_request(self, raw_requestline): - """ - Parse the incoming request. - - Parses various headers into ``self.headers`` using - :attr:`MessageClass`. Other attributes that are set upon a successful - return of this method include ``self.content_length`` and ``self.close_connection``. - - :param str raw_requestline: A native :class:`str` representing - the request line. A processed version of this will be stored - into ``self.requestline``. - - :raises ValueError: If the request is invalid. This error will - not be logged as a traceback (because it's a client issue, not a server problem). - :return: A boolean value indicating whether the request was successfully parsed. - This method should either return a true value or have raised a ValueError - with details about the parsing error. - - .. 
versionchanged:: 1.1b6 - Raise the previously documented :exc:`ValueError` in more cases instead of returning a - false value; this allows subclasses more opportunity to customize behaviour. - """ - # pylint:disable=too-many-branches - self.requestline = raw_requestline.rstrip() - words = self.requestline.split() - if len(words) == 3: - self.command, self.path, self.request_version = words - if not self._check_http_version(): - raise _InvalidClientRequest('Invalid http version: %r' % (raw_requestline,)) - elif len(words) == 2: - self.command, self.path = words - if self.command != "GET": - raise _InvalidClientRequest('Expected GET method: %r' % (raw_requestline,)) - self.request_version = "HTTP/0.9" - # QQQ I'm pretty sure we can drop support for HTTP/0.9 - else: - raise _InvalidClientRequest('Invalid HTTP method: %r' % (raw_requestline,)) - - self.headers = self.MessageClass(self.rfile, 0) - - if self.headers.status: - raise _InvalidClientRequest('Invalid headers status: %r' % (self.headers.status,)) - - if self.headers.get("transfer-encoding", "").lower() == "chunked": - try: - del self.headers["content-length"] - except KeyError: - pass - - content_length = self.headers.get("content-length") - if content_length is not None: - content_length = int(content_length) - if content_length < 0: - raise _InvalidClientRequest('Invalid Content-Length: %r' % (content_length,)) - - if content_length and self.command in ('HEAD', ): - raise _InvalidClientRequest('Unexpected Content-Length') - - self.content_length = content_length - - if self.request_version == "HTTP/1.1": - conntype = self.headers.get("Connection", "").lower() - self.close_connection = (conntype == 'close') - else: - self.close_connection = True - - return True - - def log_error(self, msg, *args): - try: - message = msg % args - except Exception: # pylint:disable=broad-except - traceback.print_exc() - message = '%r %r' % (msg, args) - try: - message = '%s: %s' % (self.socket, message) - except Exception: # 
pylint:disable=broad-except - pass - - try: - self.server.error_log.write(message + '\n') - except Exception: # pylint:disable=broad-except - traceback.print_exc() - - def read_requestline(self): - """ - Read and return the HTTP request line. - - Under both Python 2 and 3, this should return the native - ``str`` type; under Python 3, this probably means the bytes read - from the network need to be decoded (using the ISO-8859-1 charset, aka - latin-1). - """ - line = self.rfile.readline(MAX_REQUEST_LINE) - if PY3: - line = line.decode('latin-1') - return line - - def handle_one_request(self): - """ - Handles one HTTP request using ``self.socket`` and ``self.rfile``. - - Each invocation of this method will do several things, including (but not limited to): - - - Read the request line using :meth:`read_requestline`; - - Read the rest of the request, including headers, with :meth:`read_request`; - - Construct a new WSGI environment in ``self.environ`` using :meth:`get_environ`; - - Store the application in ``self.application``, retrieving it from the server; - - Handle the remainder of the request, including invoking the application, - with :meth:`handle_one_response` - - There are several possible return values to indicate the state - of the client connection: - - - ``None`` - The client connection is already closed or should - be closed because the WSGI application or client set the - ``Connection: close`` header. The request handling - loop should terminate and perform cleanup steps. - - (status, body) - An HTTP status and body tuple. The request was in error, - as detailed by the status and body. The request handling - loop should terminate, close the connection, and perform - cleanup steps. Note that the ``body`` is the complete contents - to send to the client, including all headers and the initial - status line. - - ``True`` - The literal ``True`` value. The request was successfully handled - and the response sent to the client by :meth:`handle_one_response`. 
- The connection remains open to process more requests and the connection - handling loop should call this method again. This is the typical return - value. - - .. seealso:: :meth:`handle` - - .. versionchanged:: 1.1b6 - Funnel exceptions having to do with invalid HTTP requests through - :meth:`_handle_client_error` to allow subclasses to customize. Note that - this is experimental and may change in the future. - """ - # pylint:disable=too-many-return-statements - if self.rfile.closed: - return - - try: - self.requestline = self.read_requestline() - # Account for old subclasses that haven't done this - if PY3 and isinstance(self.requestline, bytes): - self.requestline = self.requestline.decode('latin-1') - except socket.error: - # "Connection reset by peer" or other socket errors aren't interesting here - return - - if not self.requestline: - return - - self.response_length = 0 - - if len(self.requestline) >= MAX_REQUEST_LINE: - return ('414', _REQUEST_TOO_LONG_RESPONSE) - - try: - # for compatibility with older versions of pywsgi, we pass self.requestline as an argument there - # NOTE: read_request is supposed to raise ValueError on invalid input; allow old - # subclasses that return a False value instead. - # NOTE: This can mutate the value of self.headers, so self.get_environ() must not be - # called until AFTER this call is done. - if not self.read_request(self.requestline): - return ('400', _BAD_REQUEST_RESPONSE) - except Exception as ex: # pylint:disable=broad-except - # Notice we don't use self.handle_error because it reports - # a 500 error to the client, and this is almost certainly - # a client error. - # Provide a hook for subclasses. 
- return self._handle_client_error(ex) - - self.environ = self.get_environ() - self.application = self.server.application - - self.handle_one_response() - - if self.close_connection: - return - - if self.rfile.closed: - return - - return True # read more requests - - def finalize_headers(self): - if self.provided_date is None: - self.response_headers.append((b'Date', format_date_time(time.time()))) - - if self.code not in (304, 204): - # the reply will include message-body; make sure we have either Content-Length or chunked - if self.provided_content_length is None: - if hasattr(self.result, '__len__'): - total_len = sum(len(chunk) for chunk in self.result) - total_len_str = str(total_len) - if PY3: - total_len_str = total_len_str.encode("latin-1") - self.response_headers.append((b'Content-Length', total_len_str)) - else: - if self.request_version != 'HTTP/1.0': - self.response_use_chunked = True - self.response_headers.append((b'Transfer-Encoding', b'chunked')) - - def _sendall(self, data): - try: - self.socket.sendall(data) - except socket.error as ex: - self.status = 'socket error: %s' % ex - if self.code > 0: - self.code = -self.code - raise - self.response_length += len(data) - - def _write(self, data, - _PY34_EXACTLY=(sys.version_info[:2] == (3, 4)), - _bytearray=bytearray): - if not data: - # The application/middleware are allowed to yield - # empty bytestrings. - return - - if self.response_use_chunked: - ## Write the chunked encoding - # header - if _PY34_EXACTLY: - # This is the only version we support that doesn't - # allow % to be used with bytes. Passing a bytestring - # directly in to bytearray() is faster than passing a - # (unicode) str with encoding, which naturally is faster still - # than encoding first. Interestingly, byte formatting on Python 3 - # is faster than str formatting. 
- header_str = '%x\r\n' % len(data) - towrite = _bytearray(header_str, 'ascii') - else: - header_str = b'%x\r\n' % len(data) - towrite = _bytearray(header_str) - - # data - towrite += data - # trailer - towrite += b'\r\n' - self._sendall(towrite) - else: - self._sendall(data) - - def write(self, data): - # The write() callable we return from start_response. - # https://www.python.org/dev/peps/pep-3333/#the-write-callable - # Supposed to do pretty much the same thing as yielding values - # from the application's return. - if self.code in (304, 204) and data: - raise AssertionError('The %s response must have no body' % self.code) - - if self.headers_sent: - self._write(data) - else: - if not self.status: - raise AssertionError("The application did not call start_response()") - self._write_with_headers(data) - - def _write_with_headers(self, data): - self.headers_sent = True - self.finalize_headers() - - # self.response_headers and self.status are already in latin-1, as encoded by self.start_response - towrite = bytearray(b'HTTP/1.1 ') - towrite += self.status - towrite += b'\r\n' - for header, value in self.response_headers: - towrite += header - towrite += b': ' - towrite += value - towrite += b"\r\n" - - towrite += b'\r\n' - self._sendall(towrite) - # No need to copy the data into towrite; we may make an extra syscall - # but the copy time could be substantial too, and it reduces the chances - # of sendall being able to send everything in one go - self._write(data) - - def start_response(self, status, headers, exc_info=None): - """ - .. versionchanged:: 1.2a1 - Avoid HTTP header injection by raising a :exc:`ValueError` - if *status* or any *header* name or value contains a carriage - return or newline. - .. versionchanged:: 1.1b5 - Pro-actively handle checking the encoding of the status line - and headers during this method. On Python 2, avoid some - extra encodings. 
- """ - # pylint:disable=too-many-branches,too-many-statements - if exc_info: - try: - if self.headers_sent: - # Re-raise original exception if headers sent - reraise(*exc_info) - finally: - # Avoid dangling circular ref - exc_info = None - - # Pep 3333, "The start_response callable": - # https://www.python.org/dev/peps/pep-3333/#the-start-response-callable - # "Servers should check for errors in the headers at the time - # start_response is called, so that an error can be raised - # while the application is still running." Here, we check the encoding. - # This aids debugging: headers especially are generated programmatically - # and an encoding error in a loop or list comprehension yields an opaque - # UnicodeError without any clue which header was wrong. - # Note that this results in copying the header list at this point, not modifying it, - # although we are allowed to do so if needed. This slightly increases memory usage. - # We also check for HTTP Response Splitting vulnerabilities - response_headers = [] - header = None - value = None - try: - for header, value in headers: - if not isinstance(header, str): - raise UnicodeError("The header must be a native string", header, value) - if not isinstance(value, str): - raise UnicodeError("The value must be a native string", header, value) - if '\r' in header or '\n' in header: - raise ValueError('carriage return or newline in header name', header) - if '\r' in value or '\n' in value: - raise ValueError('carriage return or newline in header value', value) - # Either we're on Python 2, in which case bytes is correct, or - # we're on Python 3 and the user screwed up (because it should be a native - # string). In either case, make sure that this is latin-1 compatible. Under - # Python 2, bytes.encode() will take a round-trip through the system encoding, - # which may be ascii, which is not really what we want. 
However, the latin-1 encoding - # can encode everything except control characters and the block from 0x7F to 0x9F, so - # explicitly round-tripping bytes through the encoding is unlikely to be of much - # benefit, so we go for speed (the WSGI spec specifically calls out allowing the range - # from 0x00 to 0xFF, although the HTTP spec forbids the control characters). - # Note: Some Python 2 implementations, like Jython, may allow non-octet (above 255) values - # in their str implementation; this is mentioned in the WSGI spec, but we don't - # run on any platform like that so we can assume that a str value is pure bytes. - response_headers.append((header if not PY3 else header.encode("latin-1"), - value if not PY3 else value.encode("latin-1"))) - except UnicodeEncodeError: - # If we get here, we're guaranteed to have a header and value - raise UnicodeError("Non-latin1 header", repr(header), repr(value)) - - # Same as above - if not isinstance(status, str): - raise UnicodeError("The status string must be a native string") - if '\r' in status or '\n' in status: - raise ValueError("carriage return or newline in status", status) - # don't assign to anything until the validation is complete, including parsing the - # code - code = int(status.split(' ', 1)[0]) - - self.status = status if not PY3 else status.encode("latin-1") - self._orig_status = status # Preserve the native string for logging - self.response_headers = response_headers - self.code = code - - provided_connection = None - self.provided_date = None - self.provided_content_length = None - - for header, value in headers: - header = header.lower() - if header == 'connection': - provided_connection = value - elif header == 'date': - self.provided_date = value - elif header == 'content-length': - self.provided_content_length = value - - if self.request_version == 'HTTP/1.0' and provided_connection is None: - response_headers.append((b'Connection', b'close')) - self.close_connection = True - elif 
provided_connection == 'close': - self.close_connection = True - - if self.code in (304, 204): - if self.provided_content_length is not None and self.provided_content_length != '0': - msg = 'Invalid Content-Length for %s response: %r (must be absent or zero)' % (self.code, self.provided_content_length) - if PY3: - msg = msg.encode('latin-1') - raise AssertionError(msg) - - return self.write - - def log_request(self): - self.server.log.write(self.format_request() + '\n') - - def format_request(self): - now = datetime.now().replace(microsecond=0) - length = self.response_length or '-' - if self.time_finish: - delta = '%.6f' % (self.time_finish - self.time_start) - else: - delta = '-' - client_address = self.client_address[0] if isinstance(self.client_address, tuple) else self.client_address - return '%s - - [%s] "%s" %s %s %s' % ( - client_address or '-', - now, - self.requestline or '', - # Use the native string version of the status, saved so we don't have to - # decode. But fallback to the encoded 'status' in case of subclasses - # (Is that really necessary? At least there's no overhead.) - (self._orig_status or self.status or '000').split()[0], - length, - delta) - - def process_result(self): - for data in self.result: - if data: - self.write(data) - if self.status and not self.headers_sent: - # In other words, the application returned an empty - # result iterable (and did not use the write callable) - # Trigger the flush of the headers. - self.write(b'') - if self.response_use_chunked: - self._sendall(b'0\r\n\r\n') - - - def run_application(self): - assert self.result is None - try: - self.result = self.application(self.environ, self.start_response) - self.process_result() - finally: - close = getattr(self.result, 'close', None) - try: - if close is not None: - close() - finally: - # Discard the result. 
If it's a generator this can - # free a lot of hidden resources (if we failed to iterate - # all the way through it---the frames are automatically - # cleaned up when StopIteration is raised); but other cases - # could still free up resources sooner than otherwise. - close = None - self.result = None - - #: These errors are silently ignored by :meth:`handle_one_response` to avoid producing - #: excess log entries on normal operating conditions. They indicate - #: a remote client has disconnected and there is little or nothing - #: this process can be expected to do about it. You may change this - #: value in a subclass. - #: - #: The default value includes :data:`errno.EPIPE` and :data:`errno.ECONNRESET`. - #: On Windows this also includes :data:`errno.WSAECONNABORTED`. - #: - #: This is a provisional API, subject to change. See :pr:`377`, :pr:`999` - #: and :issue:`136`. - #: - #: .. versionadded:: 1.3 - ignored_socket_errors = (errno.EPIPE, errno.ECONNRESET) - try: - ignored_socket_errors += (errno.WSAECONNABORTED,) - except AttributeError: - pass # Not windows - - def handle_one_response(self): - """ - Invoke the application to produce one response. - - This is called by :meth:`handle_one_request` after all the - state for the request has been established. It is responsible - for error handling. - """ - self.time_start = time.time() - self.status = None - self.headers_sent = False - - self.result = None - self.response_use_chunked = False - self.response_length = 0 - - try: - try: - self.run_application() - finally: - try: - self.wsgi_input._discard() - except (socket.error, IOError): - # Don't let exceptions during discarding - # input override any exception that may have been - # raised by the application, such as our own _InvalidClientInput. 
- # In the general case, these aren't even worth logging (see the comment - # just below) - pass - except _InvalidClientInput: - self._send_error_response_if_possible(400) - except socket.error as ex: - if ex.args[0] in self.ignored_socket_errors: - # See description of self.ignored_socket_errors. - if not PY3: - sys.exc_clear() - self.close_connection = True - else: - self.handle_error(*sys.exc_info()) - except: # pylint:disable=bare-except - self.handle_error(*sys.exc_info()) - finally: - self.time_finish = time.time() - self.log_request() - - def _send_error_response_if_possible(self, error_code): - if self.response_length: - self.close_connection = True - else: - status, headers, body = _ERRORS[error_code] - try: - self.start_response(status, headers[:]) - self.write(body) - except socket.error: - if not PY3: - sys.exc_clear() - self.close_connection = True - - def _log_error(self, t, v, tb): - # TODO: Shouldn't we dump this to wsgi.errors? If we did that now, it would - # wind up getting logged twice - if not issubclass(t, GreenletExit): - context = self.environ - if not isinstance(context, self.server.secure_environ_class): - context = self.server.secure_environ_class(context) - self.server.loop.handle_error(context, t, v, tb) - - def handle_error(self, t, v, tb): - # Called for internal, unexpected errors, NOT invalid client input - self._log_error(t, v, tb) - del tb - self._send_error_response_if_possible(500) - - def _handle_client_error(self, ex): - # Called for invalid client input - # Returns the appropriate error response. - if not isinstance(ex, ValueError): - # XXX: Why not self._log_error to send it through the loop's - # handle_error method? 
- traceback.print_exc() - if isinstance(ex, _InvalidClientRequest): - # These come with good error messages, and we want to let - # log_error deal with the formatting, especially to handle encoding - self.log_error(*ex.args) - else: - self.log_error('Invalid request: %s', str(ex) or ex.__class__.__name__) - return ('400', _BAD_REQUEST_RESPONSE) - - def _headers(self): - key = None - value = None - IGNORED_KEYS = (None, 'CONTENT_TYPE', 'CONTENT_LENGTH') - for header in self.headers.headers: - if key is not None and header[:1] in " \t": - value += header - continue - - if key not in IGNORED_KEYS: - yield 'HTTP_' + key, value.strip() - - key, value = header.split(':', 1) - if '_' in key: - # strip incoming bad veaders - key = None - else: - key = key.replace('-', '_').upper() - - if key not in IGNORED_KEYS: - yield 'HTTP_' + key, value.strip() - - def get_environ(self): - """ - Construct and return a new WSGI environment dictionary for a specific request. - - This should begin with asking the server for the base environment - using :meth:`WSGIServer.get_environ`, and then proceed to add the - request specific values. - - By the time this method is invoked the request line and request shall have - been parsed and ``self.headers`` shall be populated. - """ - env = self.server.get_environ() - env['REQUEST_METHOD'] = self.command - env['SCRIPT_NAME'] = '' - - if '?' in self.path: - path, query = self.path.split('?', 1) - else: - path, query = self.path, '' - # Note that self.path contains the original str object; if it contains - # encoded escapes, it will NOT match PATH_INFO. 
class LoggingLogAdapter(object):
    """
    An adapter for :class:`logging.Logger` instances
    to let them be used with :class:`WSGIServer`.

    .. warning:: Unless the entire process is monkey-patched at a very
        early part of the lifecycle (before logging is configured),
        loggers are likely to not be gevent-cooperative. For example,
        the socket and syslog handlers use the socket module in a way
        that can block, and most handlers acquire threading locks.

    .. warning:: It *may* be possible for the logging functions to be
       called in the :class:`gevent.Hub` greenlet. Code running in the
       hub greenlet cannot use any gevent blocking functions without triggering
       a ``LoopExit``.

    .. versionadded:: 1.1a3

    .. versionchanged:: 1.1b6
       Attributes not present on this object are proxied to the underlying
       logger instance. This permits using custom :class:`~logging.Logger`
       subclasses (or indeed, even duck-typed objects).

    .. versionchanged:: 1.1
       Strip trailing newline characters on the message passed to :meth:`write`
       because log handlers will usually add one themselves.
    """

    # gevent avoids importing and using logging because importing it and
    # creating loggers creates native locks unless monkey-patched.

    __slots__ = ('_logger', '_level')

    def __init__(self, logger, level=20):
        """
        Write information to the *logger* at the given *level* (default to INFO).
        """
        self._logger = logger
        self._level = level

    def write(self, msg):
        """Log *msg* at the configured level, minus one trailing newline."""
        if msg and msg.endswith('\n'):
            msg = msg[:-1]
        self._logger.log(self._level, msg)

    def flush(self):
        "No-op; required to be a file-like object"
        pass

    def writelines(self, lines):
        """Pass each item of *lines* to :meth:`write`."""
        for line in lines:
            self.write(line)

    def __getattr__(self, name):
        # Only called when normal lookup fails. If one of our own slots
        # is unset (e.g. on the bare instance created while copying),
        # delegating would read ``self._logger`` and re-enter this
        # method without end, so report the attribute as missing.
        if name in LoggingLogAdapter.__slots__:
            raise AttributeError(name)
        return getattr(self._logger, name)

    def __setattr__(self, name, value):
        # Our own slots are stored on the adapter; everything else is
        # set on the wrapped logger.
        if name not in LoggingLogAdapter.__slots__:
            setattr(self._logger, name, value)
        else:
            object.__setattr__(self, name, value)

    def __delattr__(self, name):
        # Mirror __setattr__: our own slots are deleted from the
        # adapter, everything else from the wrapped logger.
        if name in LoggingLogAdapter.__slots__:
            object.__delattr__(self, name)
        else:
            delattr(self._logger, name)
class Environ(dict):
    """
    A base class that can be used for WSGI environment objects.

    Provisional API.

    .. versionadded:: 1.2a1
    """

    __slots__ = () # add no ivars or weakref ability

    def copy(self):
        """Return a shallow copy that has the same class as this object."""
        return self.__class__(self)

    if not hasattr(dict, 'iteritems'):
        # Python 3
        def iteritems(self):
            return self.items()

    def __reduce_ex__(self, proto):
        # Reduce to a plain dict holding the same items.
        return (dict, (), None, None, iter(self.iteritems()))

class SecureEnviron(Environ):
    """
    An environment that does not print its keys and values
    by default.

    Provisional API.

    This is intended to keep potentially sensitive information like
    HTTP authorization and cookies from being inadvertently printed
    or logged.

    For debugging, each instance can have its *secure_repr* attribute
    set to ``False``, which will cause it to print like a normal dict.

    When *secure_repr* is ``True`` (the default), then the value of
    the *whitelist_keys* attribute is consulted; if this value is
    true-ish, it should be a container (something that responds to
    ``in``) of key names (typically a list or set). Keys and values in
    this dictionary that are in *whitelist_keys* will then be printed,
    while all other values will be masked. These values may be
    customized on the class by setting the *default_secure_repr* and
    *default_whitelist_keys*, respectively::

        >>> environ = SecureEnviron(key='value')
        >>> environ # doctest: +ELLIPSIS
        <pywsgi.SecureEnviron dict (keys: 1) at ...

    If we whitelist the key, it gets printed::

        >>> environ.whitelist_keys = {'key'}
        >>> environ
        {'key': 'value'}

    A non-whitelisted key (*only*, to avoid doctest issues) is masked::

        >>> environ['secure'] = 'secret'; del environ['key']
        >>> environ
        {'secure': '<MASKED>'}

    We can turn it off entirely for the instance::

        >>> environ.secure_repr = False
        >>> environ
        {'secure': 'secret'}

    We can also customize it at the class level (here we use a new
    class to be explicit and to avoid polluting the true default
    values; we would set this class to be the ``environ_class`` of the
    server)::

        >>> class MyEnviron(SecureEnviron):
        ...    default_whitelist_keys = ('key',)
        ...
        >>> environ = MyEnviron({'key': 'value'})
        >>> environ
        {'key': 'value'}

    When *print_masked_keys* is false, keys that are not whitelisted
    are left out entirely and only their count is reported, as
    ``(hidden keys: N)``.

    .. versionadded:: 1.2a1
    """

    default_secure_repr = True
    default_whitelist_keys = ()
    default_print_masked_keys = True

    # Allow instances to override the class values,
    # but inherit from the class if not present. Keeps instances
    # small since we can't combine __slots__ with class attributes
    # of the same name.
    __slots__ = ('secure_repr', 'whitelist_keys', 'print_masked_keys')

    def __getattr__(self, name):
        # Reached when a slot has not been set on the instance: fall
        # back to the matching ``default_<name>`` class attribute.
        if name in SecureEnviron.__slots__:
            return getattr(type(self), 'default_' + name)
        raise AttributeError(name)

    def __repr__(self):
        if self.secure_repr:
            whitelist = self.whitelist_keys
            print_masked = self.print_masked_keys
            if whitelist:
                safe = {k: self[k] if k in whitelist else "<MASKED>"
                        for k in self
                        if k in whitelist or print_masked}
                safe_repr = repr(safe)
                if not print_masked and len(safe) != len(self):
                    hidden = "(hidden keys: %d)" % (len(self) - len(safe))
                    if safe:
                        safe_repr = safe_repr[:-1] + ", " + hidden + "}"
                    else:
                        # Nothing printable is left; avoid the dangling
                        # separator of "{, (hidden keys: N)}".
                        safe_repr = "{" + hidden + "}"
                return safe_repr
            return "<pywsgi.SecureEnviron dict (keys: %d) at %s>" % (len(self), id(self))
        return Environ.__repr__(self)
    __str__ = __repr__


class WSGISecureEnviron(SecureEnviron):
    """
    Specializes the default list of whitelisted keys to a few
    common WSGI variables.

    Example::

       >>> environ = WSGISecureEnviron(REMOTE_ADDR='::1', HTTP_AUTHORIZATION='secret')
       >>> environ
       {'REMOTE_ADDR': '::1', (hidden keys: 1)}
       >>> import pprint
       >>> pprint.pprint(environ)
       {'REMOTE_ADDR': '::1', (hidden keys: 1)}
       >>> print(pprint.pformat(environ))
       {'REMOTE_ADDR': '::1', (hidden keys: 1)}
    """
    default_whitelist_keys = ('REMOTE_ADDR', 'REMOTE_PORT', 'HTTP_HOST')
    default_print_masked_keys = False
- (If you pass a :class:`~logging.Logger` instance, or in - general something that provides a ``log`` method but not a - ``write`` method, such a wrapper will automatically be created - and it will be logged to at the :data:`~logging.INFO` level.) - - :keyword error_log: If given, a file-like object with ``write``, - ``writelines`` and ``flush`` methods to which error logs will - be written. If not given, defaults to :obj:`sys.stderr`. You - may pass ``None`` to disable error logging (not recommended). - You may use a wrapper, around e.g., :mod:`logging`, to support - objects that don't implement the proper methods. This - parameter will become the value for ``wsgi.errors`` in the - WSGI environment (if not already set). (As with *log*, - wrappers for :class:`~logging.Logger` instances and the like - will be created automatically and logged to at the :data:`~logging.ERROR` - level.) - - .. seealso:: - - :class:`LoggingLogAdapter` - See important warnings before attempting to use :mod:`logging`. - - .. versionchanged:: 1.1a3 - Added the ``error_log`` parameter, and set ``wsgi.errors`` in the WSGI - environment to this value. - .. versionchanged:: 1.1a3 - Add support for passing :class:`logging.Logger` objects to the ``log`` and - ``error_log`` arguments. - """ - - #: A callable taking three arguments: (socket, address, server) and returning - #: an object with a ``handle()`` method. The callable is called once for - #: each incoming socket request, as is its handle method. The handle method should not - #: return until all use of the socket is complete. - #: - #: This class uses the :class:`WSGIHandler` object as the default value. You may - #: subclass this class and set a different default value, or you may pass - #: a value to use in the ``handler_class`` keyword constructor argument. - handler_class = WSGIHandler - - #: The object to which request logs will be written. - #: It must never be None. Initialized from the ``log`` constructor - #: parameter. 
- log = None - - #: The object to which error logs will be written. - #: It must never be None. Initialized from the ``error_log`` constructor - #: parameter. - error_log = None - - #: The class of environ objects passed to the handlers. - #: Must be a dict subclass. For compliance with :pep:`3333` - #: and libraries like WebOb, this is simply :class:`dict` - #: but this can be customized in a subclass or per-instance - #: (probably to :class:`WSGISecureEnviron`). - #: - #: .. versionadded:: 1.2a1 - environ_class = dict - - # Undocumented internal detail: the class that WSGIHandler._log_error - # will cast to before passing to the loop. - secure_environ_class = WSGISecureEnviron - - base_env = {'GATEWAY_INTERFACE': 'CGI/1.1', - 'SERVER_SOFTWARE': 'gevent/%d.%d Python/%d.%d' % (gevent.version_info[:2] + sys.version_info[:2]), - 'SCRIPT_NAME': '', - 'wsgi.version': (1, 0), - 'wsgi.multithread': False, # XXX: Aren't we really, though? - 'wsgi.multiprocess': False, - 'wsgi.run_once': False} - - def __init__(self, listener, application=None, backlog=None, spawn='default', - log='default', error_log='default', - handler_class=None, - environ=None, **ssl_args): - StreamServer.__init__(self, listener, backlog=backlog, spawn=spawn, **ssl_args) - if application is not None: - self.application = application - if handler_class is not None: - self.handler_class = handler_class - - # Note that we can't initialize these as class variables: - # sys.stderr might get monkey patched at runtime. 
def set_environ(self, environ=None):
    """
    Rebuild ``self.environ`` from ``self.base_env``.

    The new mapping is an instance of ``self.environ_class`` seeded
    with ``base_env``. ``wsgi.url_scheme`` is set from
    ``self.ssl_enabled``. The entries of *environ* are then applied on
    top; when *environ* is ``None`` the previous value of
    ``self.environ``, if there is one, is used instead. Finally
    ``wsgi.errors`` defaults to ``self.error_log`` when it is missing
    or ``None``.
    """
    if environ is not None:
        self.environ = environ
    overrides = getattr(self, 'environ', None)

    env = self.environ_class(self.base_env)
    self.environ = env
    env['wsgi.url_scheme'] = 'https' if self.ssl_enabled else 'http'
    if overrides is not None:
        env.update(overrides)
    if env.get('wsgi.errors') is None:
        env['wsgi.errors'] = self.error_log
def _main():
    """
    Serve a WSGI application named on the command line.

    Provisional main handler, for quick tests, not production usage.
    Takes one positional argument of the form ``module:callable`` and an
    optional ``-b``/``--bind`` address (default ``:8080``), imports the
    module, looks up the callable and serves it forever.
    """
    # Patch before anything else is imported.
    from gevent import monkey
    monkey.patch_all()

    import argparse
    import importlib

    parser = argparse.ArgumentParser()
    parser.add_argument("app", help="dotted name of WSGI app callable [module:callable]")
    parser.add_argument("-b", "--bind",
                        help="The socket to bind",
                        default=":8080")

    args = parser.parse_args()

    # Exactly one colon with something on both sides is required.
    # Report anything else as a usage error rather than letting a
    # tuple-unpacking ValueError escape as a traceback.
    module_name, sep, app_name = args.app.partition(':')
    if not sep or not module_name or not app_name or ':' in app_name:
        parser.error("app must have the form module:callable, got %r" % (args.app,))

    module = importlib.import_module(module_name)
    app = getattr(module, app_name)
    bind = args.bind

    server = WSGIServer(bind, app)
    server.serve_forever()