diff options
Diffstat (limited to 'python/gevent/pywsgi.py')
-rw-r--r-- | python/gevent/pywsgi.py | 1509 |
1 files changed, 1509 insertions, 0 deletions
diff --git a/python/gevent/pywsgi.py b/python/gevent/pywsgi.py new file mode 100644 index 0000000..2726f6d --- /dev/null +++ b/python/gevent/pywsgi.py @@ -0,0 +1,1509 @@ +# Copyright (c) 2005-2009, eventlet contributors +# Copyright (c) 2009-2015, gevent contributors +""" +A pure-Python, gevent-friendly WSGI server. + +The server is provided in :class:`WSGIServer`, but most of the actual +WSGI work is handled by :class:`WSGIHandler` --- a new instance is +created for each request. The server can be customized to use +different subclasses of :class:`WSGIHandler`. + +""" +# FIXME: Can we refactor to make smallor? +# pylint:disable=too-many-lines + +import errno +from io import BytesIO +import string +import sys +import time +import traceback +from datetime import datetime + +try: + from urllib import unquote +except ImportError: + from urllib.parse import unquote # python 2 pylint:disable=import-error,no-name-in-module + +from gevent import socket +import gevent +from gevent.server import StreamServer +from gevent.hub import GreenletExit +from gevent._compat import PY3, reraise + +from functools import partial +if PY3: + unquote_latin1 = partial(unquote, encoding='latin-1') +else: + unquote_latin1 = unquote + +_no_undoc_members = True # Don't put undocumented things into sphinx + +__all__ = [ + 'WSGIServer', + 'WSGIHandler', + 'LoggingLogAdapter', + 'Environ', + 'SecureEnviron', + 'WSGISecureEnviron', +] + + +MAX_REQUEST_LINE = 8192 +# Weekday and month names for HTTP date/time formatting; always English! +_WEEKDAYNAME = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +_MONTHNAME = [None, # Dummy so we can use 1-based month numbers + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] + +# The contents of the "HEX" grammar rule for HTTP, upper and lowercase A-F plus digits, +# in byte form for comparing to the network. +_HEX = string.hexdigits.encode('ascii') + +# Errors +_ERRORS = dict() +_INTERNAL_ERROR_STATUS = '500 Internal Server Error' +_INTERNAL_ERROR_BODY = b'Internal Server Error' +_INTERNAL_ERROR_HEADERS = [('Content-Type', 'text/plain'), + ('Connection', 'close'), + ('Content-Length', str(len(_INTERNAL_ERROR_BODY)))] +_ERRORS[500] = (_INTERNAL_ERROR_STATUS, _INTERNAL_ERROR_HEADERS, _INTERNAL_ERROR_BODY) + +_BAD_REQUEST_STATUS = '400 Bad Request' +_BAD_REQUEST_BODY = '' +_BAD_REQUEST_HEADERS = [('Content-Type', 'text/plain'), + ('Connection', 'close'), + ('Content-Length', str(len(_BAD_REQUEST_BODY)))] +_ERRORS[400] = (_BAD_REQUEST_STATUS, _BAD_REQUEST_HEADERS, _BAD_REQUEST_BODY) + +_REQUEST_TOO_LONG_RESPONSE = b"HTTP/1.1 414 Request URI Too Long\r\nConnection: close\r\nContent-length: 0\r\n\r\n" +_BAD_REQUEST_RESPONSE = b"HTTP/1.1 400 Bad Request\r\nConnection: close\r\nContent-length: 0\r\n\r\n" +_CONTINUE_RESPONSE = b"HTTP/1.1 100 Continue\r\n\r\n" + + +def format_date_time(timestamp): + # Return a byte-string of the date and time in HTTP format + # .. versionchanged:: 1.1b5 + # Return a byte string, not a native string + year, month, day, hh, mm, ss, wd, _y, _z = time.gmtime(timestamp) + value = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (_WEEKDAYNAME[wd], day, _MONTHNAME[month], year, hh, mm, ss) + if PY3: + value = value.encode("latin-1") + return value + + +class _InvalidClientInput(IOError): + # Internal exception raised by Input indicating that the client + # sent invalid data at the lowest level of the stream. The result + # *should* be a HTTP 400 error. + pass + + +class _InvalidClientRequest(ValueError): + # Internal exception raised by WSGIHandler.read_request + # indicating that the client sent an HTTP request that cannot + # be parsed (e.g., invalid grammar). The result *should* be an + # HTTP 400 error + pass + + +class Input(object): + + __slots__ = ('rfile', 'content_length', 'socket', 'position', + 'chunked_input', 'chunk_length', '_chunked_input_error') + + def __init__(self, rfile, content_length, socket=None, chunked_input=False): + # pylint:disable=redefined-outer-name + self.rfile = rfile + self.content_length = content_length + self.socket = socket + self.position = 0 + self.chunked_input = chunked_input + self.chunk_length = -1 + self._chunked_input_error = False + + def _discard(self): + if self._chunked_input_error: + # We are in an unknown state, so we can't necessarily discard + # the body (e.g., if the client keeps the socket open, we could hang + # here forever). + # In this case, we've raised an exception and the user of this object + # is going to close the socket, so we don't have to discard + return + + if self.socket is None and (self.position < (self.content_length or 0) or self.chunked_input): + # ## Read and discard body + while 1: + d = self.read(16384) + if not d: + break + + def _send_100_continue(self): + if self.socket is not None: + self.socket.sendall(_CONTINUE_RESPONSE) + self.socket = None + + def _do_read(self, length=None, use_readline=False): + if use_readline: + reader = self.rfile.readline + else: + reader = self.rfile.read + content_length = self.content_length + if content_length is None: + # Either Content-Length or "Transfer-Encoding: chunked" must be present in a request with a body + # if it was chunked, then this function would have not been called + return b'' + + self._send_100_continue() + left = content_length - self.position + if length is None: + length = left + elif length > left: + length = left + if not length: + return b'' + + # On Python 2, self.rfile is usually socket.makefile(), which + # uses cStringIO.StringIO. If *length* is greater than the C + # sizeof(int) (typically 32 bits signed), parsing the argument to + # readline raises OverflowError. StringIO.read(), OTOH, uses + # PySize_t, typically a long (64 bits). In a bare readline() + # case, because the header lines we're trying to read with + # readline are typically expected to be small, we can correct + # that failure by simply doing a smaller call to readline and + # appending; failures in read we let propagate. + try: + read = reader(length) + except OverflowError: + if not use_readline: + # Expecting to read more than 64 bits of data. Ouch! + raise + # We could loop on calls to smaller readline(), appending them + # until we actually get a newline. For uses in this module, + # we expect the actual length to be small, but WSGI applications + # are allowed to pass in an arbitrary length. (This loop isn't optimal, + # but even client applications *probably* have short lines.) + read = b'' + while len(read) < length and not read.endswith(b'\n'): + read += reader(MAX_REQUEST_LINE) + + self.position += len(read) + if len(read) < length: + if (use_readline and not read.endswith(b"\n")) or not use_readline: + raise IOError("unexpected end of file while reading request at position %s" % (self.position,)) + + return read + + def __read_chunk_length(self, rfile): + # Read and return the next integer chunk length. If no + # chunk length can be read, raises _InvalidClientInput. + + # Here's the production for a chunk: + # (http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html) + # chunk = chunk-size [ chunk-extension ] CRLF + # chunk-data CRLF + # chunk-size = 1*HEX + # chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + # chunk-ext-name = token + # chunk-ext-val = token | quoted-string + + # To cope with malicious or broken clients that fail to send valid + # chunk lines, the strategy is to read character by character until we either reach + # a ; or newline. If at any time we read a non-HEX digit, we bail. If we hit a + # ;, indicating an chunk-extension, we'll read up to the next + # MAX_REQUEST_LINE characters + # looking for the CRLF, and if we don't find it, we bail. If we read more than 16 hex characters, + # (the number needed to represent a 64-bit chunk size), we bail (this protects us from + # a client that sends an infinite stream of `F`, for example). + + buf = BytesIO() + while 1: + char = rfile.read(1) + if not char: + self._chunked_input_error = True + raise _InvalidClientInput("EOF before chunk end reached") + if char == b'\r': + break + if char == b';': + break + + if char not in _HEX: + self._chunked_input_error = True + raise _InvalidClientInput("Non-hex data", char) + buf.write(char) + if buf.tell() > 16: + self._chunked_input_error = True + raise _InvalidClientInput("Chunk-size too large.") + + if char == b';': + i = 0 + while i < MAX_REQUEST_LINE: + char = rfile.read(1) + if char == b'\r': + break + i += 1 + else: + # we read more than MAX_REQUEST_LINE without + # hitting CR + self._chunked_input_error = True + raise _InvalidClientInput("Too large chunk extension") + + if char == b'\r': + # We either got here from the main loop or from the + # end of an extension + char = rfile.read(1) + if char != b'\n': + self._chunked_input_error = True + raise _InvalidClientInput("Line didn't end in CRLF") + return int(buf.getvalue(), 16) + + def _chunked_read(self, length=None, use_readline=False): + # pylint:disable=too-many-branches + rfile = self.rfile + self._send_100_continue() + + if length == 0: + return b"" + + if length is not None and length < 0: + length = None + + if use_readline: + reader = self.rfile.readline + else: + reader = self.rfile.read + + response = [] + while self.chunk_length != 0: + maxreadlen = self.chunk_length - self.position + if length is not None and length < maxreadlen: + maxreadlen = length + + if maxreadlen > 0: + data = reader(maxreadlen) + if not data: + self.chunk_length = 0 + self._chunked_input_error = True + raise IOError("unexpected end of file while parsing chunked data") + + datalen = len(data) + response.append(data) + + self.position += datalen + if self.chunk_length == self.position: + rfile.readline() + + if length is not None: + length -= datalen + if length == 0: + break + if use_readline and data[-1] == b"\n"[0]: + break + else: + # We're at the beginning of a chunk, so we need to + # determine the next size to read + self.chunk_length = self.__read_chunk_length(rfile) + self.position = 0 + if self.chunk_length == 0: + # Last chunk. Terminates with a CRLF. + rfile.readline() + return b''.join(response) + + def read(self, length=None): + if self.chunked_input: + return self._chunked_read(length) + return self._do_read(length) + + def readline(self, size=None): + if self.chunked_input: + return self._chunked_read(size, True) + return self._do_read(size, use_readline=True) + + def readlines(self, hint=None): + # pylint:disable=unused-argument + return list(self) + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + __next__ = next + + +try: + import mimetools + headers_factory = mimetools.Message +except ImportError: + # adapt Python 3 HTTP headers to old API + from http import client # pylint:disable=import-error + + class OldMessage(client.HTTPMessage): + def __init__(self, **kwargs): + super(client.HTTPMessage, self).__init__(**kwargs) # pylint:disable=bad-super-call + self.status = '' + + def getheader(self, name, default=None): + return self.get(name, default) + + @property + def headers(self): + for key, value in self._headers: + yield '%s: %s\r\n' % (key, value) + + @property + def typeheader(self): + return self.get('content-type') + + def headers_factory(fp, *args): # pylint:disable=unused-argument + try: + ret = client.parse_headers(fp, _class=OldMessage) + except client.LineTooLong: + ret = OldMessage() + ret.status = 'Line too long' + return ret + + +class WSGIHandler(object): + """ + Handles HTTP requests from a socket, creates the WSGI environment, and + interacts with the WSGI application. + + This is the default value of :attr:`WSGIServer.handler_class`. + This class may be subclassed carefully, and that class set on a + :class:`WSGIServer` instance through a keyword argument at + construction time. + + Instances are constructed with the same arguments as passed to the + server's :meth:`WSGIServer.handle` method followed by the server + itself. The application and environment are obtained from the server. + + """ + # pylint:disable=too-many-instance-attributes + + protocol_version = 'HTTP/1.1' + if PY3: + # if we do like Py2, then headers_factory unconditionally + # becomes a bound method, meaning the fp argument becomes WSGIHandler + def MessageClass(self, *args): + return headers_factory(*args) + else: + MessageClass = headers_factory + + # Attributes reset at various times for each request; not public + # documented. Class attributes to keep the constructor fast + # (but not make lint tools complain) + + status = None # byte string: b'200 OK' + _orig_status = None # native string: '200 OK' + response_headers = None # list of tuples (b'name', b'value') + code = None # Integer parsed from status + provided_date = None + provided_content_length = None + close_connection = False + time_start = 0 # time.time() when begin handling request + time_finish = 0 # time.time() when done handling request + headers_sent = False # Have we already sent headers? + response_use_chunked = False # Write with transfer-encoding chunked + environ = None # Dict from self.get_environ + application = None # application callable from self.server.application + requestline = None # native str 'GET / HTTP/1.1' + response_length = 0 # How much data we sent + result = None # The return value of the WSGI application + wsgi_input = None # Instance of Input() + content_length = 0 # From application-provided headers Incoming + # request headers, instance of MessageClass (gunicorn uses hasattr + # on this so the default value needs to be compatible with the + # API) + headers = headers_factory(BytesIO()) + request_version = None # str: 'HTTP 1.1' + command = None # str: 'GET' + path = None # str: '/' + + def __init__(self, sock, address, server, rfile=None): + # Deprecation: The rfile kwarg was introduced in 1.0a1 as part + # of a refactoring. It was never documented or used. It is + # considered DEPRECATED and may be removed in the future. Its + # use is not supported. + + self.socket = sock + self.client_address = address + self.server = server + if rfile is None: + self.rfile = sock.makefile('rb', -1) + else: + self.rfile = rfile + + def handle(self): + """ + The main request handling method, called by the server. + + This method runs a request handling loop, calling + :meth:`handle_one_request` until all requests on the + connection have been handled (that is, it implements + keep-alive). + """ + try: + while self.socket is not None: + self.time_start = time.time() + self.time_finish = 0 + + result = self.handle_one_request() + if result is None: + break + if result is True: + continue + + self.status, response_body = result + self.socket.sendall(response_body) + if self.time_finish == 0: + self.time_finish = time.time() + self.log_request() + break + finally: + if self.socket is not None: + _sock = getattr(self.socket, '_sock', None) # Python 3 + try: + # read out request data to prevent error: [Errno 104] Connection reset by peer + if _sock: + try: + # socket.recv would hang + _sock.recv(16384) + finally: + _sock.close() + self.socket.close() + except socket.error: + pass + self.__dict__.pop('socket', None) + self.__dict__.pop('rfile', None) + + def _check_http_version(self): + version_str = self.request_version + if not version_str.startswith("HTTP/"): + return False + version = tuple(int(x) for x in version_str[5:].split(".")) # "HTTP/" + if version[1] < 0 or version < (0, 9) or version >= (2, 0): + return False + return True + + def read_request(self, raw_requestline): + """ + Parse the incoming request. + + Parses various headers into ``self.headers`` using + :attr:`MessageClass`. Other attributes that are set upon a successful + return of this method include ``self.content_length`` and ``self.close_connection``. + + :param str raw_requestline: A native :class:`str` representing + the request line. A processed version of this will be stored + into ``self.requestline``. + + :raises ValueError: If the request is invalid. This error will + not be logged as a traceback (because it's a client issue, not a server problem). + :return: A boolean value indicating whether the request was successfully parsed. + This method should either return a true value or have raised a ValueError + with details about the parsing error. + + .. versionchanged:: 1.1b6 + Raise the previously documented :exc:`ValueError` in more cases instead of returning a + false value; this allows subclasses more opportunity to customize behaviour. + """ + # pylint:disable=too-many-branches + self.requestline = raw_requestline.rstrip() + words = self.requestline.split() + if len(words) == 3: + self.command, self.path, self.request_version = words + if not self._check_http_version(): + raise _InvalidClientRequest('Invalid http version: %r', raw_requestline) + elif len(words) == 2: + self.command, self.path = words + if self.command != "GET": + raise _InvalidClientRequest('Expected GET method: %r', raw_requestline) + self.request_version = "HTTP/0.9" + # QQQ I'm pretty sure we can drop support for HTTP/0.9 + else: + raise _InvalidClientRequest('Invalid HTTP method: %r', raw_requestline) + + self.headers = self.MessageClass(self.rfile, 0) + + if self.headers.status: + raise _InvalidClientRequest('Invalid headers status: %r', self.headers.status) + + if self.headers.get("transfer-encoding", "").lower() == "chunked": + try: + del self.headers["content-length"] + except KeyError: + pass + + content_length = self.headers.get("content-length") + if content_length is not None: + content_length = int(content_length) + if content_length < 0: + raise _InvalidClientRequest('Invalid Content-Length: %r', content_length) + + if content_length and self.command in ('HEAD', ): + raise _InvalidClientRequest('Unexpected Content-Length') + + self.content_length = content_length + + if self.request_version == "HTTP/1.1": + conntype = self.headers.get("Connection", "").lower() + self.close_connection = (conntype == 'close') + else: + self.close_connection = True + + return True + + def log_error(self, msg, *args): + try: + message = msg % args + except Exception: # pylint:disable=broad-except + traceback.print_exc() + message = '%r %r' % (msg, args) + try: + message = '%s: %s' % (self.socket, message) + except Exception: # pylint:disable=broad-except + pass + + try: + self.server.error_log.write(message + '\n') + except Exception: # pylint:disable=broad-except + traceback.print_exc() + + def read_requestline(self): + """ + Read and return the HTTP request line. + + Under both Python 2 and 3, this should return the native + ``str`` type; under Python 3, this probably means the bytes read + from the network need to be decoded (using the ISO-8859-1 charset, aka + latin-1). + """ + line = self.rfile.readline(MAX_REQUEST_LINE) + if PY3: + line = line.decode('latin-1') + return line + + def handle_one_request(self): + """ + Handles one HTTP request using ``self.socket`` and ``self.rfile``. + + Each invocation of this method will do several things, including (but not limited to): + + - Read the request line using :meth:`read_requestline`; + - Read the rest of the request, including headers, with :meth:`read_request`; + - Construct a new WSGI environment in ``self.environ`` using :meth:`get_environ`; + - Store the application in ``self.application``, retrieving it from the server; + - Handle the remainder of the request, including invoking the application, + with :meth:`handle_one_response` + + There are several possible return values to indicate the state + of the client connection: + + - ``None`` + The client connection is already closed or should + be closed because the WSGI application or client set the + ``Connection: close`` header. The request handling + loop should terminate and perform cleanup steps. + - (status, body) + An HTTP status and body tuple. The request was in error, + as detailed by the status and body. The request handling + loop should terminate, close the connection, and perform + cleanup steps. Note that the ``body`` is the complete contents + to send to the client, including all headers and the initial + status line. + - ``True`` + The literal ``True`` value. The request was successfully handled + and the response sent to the client by :meth:`handle_one_response`. + The connection remains open to process more requests and the connection + handling loop should call this method again. This is the typical return + value. + + .. seealso:: :meth:`handle` + + .. versionchanged:: 1.1b6 + Funnel exceptions having to do with invalid HTTP requests through + :meth:`_handle_client_error` to allow subclasses to customize. Note that + this is experimental and may change in the future. + """ + # pylint:disable=too-many-return-statements + if self.rfile.closed: + return + + try: + self.requestline = self.read_requestline() + # Account for old subclasses that haven't done this + if PY3 and isinstance(self.requestline, bytes): + self.requestline = self.requestline.decode('latin-1') + except socket.error: + # "Connection reset by peer" or other socket errors aren't interesting here + return + + if not self.requestline: + return + + self.response_length = 0 + + if len(self.requestline) >= MAX_REQUEST_LINE: + return ('414', _REQUEST_TOO_LONG_RESPONSE) + + try: + # for compatibility with older versions of pywsgi, we pass self.requestline as an argument there + # NOTE: read_request is supposed to raise ValueError on invalid input; allow old + # subclasses that return a False value instead. + # NOTE: This can mutate the value of self.headers, so self.get_environ() must not be + # called until AFTER this call is done. + if not self.read_request(self.requestline): + return ('400', _BAD_REQUEST_RESPONSE) + except Exception as ex: # pylint:disable=broad-except + # Notice we don't use self.handle_error because it reports + # a 500 error to the client, and this is almost certainly + # a client error. + # Provide a hook for subclasses. + return self._handle_client_error(ex) + + self.environ = self.get_environ() + self.application = self.server.application + + self.handle_one_response() + + if self.close_connection: + return + + if self.rfile.closed: + return + + return True # read more requests + + def finalize_headers(self): + if self.provided_date is None: + self.response_headers.append((b'Date', format_date_time(time.time()))) + + if self.code not in (304, 204): + # the reply will include message-body; make sure we have either Content-Length or chunked + if self.provided_content_length is None: + if hasattr(self.result, '__len__'): + total_len = sum(len(chunk) for chunk in self.result) + total_len_str = str(total_len) + if PY3: + total_len_str = total_len_str.encode("latin-1") + self.response_headers.append((b'Content-Length', total_len_str)) + else: + if self.request_version != 'HTTP/1.0': + self.response_use_chunked = True + self.response_headers.append((b'Transfer-Encoding', b'chunked')) + + def _sendall(self, data): + try: + self.socket.sendall(data) + except socket.error as ex: + self.status = 'socket error: %s' % ex + if self.code > 0: + self.code = -self.code + raise + self.response_length += len(data) + + def _write(self, data): + if not data: + # The application/middleware are allowed to yield + # empty bytestrings. + return + + if self.response_use_chunked: + ## Write the chunked encoding + header = ("%x\r\n" % len(data)).encode('ascii') + # socket.sendall will slice these small strings, as [0:], + # but that's special cased to return the original string. + # They're small enough we probably expect them to go down to the network + # buffers in one go anyway. + self._sendall(header) + self._sendall(data) + self._sendall(b'\r\n') # trailer + else: + self._sendall(data) + + def write(self, data): + # The write() callable we return from start_response. + # https://www.python.org/dev/peps/pep-3333/#the-write-callable + # Supposed to do pretty much the same thing as yielding values + # from the application's return. + if self.code in (304, 204) and data: + raise AssertionError('The %s response must have no body' % self.code) + + if self.headers_sent: + self._write(data) + else: + if not self.status: + raise AssertionError("The application did not call start_response()") + self._write_with_headers(data) + + def _write_with_headers(self, data): + towrite = bytearray() + self.headers_sent = True + self.finalize_headers() + + # self.response_headers and self.status are already in latin-1, as encoded by self.start_response + towrite.extend(b'HTTP/1.1 ') + towrite.extend(self.status) + towrite.extend(b'\r\n') + for header, value in self.response_headers: + towrite.extend(header) + towrite.extend(b': ') + towrite.extend(value) + towrite.extend(b"\r\n") + + towrite.extend(b'\r\n') + self._sendall(towrite) + # No need to copy the data into towrite; we may make an extra syscall + # but the copy time could be substantial too, and it reduces the chances + # of sendall being able to send everything in one go + self._write(data) + + def start_response(self, status, headers, exc_info=None): + """ + .. versionchanged:: 1.2a1 + Avoid HTTP header injection by raising a :exc:`ValueError` + if *status* or any *header* name or value contains a carriage + return or newline. + .. versionchanged:: 1.1b5 + Pro-actively handle checking the encoding of the status line + and headers during this method. On Python 2, avoid some + extra encodings. + """ + # pylint:disable=too-many-branches,too-many-statements + if exc_info: + try: + if self.headers_sent: + # Re-raise original exception if headers sent + reraise(*exc_info) + finally: + # Avoid dangling circular ref + exc_info = None + + # Pep 3333, "The start_response callable": + # https://www.python.org/dev/peps/pep-3333/#the-start-response-callable + # "Servers should check for errors in the headers at the time + # start_response is called, so that an error can be raised + # while the application is still running." Here, we check the encoding. + # This aids debugging: headers especially are generated programatically + # and an encoding error in a loop or list comprehension yields an opaque + # UnicodeError without any clue which header was wrong. + # Note that this results in copying the header list at this point, not modifying it, + # although we are allowed to do so if needed. This slightly increases memory usage. + # We also check for HTTP Response Splitting vulnerabilities + response_headers = [] + header = None + value = None + try: + for header, value in headers: + if not isinstance(header, str): + raise UnicodeError("The header must be a native string", header, value) + if not isinstance(value, str): + raise UnicodeError("The value must be a native string", header, value) + if '\r' in header or '\n' in header: + raise ValueError('carriage return or newline in header name', header) + if '\r' in value or '\n' in value: + raise ValueError('carriage return or newline in header value', value) + # Either we're on Python 2, in which case bytes is correct, or + # we're on Python 3 and the user screwed up (because it should be a native + # string). In either case, make sure that this is latin-1 compatible. Under + # Python 2, bytes.encode() will take a round-trip through the system encoding, + # which may be ascii, which is not really what we want. However, the latin-1 encoding + # can encode everything except control characters and the block from 0x7F to 0x9F, so + # explicitly round-tripping bytes through the encoding is unlikely to be of much + # benefit, so we go for speed (the WSGI spec specifically calls out allowing the range + # from 0x00 to 0xFF, although the HTTP spec forbids the control characters). + # Note: Some Python 2 implementations, like Jython, may allow non-octet (above 255) values + # in their str implementation; this is mentioned in the WSGI spec, but we don't + # run on any platform like that so we can assume that a str value is pure bytes. + response_headers.append((header if not PY3 else header.encode("latin-1"), + value if not PY3 else value.encode("latin-1"))) + except UnicodeEncodeError: + # If we get here, we're guaranteed to have a header and value + raise UnicodeError("Non-latin1 header", repr(header), repr(value)) + + # Same as above + if not isinstance(status, str): + raise UnicodeError("The status string must be a native string") + if '\r' in status or '\n' in status: + raise ValueError("carriage return or newline in status", status) + # don't assign to anything until the validation is complete, including parsing the + # code + code = int(status.split(' ', 1)[0]) + + self.status = status if not PY3 else status.encode("latin-1") + self._orig_status = status # Preserve the native string for logging + self.response_headers = response_headers + self.code = code + + provided_connection = None + self.provided_date = None + self.provided_content_length = None + + for header, value in headers: + header = header.lower() + if header == 'connection': + provided_connection = value + elif header == 'date': + self.provided_date = value + elif header == 'content-length': + self.provided_content_length = value + + if self.request_version == 'HTTP/1.0' and provided_connection is None: + response_headers.append((b'Connection', b'close')) + self.close_connection = True + elif provided_connection == 'close': + self.close_connection = True + + if self.code in (304, 204): + if self.provided_content_length is not None and self.provided_content_length != '0': + msg = 'Invalid Content-Length for %s response: %r (must be absent or zero)' % (self.code, self.provided_content_length) + if PY3: + msg = msg.encode('latin-1') + raise AssertionError(msg) + + return self.write + + def log_request(self): + self.server.log.write(self.format_request() + '\n') + + def format_request(self): + now = datetime.now().replace(microsecond=0) + length = self.response_length or '-' + if self.time_finish: + delta = '%.6f' % (self.time_finish - self.time_start) + else: + delta = '-' + client_address = self.client_address[0] if isinstance(self.client_address, tuple) else self.client_address + return '%s - - [%s] "%s" %s %s %s' % ( + client_address or '-', + now, + self.requestline or '', + # Use the native string version of the status, saved so we don't have to + # decode. But fallback to the encoded 'status' in case of subclasses + # (Is that really necessary? At least there's no overhead.) + (self._orig_status or self.status or '000').split()[0], + length, + delta) + + def process_result(self): + for data in self.result: + if data: + self.write(data) + if self.status and not self.headers_sent: + # In other words, the application returned an empty + # result iterable (and did not use the write callable) + # Trigger the flush of the headers. + self.write(b'') + if self.response_use_chunked: + self.socket.sendall(b'0\r\n\r\n') + self.response_length += 5 + + def run_application(self): + assert self.result is None + try: + self.result = self.application(self.environ, self.start_response) + self.process_result() + finally: + close = getattr(self.result, 'close', None) + try: + if close is not None: + close() + finally: + # Discard the result. If it's a generator this can + # free a lot of hidden resources (if we failed to iterate + # all the way through it---the frames are automatically + # cleaned up when StopIteration is raised); but other cases + # could still free up resources sooner than otherwise. + close = None + self.result = None + + def handle_one_response(self): + self.time_start = time.time() + self.status = None + self.headers_sent = False + + self.result = None + self.response_use_chunked = False + self.response_length = 0 + + try: + try: + self.run_application() + finally: + try: + self.wsgi_input._discard() + except (socket.error, IOError): + # Don't let exceptions during discarding + # input override any exception that may have been + # raised by the application, such as our own _InvalidClientInput. + # In the general case, these aren't even worth logging (see the comment + # just below) + pass + except _InvalidClientInput: + self._send_error_response_if_possible(400) + except socket.error as ex: + if ex.args[0] in (errno.EPIPE, errno.ECONNRESET): + # Broken pipe, connection reset by peer. + # Swallow these silently to avoid spewing + # useless info on normal operating conditions, + # bloating logfiles. See https://github.com/gevent/gevent/pull/377 + # and https://github.com/gevent/gevent/issues/136. + if not PY3: + sys.exc_clear() + self.close_connection = True + else: + self.handle_error(*sys.exc_info()) + except: # pylint:disable=bare-except + self.handle_error(*sys.exc_info()) + finally: + self.time_finish = time.time() + self.log_request() + + def _send_error_response_if_possible(self, error_code): + if self.response_length: + self.close_connection = True + else: + status, headers, body = _ERRORS[error_code] + try: + self.start_response(status, headers[:]) + self.write(body) + except socket.error: + if not PY3: + sys.exc_clear() + self.close_connection = True + + def _log_error(self, t, v, tb): + # TODO: Shouldn't we dump this to wsgi.errors? If we did that now, it would + # wind up getting logged twice + if not issubclass(t, GreenletExit): + context = self.environ + if not isinstance(context, self.server.secure_environ_class): + context = self.server.secure_environ_class(context) + self.server.loop.handle_error(context, t, v, tb) + + def handle_error(self, t, v, tb): + # Called for internal, unexpected errors, NOT invalid client input + self._log_error(t, v, tb) + del tb + self._send_error_response_if_possible(500) + + def _handle_client_error(self, ex): + # Called for invalid client input + # Returns the appropriate error response. + if not isinstance(ex, ValueError): + # XXX: Why not self._log_error to send it through the loop's + # handle_error method? + traceback.print_exc() + if isinstance(ex, _InvalidClientRequest): + # These come with good error messages, and we want to let + # log_error deal with the formatting, especially to handle encoding + self.log_error(*ex.args) + else: + self.log_error('Invalid request: %s', str(ex) or ex.__class__.__name__) + return ('400', _BAD_REQUEST_RESPONSE) + + def _headers(self): + key = None + value = None + IGNORED_KEYS = (None, 'CONTENT_TYPE', 'CONTENT_LENGTH') + for header in self.headers.headers: + if key is not None and header[:1] in " \t": + value += header + continue + + if key not in IGNORED_KEYS: + yield 'HTTP_' + key, value.strip() + + key, value = header.split(':', 1) + if '_' in key: + # strip incoming bad veaders + key = None + else: + key = key.replace('-', '_').upper() + + if key not in IGNORED_KEYS: + yield 'HTTP_' + key, value.strip() + + def get_environ(self): + """ + Construct and return a new WSGI environment dictionary for a specific request. + + This should begin with asking the server for the base environment + using :meth:`WSGIServer.get_environ`, and then proceed to add the + request specific values. + + By the time this method is invoked the request line and request shall have + been parsed and ``self.headers`` shall be populated. + """ + env = self.server.get_environ() + env['REQUEST_METHOD'] = self.command + env['SCRIPT_NAME'] = '' + + if '?' in self.path: + path, query = self.path.split('?', 1) + else: + path, query = self.path, '' + # Note that self.path contains the original str object; if it contains + # encoded escapes, it will NOT match PATH_INFO. + env['PATH_INFO'] = unquote_latin1(path) + env['QUERY_STRING'] = query + + if self.headers.typeheader is not None: + env['CONTENT_TYPE'] = self.headers.typeheader + + length = self.headers.getheader('content-length') + if length: + env['CONTENT_LENGTH'] = length + env['SERVER_PROTOCOL'] = self.request_version + + client_address = self.client_address + if isinstance(client_address, tuple): + env['REMOTE_ADDR'] = str(client_address[0]) + env['REMOTE_PORT'] = str(client_address[1]) + + for key, value in self._headers(): + if key in env: + if 'COOKIE' in key: + env[key] += '; ' + value + else: + env[key] += ',' + value + else: + env[key] = value + + if env.get('HTTP_EXPECT') == '100-continue': + sock = self.socket + else: + sock = None + + chunked = env.get('HTTP_TRANSFER_ENCODING', '').lower() == 'chunked' + self.wsgi_input = Input(self.rfile, self.content_length, socket=sock, chunked_input=chunked) + env['wsgi.input'] = self.wsgi_input + return env + + +class _NoopLog(object): + # Does nothing; implements just enough file-like methods + # to pass the WSGI validator + + def write(self, *args, **kwargs): + # pylint:disable=unused-argument + return + + def flush(self): + pass + + def writelines(self, *args, **kwargs): + pass + + +class LoggingLogAdapter(object): + """ + An adapter for :class:`logging.Logger` instances + to let them be used with :class:`WSGIServer`. + + .. warning:: Unless the entire process is monkey-patched at a very + early part of the lifecycle (before logging is configured), + loggers are likely to not be gevent-cooperative. For example, + the socket and syslog handlers use the socket module in a way + that can block, and most handlers acquire threading locks. + + .. warning:: It *may* be possible for the logging functions to be + called in the :class:`gevent.Hub` greenlet. Code running in the + hub greenlet cannot use any gevent blocking functions without triggering + a ``LoopExit``. + + .. versionadded:: 1.1a3 + + .. versionchanged:: 1.1b6 + Attributes not present on this object are proxied to the underlying + logger instance. This permits using custom :class:`~logging.Logger` + subclasses (or indeed, even duck-typed objects). + + .. versionchanged:: 1.1 + Strip trailing newline characters on the message passed to :meth:`write` + because log handlers will usually add one themselves. + """ + + # gevent avoids importing and using logging because importing it and + # creating loggers creates native locks unless monkey-patched. + + __slots__ = ('_logger', '_level') + + def __init__(self, logger, level=20): + """ + Write information to the *logger* at the given *level* (default to INFO). + """ + self._logger = logger + self._level = level + + def write(self, msg): + if msg and msg.endswith('\n'): + msg = msg[:-1] + self._logger.log(self._level, msg) + + def flush(self): + "No-op; required to be a file-like object" + pass + + def writelines(self, lines): + for line in lines: + self.write(line) + + def __getattr__(self, name): + return getattr(self._logger, name) + + def __setattr__(self, name, value): + if name not in LoggingLogAdapter.__slots__: + setattr(self._logger, name, value) + else: + object.__setattr__(self, name, value) + + def __delattr__(self, name): + delattr(self._logger, name) + +#### +## Environ classes. +# These subclass dict. They could subclass collections.UserDict on +# 3.3+ and proxy to the underlying real dict to avoid a copy if we +# have to print them (on 2.7 it's slightly more complicated to be an +# instance of collections.MutableMapping; UserDict.UserDict isn't.) +# Then we could have either the WSGIHandler.get_environ or the +# WSGIServer.get_environ return one of these proxies, and +# WSGIHandler.run_application would know to access the `environ.data` +# attribute to be able to pass the *real* dict to the application +# (because PEP3333 requires no subclasses, only actual dict objects; +# wsgiref.validator and webob.Request both enforce this). This has the +# advantage of not being fragile if anybody else tries to print/log +# self.environ (and not requiring a copy). However, if there are any +# subclasses of Handler or Server, this could break if they don't know +# to return this type. +#### + +class Environ(dict): + """ + A base class that can be used for WSGI environment objects. + + Provisional API. + + .. versionadded:: 1.2a1 + """ + + __slots__ = () # add no ivars or weakref ability + + def copy(self): + return self.__class__(self) + + if not hasattr(dict, 'iteritems'): + # Python 3 + def iteritems(self): + return self.items() + + def __reduce_ex__(self, proto): + return (dict, (), None, None, iter(self.iteritems())) + +class SecureEnviron(Environ): + """ + An environment that does not print its keys and values + by default. + + Provisional API. + + This is intended to keep potentially sensitive information like + HTTP authorization and cookies from being inadvertently printed + or logged. + + For debugging, each instance can have its *secure_repr* attribute + set to ``False``, which will cause it to print like a normal dict. + + When *secure_repr* is ``True`` (the default), then the value of + the *whitelist_keys* attribute is consulted; if this value is + true-ish, it should be a container (something that responds to + ``in``) of key names (typically a list or set). Keys and values in + this dictionary that are in *whitelist_keys* will then be printed, + while all other values will be masked. These values may be + customized on the class by setting the *default_secure_repr* and + *default_whitelist_keys*, respectively:: + + >>> environ = SecureEnviron(key='value') + >>> environ # doctest: +ELLIPSIS + <pywsgi.SecureEnviron dict (keys: 1) at ... + + If we whitelist the key, it gets printed:: + + >>> environ.whitelist_keys = {'key'} + >>> environ + {'key': 'value'} + + A non-whitelisted key (*only*, to avoid doctest issues) is masked:: + + >>> environ['secure'] = 'secret'; del environ['key'] + >>> environ + {'secure': '<MASKED>'} + + We can turn it off entirely for the instance:: + + >>> environ.secure_repr = False + >>> environ + {'secure': 'secret'} + + We can also customize it at the class level (here we use a new + class to be explicit and to avoid polluting the true default + values; we would set this class to be the ``environ_class`` of the + server):: + + >>> class MyEnviron(SecureEnviron): + ... default_whitelist_keys = ('key',) + ... + >>> environ = MyEnviron({'key': 'value'}) + >>> environ + {'key': 'value'} + + .. versionadded:: 1.2a1 + """ + + default_secure_repr = True + default_whitelist_keys = () + default_print_masked_keys = True + + # Allow instances to override the class values, + # but inherit from the class if not present. Keeps instances + # small since we can't combine __slots__ with class attributes + # of the same name. + __slots__ = ('secure_repr', 'whitelist_keys', 'print_masked_keys') + + def __getattr__(self, name): + if name in SecureEnviron.__slots__: + return getattr(type(self), 'default_' + name) + raise AttributeError(name) + + def __repr__(self): + if self.secure_repr: + whitelist = self.whitelist_keys + print_masked = self.print_masked_keys + if whitelist: + safe = {k: self[k] if k in whitelist else "<MASKED>" + for k in self + if k in whitelist or print_masked} + safe_repr = repr(safe) + if not print_masked and len(safe) != len(self): + safe_repr = safe_repr[:-1] + ", (hidden keys: %d)}" % (len(self) - len(safe)) + return safe_repr + return "<pywsgi.SecureEnviron dict (keys: %d) at %s>" % (len(self), id(self)) + return Environ.__repr__(self) + __str__ = __repr__ + + +class WSGISecureEnviron(SecureEnviron): + """ + Specializes the default list of whitelisted keys to a few + common WSGI variables. + + Example:: + + >>> environ = WSGISecureEnviron(REMOTE_ADDR='::1', HTTP_AUTHORIZATION='secret') + >>> environ + {'REMOTE_ADDR': '::1', (hidden keys: 1)} + >>> import pprint + >>> pprint.pprint(environ) + {'REMOTE_ADDR': '::1', (hidden keys: 1)} + >>> print(pprint.pformat(environ)) + {'REMOTE_ADDR': '::1', (hidden keys: 1)} + """ + default_whitelist_keys = ('REMOTE_ADDR', 'REMOTE_PORT', 'HTTP_HOST') + default_print_masked_keys = False + + +class WSGIServer(StreamServer): + """ + A WSGI server based on :class:`StreamServer` that supports HTTPS. + + + :keyword log: If given, an object with a ``write`` method to which + request (access) logs will be written. If not given, defaults + to :obj:`sys.stderr`. You may pass ``None`` to disable request + logging. You may use a wrapper, around e.g., :mod:`logging`, + to support objects that don't implement a ``write`` method. + (If you pass a :class:`~logging.Logger` instance, or in + general something that provides a ``log`` method but not a + ``write`` method, such a wrapper will automatically be created + and it will be logged to at the :data:`~logging.INFO` level.) + + :keyword error_log: If given, a file-like object with ``write``, + ``writelines`` and ``flush`` methods to which error logs will + be written. If not given, defaults to :obj:`sys.stderr`. You + may pass ``None`` to disable error logging (not recommended). + You may use a wrapper, around e.g., :mod:`logging`, to support + objects that don't implement the proper methods. This + parameter will become the value for ``wsgi.errors`` in the + WSGI environment (if not already set). (As with *log*, + wrappers for :class:`~logging.Logger` instances and the like + will be created automatically and logged to at the :data:`~logging.ERROR` + level.) + + .. seealso:: + + :class:`LoggingLogAdapter` + See important warnings before attempting to use :mod:`logging`. + + .. versionchanged:: 1.1a3 + Added the ``error_log`` parameter, and set ``wsgi.errors`` in the WSGI + environment to this value. + .. versionchanged:: 1.1a3 + Add support for passing :class:`logging.Logger` objects to the ``log`` and + ``error_log`` arguments. + """ + + #: A callable taking three arguments: (socket, address, server) and returning + #: an object with a ``handle()`` method. The callable is called once for + #: each incoming socket request, as is its handle method. The handle method should not + #: return until all use of the socket is complete. + #: + #: This class uses the :class:`WSGIHandler` object as the default value. You may + #: subclass this class and set a different default value, or you may pass + #: a value to use in the ``handler_class`` keyword constructor argument. + handler_class = WSGIHandler + + #: The object to which request logs will be written. + #: It must never be None. Initialized from the ``log`` constructor + #: parameter. + log = None + + #: The object to which error logs will be written. + #: It must never be None. Initialized from the ``error_log`` constructor + #: parameter. + error_log = None + + #: The class of environ objects passed to the handlers. + #: Must be a dict subclass. For compliance with :pep:`3333` + #: and libraries like WebOb, this is simply :class:`dict` + #: but this can be customized in a subclass or per-instance + #: (probably to :class:`WSGISecureEnviron`). + #: + #: .. versionadded:: 1.2a1 + environ_class = dict + + # Undocumented internal detail: the class that WSGIHandler._log_error + # will cast to before passing to the loop. + secure_environ_class = WSGISecureEnviron + + base_env = {'GATEWAY_INTERFACE': 'CGI/1.1', + 'SERVER_SOFTWARE': 'gevent/%d.%d Python/%d.%d' % (gevent.version_info[:2] + sys.version_info[:2]), + 'SCRIPT_NAME': '', + 'wsgi.version': (1, 0), + 'wsgi.multithread': False, # XXX: Aren't we really, though? + 'wsgi.multiprocess': False, + 'wsgi.run_once': False} + + def __init__(self, listener, application=None, backlog=None, spawn='default', + log='default', error_log='default', + handler_class=None, + environ=None, **ssl_args): + StreamServer.__init__(self, listener, backlog=backlog, spawn=spawn, **ssl_args) + if application is not None: + self.application = application + if handler_class is not None: + self.handler_class = handler_class + + # Note that we can't initialize these as class variables: + # sys.stderr might get monkey patched at runtime. + def _make_log(l, level=20): + if l == 'default': + return sys.stderr + if l is None: + return _NoopLog() + if not hasattr(l, 'write') and hasattr(l, 'log'): + return LoggingLogAdapter(l, level) + return l + self.log = _make_log(log) + self.error_log = _make_log(error_log, 40) # logging.ERROR + + self.set_environ(environ) + self.set_max_accept() + + def set_environ(self, environ=None): + if environ is not None: + self.environ = environ + environ_update = getattr(self, 'environ', None) + + self.environ = self.environ_class(self.base_env) + if self.ssl_enabled: + self.environ['wsgi.url_scheme'] = 'https' + else: + self.environ['wsgi.url_scheme'] = 'http' + if environ_update is not None: + self.environ.update(environ_update) + if self.environ.get('wsgi.errors') is None: + self.environ['wsgi.errors'] = self.error_log + + def set_max_accept(self): + if self.environ.get('wsgi.multiprocess'): + self.max_accept = 1 + + def get_environ(self): + return self.environ_class(self.environ) + + def init_socket(self): + StreamServer.init_socket(self) + self.update_environ() + + def update_environ(self): + """ + Called before the first request is handled to fill in WSGI environment values. + + This includes getting the correct server name and port. + """ + address = self.address + if isinstance(address, tuple): + if 'SERVER_NAME' not in self.environ: + try: + name = socket.getfqdn(address[0]) + except socket.error: + name = str(address[0]) + if PY3 and not isinstance(name, str): + name = name.decode('ascii') + self.environ['SERVER_NAME'] = name + self.environ.setdefault('SERVER_PORT', str(address[1])) + else: + self.environ.setdefault('SERVER_NAME', '') + self.environ.setdefault('SERVER_PORT', '') + + def handle(self, sock, address): + """ + Create an instance of :attr:`handler_class` to handle the request. + + This method blocks until the handler returns. + """ + # pylint:disable=method-hidden + handler = self.handler_class(sock, address, self) + handler.handle() + +def _main(): + # Provisional main handler, for quick tests, not production + # usage. + from gevent import monkey; monkey.patch_all() + + import argparse + import importlib + + parser = argparse.ArgumentParser() + parser.add_argument("app", help="dotted name of WSGI app callable [module:callable]") + parser.add_argument("-b", "--bind", + help="The socket to bind", + default=":8080") + + args = parser.parse_args() + + module_name, app_name = args.app.split(':') + module = importlib.import_module(module_name) + app = getattr(module, app_name) + bind = args.bind + + server = WSGIServer(bind, app) + server.serve_forever() + +if __name__ == '__main__': + _main() |