From 3d57e14df7ba5f14a634295caf3b2e60da50bfe2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 6 Sep 2019 16:31:13 -0700 Subject: Remove windows python distribution from repo and add requirements.txt --- python/werkzeug/formparser.py | 586 ------------------------------------------ 1 file changed, 586 deletions(-) delete mode 100644 python/werkzeug/formparser.py (limited to 'python/werkzeug/formparser.py') diff --git a/python/werkzeug/formparser.py b/python/werkzeug/formparser.py deleted file mode 100644 index 0ddc5c8..0000000 --- a/python/werkzeug/formparser.py +++ /dev/null @@ -1,586 +0,0 @@ -# -*- coding: utf-8 -*- -""" - werkzeug.formparser - ~~~~~~~~~~~~~~~~~~~ - - This module implements the form parsing. It supports url-encoded forms - as well as non-nested multipart uploads. - - :copyright: 2007 Pallets - :license: BSD-3-Clause -""" -import codecs -import re -from functools import update_wrapper -from itertools import chain -from itertools import repeat -from itertools import tee - -from ._compat import BytesIO -from ._compat import text_type -from ._compat import to_native -from .datastructures import FileStorage -from .datastructures import Headers -from .datastructures import MultiDict -from .http import parse_options_header -from .urls import url_decode_stream -from .wsgi import get_content_length -from .wsgi import get_input_stream -from .wsgi import make_line_iter - -# there are some platforms where SpooledTemporaryFile is not available. -# In that case we need to provide a fallback. -try: - from tempfile import SpooledTemporaryFile -except ImportError: - from tempfile import TemporaryFile - - SpooledTemporaryFile = None - - -#: an iterator that yields empty strings -_empty_string_iter = repeat("") - -#: a regular expression for multipart boundaries -_multipart_boundary_re = re.compile("^[ -~]{0,200}[!-~]$") - -#: supported http encodings that are also available in python we support -#: for multipart messages. -_supported_multipart_encodings = frozenset(["base64", "quoted-printable"]) - - -def default_stream_factory( - total_content_length, filename, content_type, content_length=None -): - """The stream factory that is used per default.""" - max_size = 1024 * 500 - if SpooledTemporaryFile is not None: - return SpooledTemporaryFile(max_size=max_size, mode="wb+") - if total_content_length is None or total_content_length > max_size: - return TemporaryFile("wb+") - return BytesIO() - - -def parse_form_data( - environ, - stream_factory=None, - charset="utf-8", - errors="replace", - max_form_memory_size=None, - max_content_length=None, - cls=None, - silent=True, -): - """Parse the form data in the environ and return it as tuple in the form - ``(stream, form, files)``. You should only call this method if the - transport method is `POST`, `PUT`, or `PATCH`. - - If the mimetype of the data transmitted is `multipart/form-data` the - files multidict will be filled with `FileStorage` objects. If the - mimetype is unknown the input stream is wrapped and returned as first - argument, else the stream is empty. - - This is a shortcut for the common usage of :class:`FormDataParser`. - - Have a look at :ref:`dealing-with-request-data` for more details. - - .. versionadded:: 0.5 - The `max_form_memory_size`, `max_content_length` and - `cls` parameters were added. - - .. versionadded:: 0.5.1 - The optional `silent` flag was added. - - :param environ: the WSGI environment to be used for parsing. - :param stream_factory: An optional callable that returns a new read and - writeable file descriptor. This callable works - the same as :meth:`~BaseResponse._get_file_stream`. - :param charset: The character set for URL and url encoded form data. - :param errors: The encoding error behavior. - :param max_form_memory_size: the maximum number of bytes to be accepted for - in-memory stored form data. If the data - exceeds the value specified an - :exc:`~exceptions.RequestEntityTooLarge` - exception is raised. - :param max_content_length: If this is provided and the transmitted data - is longer than this value an - :exc:`~exceptions.RequestEntityTooLarge` - exception is raised. - :param cls: an optional dict class to use. If this is not specified - or `None` the default :class:`MultiDict` is used. - :param silent: If set to False parsing errors will not be caught. - :return: A tuple in the form ``(stream, form, files)``. - """ - return FormDataParser( - stream_factory, - charset, - errors, - max_form_memory_size, - max_content_length, - cls, - silent, - ).parse_from_environ(environ) - - -def exhaust_stream(f): - """Helper decorator for methods that exhausts the stream on return.""" - - def wrapper(self, stream, *args, **kwargs): - try: - return f(self, stream, *args, **kwargs) - finally: - exhaust = getattr(stream, "exhaust", None) - if exhaust is not None: - exhaust() - else: - while 1: - chunk = stream.read(1024 * 64) - if not chunk: - break - - return update_wrapper(wrapper, f) - - -class FormDataParser(object): - """This class implements parsing of form data for Werkzeug. By itself - it can parse multipart and url encoded form data. It can be subclassed - and extended but for most mimetypes it is a better idea to use the - untouched stream and expose it as separate attributes on a request - object. - - .. versionadded:: 0.8 - - :param stream_factory: An optional callable that returns a new read and - writeable file descriptor. This callable works - the same as :meth:`~BaseResponse._get_file_stream`. - :param charset: The character set for URL and url encoded form data. - :param errors: The encoding error behavior. - :param max_form_memory_size: the maximum number of bytes to be accepted for - in-memory stored form data. If the data - exceeds the value specified an - :exc:`~exceptions.RequestEntityTooLarge` - exception is raised. - :param max_content_length: If this is provided and the transmitted data - is longer than this value an - :exc:`~exceptions.RequestEntityTooLarge` - exception is raised. - :param cls: an optional dict class to use. If this is not specified - or `None` the default :class:`MultiDict` is used. - :param silent: If set to False parsing errors will not be caught. - """ - - def __init__( - self, - stream_factory=None, - charset="utf-8", - errors="replace", - max_form_memory_size=None, - max_content_length=None, - cls=None, - silent=True, - ): - if stream_factory is None: - stream_factory = default_stream_factory - self.stream_factory = stream_factory - self.charset = charset - self.errors = errors - self.max_form_memory_size = max_form_memory_size - self.max_content_length = max_content_length - if cls is None: - cls = MultiDict - self.cls = cls - self.silent = silent - - def get_parse_func(self, mimetype, options): - return self.parse_functions.get(mimetype) - - def parse_from_environ(self, environ): - """Parses the information from the environment as form data. - - :param environ: the WSGI environment to be used for parsing. - :return: A tuple in the form ``(stream, form, files)``. - """ - content_type = environ.get("CONTENT_TYPE", "") - content_length = get_content_length(environ) - mimetype, options = parse_options_header(content_type) - return self.parse(get_input_stream(environ), mimetype, content_length, options) - - def parse(self, stream, mimetype, content_length, options=None): - """Parses the information from the given stream, mimetype, - content length and mimetype parameters. - - :param stream: an input stream - :param mimetype: the mimetype of the data - :param content_length: the content length of the incoming data - :param options: optional mimetype parameters (used for - the multipart boundary for instance) - :return: A tuple in the form ``(stream, form, files)``. - """ - if ( - self.max_content_length is not None - and content_length is not None - and content_length > self.max_content_length - ): - raise exceptions.RequestEntityTooLarge() - if options is None: - options = {} - - parse_func = self.get_parse_func(mimetype, options) - if parse_func is not None: - try: - return parse_func(self, stream, mimetype, content_length, options) - except ValueError: - if not self.silent: - raise - - return stream, self.cls(), self.cls() - - @exhaust_stream - def _parse_multipart(self, stream, mimetype, content_length, options): - parser = MultiPartParser( - self.stream_factory, - self.charset, - self.errors, - max_form_memory_size=self.max_form_memory_size, - cls=self.cls, - ) - boundary = options.get("boundary") - if boundary is None: - raise ValueError("Missing boundary") - if isinstance(boundary, text_type): - boundary = boundary.encode("ascii") - form, files = parser.parse(stream, boundary, content_length) - return stream, form, files - - @exhaust_stream - def _parse_urlencoded(self, stream, mimetype, content_length, options): - if ( - self.max_form_memory_size is not None - and content_length is not None - and content_length > self.max_form_memory_size - ): - raise exceptions.RequestEntityTooLarge() - form = url_decode_stream(stream, self.charset, errors=self.errors, cls=self.cls) - return stream, form, self.cls() - - #: mapping of mimetypes to parsing functions - parse_functions = { - "multipart/form-data": _parse_multipart, - "application/x-www-form-urlencoded": _parse_urlencoded, - "application/x-url-encoded": _parse_urlencoded, - } - - -def is_valid_multipart_boundary(boundary): - """Checks if the string given is a valid multipart boundary.""" - return _multipart_boundary_re.match(boundary) is not None - - -def _line_parse(line): - """Removes line ending characters and returns a tuple (`stripped_line`, - `is_terminated`). - """ - if line[-2:] in ["\r\n", b"\r\n"]: - return line[:-2], True - elif line[-1:] in ["\r", "\n", b"\r", b"\n"]: - return line[:-1], True - return line, False - - -def parse_multipart_headers(iterable): - """Parses multipart headers from an iterable that yields lines (including - the trailing newline symbol). The iterable has to be newline terminated. - - The iterable will stop at the line where the headers ended so it can be - further consumed. - - :param iterable: iterable of strings that are newline terminated - """ - result = [] - for line in iterable: - line = to_native(line) - line, line_terminated = _line_parse(line) - if not line_terminated: - raise ValueError("unexpected end of line in multipart header") - if not line: - break - elif line[0] in " \t" and result: - key, value = result[-1] - result[-1] = (key, value + "\n " + line[1:]) - else: - parts = line.split(":", 1) - if len(parts) == 2: - result.append((parts[0].strip(), parts[1].strip())) - - # we link the list to the headers, no need to create a copy, the - # list was not shared anyways. - return Headers(result) - - -_begin_form = "begin_form" -_begin_file = "begin_file" -_cont = "cont" -_end = "end" - - -class MultiPartParser(object): - def __init__( - self, - stream_factory=None, - charset="utf-8", - errors="replace", - max_form_memory_size=None, - cls=None, - buffer_size=64 * 1024, - ): - self.charset = charset - self.errors = errors - self.max_form_memory_size = max_form_memory_size - self.stream_factory = ( - default_stream_factory if stream_factory is None else stream_factory - ) - self.cls = MultiDict if cls is None else cls - - # make sure the buffer size is divisible by four so that we can base64 - # decode chunk by chunk - assert buffer_size % 4 == 0, "buffer size has to be divisible by 4" - # also the buffer size has to be at least 1024 bytes long or long headers - # will freak out the system - assert buffer_size >= 1024, "buffer size has to be at least 1KB" - - self.buffer_size = buffer_size - - def _fix_ie_filename(self, filename): - """Internet Explorer 6 transmits the full file name if a file is - uploaded. This function strips the full path if it thinks the - filename is Windows-like absolute. - """ - if filename[1:3] == ":\\" or filename[:2] == "\\\\": - return filename.split("\\")[-1] - return filename - - def _find_terminator(self, iterator): - """The terminator might have some additional newlines before it. - There is at least one application that sends additional newlines - before headers (the python setuptools package). - """ - for line in iterator: - if not line: - break - line = line.strip() - if line: - return line - return b"" - - def fail(self, message): - raise ValueError(message) - - def get_part_encoding(self, headers): - transfer_encoding = headers.get("content-transfer-encoding") - if ( - transfer_encoding is not None - and transfer_encoding in _supported_multipart_encodings - ): - return transfer_encoding - - def get_part_charset(self, headers): - # Figure out input charset for current part - content_type = headers.get("content-type") - if content_type: - mimetype, ct_params = parse_options_header(content_type) - return ct_params.get("charset", self.charset) - return self.charset - - def start_file_streaming(self, filename, headers, total_content_length): - if isinstance(filename, bytes): - filename = filename.decode(self.charset, self.errors) - filename = self._fix_ie_filename(filename) - content_type = headers.get("content-type") - try: - content_length = int(headers["content-length"]) - except (KeyError, ValueError): - content_length = 0 - container = self.stream_factory( - total_content_length=total_content_length, - filename=filename, - content_type=content_type, - content_length=content_length, - ) - return filename, container - - def in_memory_threshold_reached(self, bytes): - raise exceptions.RequestEntityTooLarge() - - def validate_boundary(self, boundary): - if not boundary: - self.fail("Missing boundary") - if not is_valid_multipart_boundary(boundary): - self.fail("Invalid boundary: %s" % boundary) - if len(boundary) > self.buffer_size: # pragma: no cover - # this should never happen because we check for a minimum size - # of 1024 and boundaries may not be longer than 200. The only - # situation when this happens is for non debug builds where - # the assert is skipped. - self.fail("Boundary longer than buffer size") - - def parse_lines(self, file, boundary, content_length, cap_at_buffer=True): - """Generate parts of - ``('begin_form', (headers, name))`` - ``('begin_file', (headers, name, filename))`` - ``('cont', bytestring)`` - ``('end', None)`` - - Always obeys the grammar - parts = ( begin_form cont* end | - begin_file cont* end )* - """ - next_part = b"--" + boundary - last_part = next_part + b"--" - - iterator = chain( - make_line_iter( - file, - limit=content_length, - buffer_size=self.buffer_size, - cap_at_buffer=cap_at_buffer, - ), - _empty_string_iter, - ) - - terminator = self._find_terminator(iterator) - - if terminator == last_part: - return - elif terminator != next_part: - self.fail("Expected boundary at start of multipart data") - - while terminator != last_part: - headers = parse_multipart_headers(iterator) - - disposition = headers.get("content-disposition") - if disposition is None: - self.fail("Missing Content-Disposition header") - disposition, extra = parse_options_header(disposition) - transfer_encoding = self.get_part_encoding(headers) - name = extra.get("name") - filename = extra.get("filename") - - # if no content type is given we stream into memory. A list is - # used as a temporary container. - if filename is None: - yield _begin_form, (headers, name) - - # otherwise we parse the rest of the headers and ask the stream - # factory for something we can write in. - else: - yield _begin_file, (headers, name, filename) - - buf = b"" - for line in iterator: - if not line: - self.fail("unexpected end of stream") - - if line[:2] == b"--": - terminator = line.rstrip() - if terminator in (next_part, last_part): - break - - if transfer_encoding is not None: - if transfer_encoding == "base64": - transfer_encoding = "base64_codec" - try: - line = codecs.decode(line, transfer_encoding) - except Exception: - self.fail("could not decode transfer encoded chunk") - - # we have something in the buffer from the last iteration. - # this is usually a newline delimiter. - if buf: - yield _cont, buf - buf = b"" - - # If the line ends with windows CRLF we write everything except - # the last two bytes. In all other cases however we write - # everything except the last byte. If it was a newline, that's - # fine, otherwise it does not matter because we will write it - # the next iteration. this ensures we do not write the - # final newline into the stream. That way we do not have to - # truncate the stream. However we do have to make sure that - # if something else than a newline is in there we write it - # out. - if line[-2:] == b"\r\n": - buf = b"\r\n" - cutoff = -2 - else: - buf = line[-1:] - cutoff = -1 - yield _cont, line[:cutoff] - - else: # pragma: no cover - raise ValueError("unexpected end of part") - - # if we have a leftover in the buffer that is not a newline - # character we have to flush it, otherwise we will chop of - # certain values. - if buf not in (b"", b"\r", b"\n", b"\r\n"): - yield _cont, buf - - yield _end, None - - def parse_parts(self, file, boundary, content_length): - """Generate ``('file', (name, val))`` and - ``('form', (name, val))`` parts. - """ - in_memory = 0 - - for ellt, ell in self.parse_lines(file, boundary, content_length): - if ellt == _begin_file: - headers, name, filename = ell - is_file = True - guard_memory = False - filename, container = self.start_file_streaming( - filename, headers, content_length - ) - _write = container.write - - elif ellt == _begin_form: - headers, name = ell - is_file = False - container = [] - _write = container.append - guard_memory = self.max_form_memory_size is not None - - elif ellt == _cont: - _write(ell) - # if we write into memory and there is a memory size limit we - # count the number of bytes in memory and raise an exception if - # there is too much data in memory. - if guard_memory: - in_memory += len(ell) - if in_memory > self.max_form_memory_size: - self.in_memory_threshold_reached(in_memory) - - elif ellt == _end: - if is_file: - container.seek(0) - yield ( - "file", - (name, FileStorage(container, filename, name, headers=headers)), - ) - else: - part_charset = self.get_part_charset(headers) - yield ( - "form", - (name, b"".join(container).decode(part_charset, self.errors)), - ) - - def parse(self, file, boundary, content_length): - formstream, filestream = tee( - self.parse_parts(file, boundary, content_length), 2 - ) - form = (p[1] for p in formstream if p[0] == "form") - files = (p[1] for p in filestream if p[0] == "file") - return self.cls(form), self.cls(files) - - -from . import exceptions -- cgit v1.2.3